diff options
149 files changed, 6413 insertions, 10886 deletions
diff --git a/ML_ADMConstraints/schedule.ccl b/ML_ADMConstraints/schedule.ccl index a57f9b5..095a226 100644 --- a/ML_ADMConstraints/schedule.ccl +++ b/ML_ADMConstraints/schedule.ccl @@ -33,12 +33,6 @@ schedule ML_ADMConstraints_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_ADMConstraints_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_ADMConstraints_RegisterSymmetries in SymmetryRegister { LANG: C @@ -53,6 +47,15 @@ schedule group ML_ADMConstraints_group in MoL_PseudoEvolution after MoL_PostStep schedule ML_ADMConstraints in ML_ADMConstraints_group { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_ADMConstraints::ML_Ham + WRITES: ML_ADMConstraints::ML_mom } "ML_ADMConstraints" schedule ML_ADMConstraints_SelectBCs in ML_ADMConstraints_bc_group @@ -90,6 +93,12 @@ schedule ML_ADMConstraints_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_ADMConstraints_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_ADMConstraints_ApplyBCs in MoL_PostStep after ML_ADMConstraints_SelectBoundConds { # no language specified diff --git a/ML_ADMConstraints/src/ML_ADMConstraints.cc b/ML_ADMConstraints/src/ML_ADMConstraints.cc index 1f30b9a..4c59931 100644 --- a/ML_ADMConstraints/src/ML_ADMConstraints.cc +++ b/ML_ADMConstraints/src/ML_ADMConstraints.cc @@ -43,8 +43,6 @@ static void ML_ADMConstraints_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -75,9 +73,9 @@ static void ML_ADMConstraints_Body(cGH const * restrict const cctkGH, int const CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); + CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx*dy); + CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx*dz); + CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy*dz); CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); @@ -92,7 +90,7 @@ static void ML_ADMConstraints_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (ML_ADMConstraints, + CCTK_LOOP3(ML_ADMConstraints, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -501,7 +499,7 @@ static void ML_ADMConstraints_Body(cGH const * restrict const cctkGH, int const M2[index] = M2L; M3[index] = M3L; } - CCTK_ENDLOOP3 (ML_ADMConstraints); + CCTK_ENDLOOP3(ML_ADMConstraints); } extern "C" void ML_ADMConstraints(CCTK_ARGUMENTS) @@ -520,12 +518,18 @@ extern "C" void ML_ADMConstraints(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_ADMConstraints::ML_Ham","ML_ADMConstraints::ML_mom"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_ADMConstraints::ML_Ham", + "ML_ADMConstraints::ML_mom"}; GenericFD_AssertGroupStorage(cctkGH, "ML_ADMConstraints", 6, groups); GenericFD_EnsureStencilFits(cctkGH, "ML_ADMConstraints", 2, 2, 2); - GenericFD_LoopOverInterior(cctkGH, &ML_ADMConstraints_Body); + GenericFD_LoopOverInterior(cctkGH, ML_ADMConstraints_Body); if (verbose > 1) { diff --git a/ML_ADMConstraints/src/make.code.defn b/ML_ADMConstraints/src/make.code.defn index e9b2afb..d94f02e 100644 --- a/ML_ADMConstraints/src/make.code.defn +++ b/ML_ADMConstraints/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_ADMConstraints.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_ADMConstraints.cc Boundaries.cc diff --git a/ML_ADMConstraints_MP/schedule.ccl b/ML_ADMConstraints_MP/schedule.ccl index dfa7202..a20f509 100644 --- a/ML_ADMConstraints_MP/schedule.ccl +++ b/ML_ADMConstraints_MP/schedule.ccl @@ -33,12 +33,6 @@ schedule ML_ADMConstraints_MP_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_ADMConstraints_MP_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_ADMConstraints_MP_RegisterSymmetries in SymmetryRegister { LANG: C @@ -53,6 +47,15 @@ schedule group ML_ADMConstraints_MP_group in MoL_PseudoEvolution after MoL_PostS schedule ML_ADMConstraints_MP in ML_ADMConstraints_MP_group { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_ADMConstraints_MP::ML_Ham + WRITES: ML_ADMConstraints_MP::ML_mom } "ML_ADMConstraints_MP" schedule ML_ADMConstraints_MP_SelectBCs in ML_ADMConstraints_MP_bc_group @@ -90,6 +93,12 @@ schedule ML_ADMConstraints_MP_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_ADMConstraints_MP_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_ADMConstraints_MP_ApplyBCs in MoL_PostStep after ML_ADMConstraints_MP_SelectBoundConds { # no language specified diff --git a/ML_ADMConstraints_MP/src/ML_ADMConstraints_MP.cc b/ML_ADMConstraints_MP/src/ML_ADMConstraints_MP.cc index 53b6445..85f83c7 100644 --- a/ML_ADMConstraints_MP/src/ML_ADMConstraints_MP.cc +++ b/ML_ADMConstraints_MP/src/ML_ADMConstraints_MP.cc @@ -43,8 +43,6 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -75,9 +73,9 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); + CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx*dy); + CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx*dz); + CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy*dz); CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); @@ -137,7 +135,7 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (ML_ADMConstraints_MP, + CCTK_LOOP3(ML_ADMConstraints_MP, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -1305,7 +1303,7 @@ static void ML_ADMConstraints_MP_Body(cGH const * restrict const cctkGH, int con M2[index] = M2L; M3[index] = M3L; } - CCTK_ENDLOOP3 (ML_ADMConstraints_MP); + CCTK_ENDLOOP3(ML_ADMConstraints_MP); } extern "C" void ML_ADMConstraints_MP(CCTK_ARGUMENTS) @@ -1324,12 +1322,18 @@ extern "C" void ML_ADMConstraints_MP(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_ADMConstraints_MP::ML_Ham","ML_ADMConstraints_MP::ML_mom"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_ADMConstraints_MP::ML_Ham", + "ML_ADMConstraints_MP::ML_mom"}; GenericFD_AssertGroupStorage(cctkGH, "ML_ADMConstraints_MP", 6, groups); GenericFD_EnsureStencilFits(cctkGH, "ML_ADMConstraints_MP", 2, 2, 2); - GenericFD_LoopOverInterior(cctkGH, &ML_ADMConstraints_MP_Body); + GenericFD_LoopOverInterior(cctkGH, ML_ADMConstraints_MP_Body); if (verbose > 1) { diff --git a/ML_ADMConstraints_MP/src/make.code.defn b/ML_ADMConstraints_MP/src/make.code.defn index c8add04..da1ced9 100644 --- a/ML_ADMConstraints_MP/src/make.code.defn +++ b/ML_ADMConstraints_MP/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_ADMConstraints_MP.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_ADMConstraints_MP.cc Boundaries.cc diff --git a/ML_ADMConstraints_O2/schedule.ccl b/ML_ADMConstraints_O2/schedule.ccl index c43ddfe..c1489cb 100644 --- a/ML_ADMConstraints_O2/schedule.ccl +++ b/ML_ADMConstraints_O2/schedule.ccl @@ -33,12 +33,6 @@ schedule ML_ADMConstraints_O2_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_ADMConstraints_O2_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_ADMConstraints_O2_RegisterSymmetries in SymmetryRegister { LANG: C @@ -53,6 +47,15 @@ schedule group ML_ADMConstraints_O2_group in MoL_PseudoEvolution after MoL_PostS schedule ML_ADMConstraints_O2 in ML_ADMConstraints_O2_group { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_ADMConstraints_O2::ML_Ham + WRITES: ML_ADMConstraints_O2::ML_mom } "ML_ADMConstraints_O2" schedule ML_ADMConstraints_O2_SelectBCs in ML_ADMConstraints_O2_bc_group @@ -90,6 +93,12 @@ schedule ML_ADMConstraints_O2_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_ADMConstraints_O2_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_ADMConstraints_O2_ApplyBCs in MoL_PostStep after ML_ADMConstraints_O2_SelectBoundConds { # no language specified diff --git a/ML_ADMConstraints_O2/src/ML_ADMConstraints_O2.cc b/ML_ADMConstraints_O2/src/ML_ADMConstraints_O2.cc index 59566f7..d00d9c1 100644 --- a/ML_ADMConstraints_O2/src/ML_ADMConstraints_O2.cc +++ b/ML_ADMConstraints_O2/src/ML_ADMConstraints_O2.cc @@ -43,8 +43,6 @@ static void ML_ADMConstraints_O2_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -75,9 +73,9 @@ static void ML_ADMConstraints_O2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody2 = INV(SQR(dy)); CCTK_REAL const p1odz2 = INV(SQR(dz)); @@ -92,7 +90,7 @@ static void ML_ADMConstraints_O2_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (ML_ADMConstraints_O2, + CCTK_LOOP3(ML_ADMConstraints_O2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -501,7 +499,7 @@ static void ML_ADMConstraints_O2_Body(cGH const * restrict const cctkGH, int con M2[index] = M2L; M3[index] = M3L; } - CCTK_ENDLOOP3 (ML_ADMConstraints_O2); + CCTK_ENDLOOP3(ML_ADMConstraints_O2); } extern "C" void ML_ADMConstraints_O2(CCTK_ARGUMENTS) @@ -520,12 +518,18 @@ extern "C" void ML_ADMConstraints_O2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_ADMConstraints_O2::ML_Ham","ML_ADMConstraints_O2::ML_mom"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_ADMConstraints_O2::ML_Ham", + "ML_ADMConstraints_O2::ML_mom"}; GenericFD_AssertGroupStorage(cctkGH, "ML_ADMConstraints_O2", 6, groups); GenericFD_EnsureStencilFits(cctkGH, "ML_ADMConstraints_O2", 1, 1, 1); - GenericFD_LoopOverInterior(cctkGH, &ML_ADMConstraints_O2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_ADMConstraints_O2_Body); if (verbose > 1) { diff --git a/ML_ADMConstraints_O2/src/make.code.defn b/ML_ADMConstraints_O2/src/make.code.defn index 85e9132..751150b 100644 --- a/ML_ADMConstraints_O2/src/make.code.defn +++ b/ML_ADMConstraints_O2/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_ADMConstraints_O2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_ADMConstraints_O2.cc Boundaries.cc diff --git a/ML_ADMQuantities/schedule.ccl b/ML_ADMQuantities/schedule.ccl index 3316295..cd3e5dd 100644 --- a/ML_ADMQuantities/schedule.ccl +++ b/ML_ADMQuantities/schedule.ccl @@ -33,12 +33,6 @@ schedule ML_ADMQuantities_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_ADMQuantities_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_ADMQuantities_RegisterSymmetries in SymmetryRegister { LANG: C @@ -53,6 +47,20 @@ schedule group ML_ADMQuantities_group in MoL_PseudoEvolution after MoL_PostStep schedule ML_ADMQuantities in ML_ADMQuantities_group { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_ADMQuantities::ML_Jadm + WRITES: ML_ADMQuantities::ML_Madm } "ML_ADMQuantities" schedule ML_ADMQuantities_SelectBCs in ML_ADMQuantities_bc_group @@ -90,6 +98,12 @@ schedule ML_ADMQuantities_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_ADMQuantities_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_ADMQuantities_ApplyBCs in MoL_PostStep after ML_ADMQuantities_SelectBoundConds { # no language specified diff --git a/ML_ADMQuantities/src/ML_ADMQuantities.cc b/ML_ADMQuantities/src/ML_ADMQuantities.cc index 301cf82..4bf679f 100644 --- a/ML_ADMQuantities/src/ML_ADMQuantities.cc +++ b/ML_ADMQuantities/src/ML_ADMQuantities.cc @@ -43,8 +43,6 @@ static void ML_ADMQuantities_Body(cGH const * restrict const cctkGH, int const d DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -75,9 +73,9 @@ static void ML_ADMQuantities_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); + CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx*dy); + CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx*dz); + CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy*dz); CCTK_REAL const p1odx = INV(dx); CCTK_REAL const p1ody = INV(dy); CCTK_REAL const p1odz = INV(dz); @@ -95,7 +93,7 @@ static void ML_ADMQuantities_Body(cGH const * restrict const cctkGH, int const d /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (ML_ADMQuantities, + CCTK_LOOP3(ML_ADMQuantities, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -653,7 +651,7 @@ static void ML_ADMQuantities_Body(cGH const * restrict const cctkGH, int const d Jadm3[index] = Jadm3L; Madm[index] = MadmL; } - CCTK_ENDLOOP3 (ML_ADMQuantities); + CCTK_ENDLOOP3(ML_ADMQuantities); } extern "C" void ML_ADMQuantities(CCTK_ARGUMENTS) @@ -672,12 +670,23 @@ extern "C" void ML_ADMQuantities(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN::ML_curv","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv","ML_ADMQuantities::ML_Jadm","ML_ADMQuantities::ML_Madm"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv", + "ML_ADMQuantities::ML_Jadm", + "ML_ADMQuantities::ML_Madm"}; GenericFD_AssertGroupStorage(cctkGH, "ML_ADMQuantities", 11, groups); GenericFD_EnsureStencilFits(cctkGH, "ML_ADMQuantities", 2, 2, 2); - GenericFD_LoopOverInterior(cctkGH, &ML_ADMQuantities_Body); + GenericFD_LoopOverInterior(cctkGH, ML_ADMQuantities_Body); if (verbose > 1) { diff --git a/ML_ADMQuantities/src/make.code.defn b/ML_ADMQuantities/src/make.code.defn index 65932ec..c0f1ce4 100644 --- a/ML_ADMQuantities/src/make.code.defn +++ b/ML_ADMQuantities/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_ADMQuantities.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_ADMQuantities.cc Boundaries.cc diff --git a/ML_ADMQuantities_MP/schedule.ccl b/ML_ADMQuantities_MP/schedule.ccl index 83df5f2..6f7e000 100644 --- a/ML_ADMQuantities_MP/schedule.ccl +++ b/ML_ADMQuantities_MP/schedule.ccl @@ -33,12 +33,6 @@ schedule ML_ADMQuantities_MP_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_ADMQuantities_MP_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_ADMQuantities_MP_RegisterSymmetries in SymmetryRegister { LANG: C @@ -53,6 +47,20 @@ schedule group ML_ADMQuantities_MP_group in MoL_PseudoEvolution after MoL_PostSt schedule ML_ADMQuantities_MP in ML_ADMQuantities_MP_group { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_ADMQuantities_MP::ML_Jadm + WRITES: ML_ADMQuantities_MP::ML_Madm } "ML_ADMQuantities_MP" schedule ML_ADMQuantities_MP_SelectBCs in ML_ADMQuantities_MP_bc_group @@ -90,6 +98,12 @@ schedule ML_ADMQuantities_MP_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_ADMQuantities_MP_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_ADMQuantities_MP_ApplyBCs in MoL_PostStep after ML_ADMQuantities_MP_SelectBoundConds { # no language specified diff --git a/ML_ADMQuantities_MP/src/ML_ADMQuantities_MP.cc b/ML_ADMQuantities_MP/src/ML_ADMQuantities_MP.cc index 16f311f..fd37c79 100644 --- a/ML_ADMQuantities_MP/src/ML_ADMQuantities_MP.cc +++ b/ML_ADMQuantities_MP/src/ML_ADMQuantities_MP.cc @@ -43,8 +43,6 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -75,9 +73,9 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); + CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx*dy); + CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx*dz); + CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy*dz); CCTK_REAL const p1odx = INV(dx); CCTK_REAL const p1ody = INV(dy); CCTK_REAL const p1odz = INV(dz); @@ -140,7 +138,7 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (ML_ADMQuantities_MP, + CCTK_LOOP3(ML_ADMQuantities_MP, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -1484,7 +1482,7 @@ static void ML_ADMQuantities_MP_Body(cGH const * restrict const cctkGH, int cons Jadm3[index] = Jadm3L; Madm[index] = MadmL; } - CCTK_ENDLOOP3 (ML_ADMQuantities_MP); + CCTK_ENDLOOP3(ML_ADMQuantities_MP); } extern "C" void ML_ADMQuantities_MP(CCTK_ARGUMENTS) @@ -1503,12 +1501,23 @@ extern "C" void ML_ADMQuantities_MP(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN::ML_curv","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv","ML_ADMQuantities_MP::ML_Jadm","ML_ADMQuantities_MP::ML_Madm"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv", + "ML_ADMQuantities_MP::ML_Jadm", + "ML_ADMQuantities_MP::ML_Madm"}; GenericFD_AssertGroupStorage(cctkGH, "ML_ADMQuantities_MP", 11, groups); GenericFD_EnsureStencilFits(cctkGH, "ML_ADMQuantities_MP", 2, 2, 2); - GenericFD_LoopOverInterior(cctkGH, &ML_ADMQuantities_MP_Body); + GenericFD_LoopOverInterior(cctkGH, ML_ADMQuantities_MP_Body); if (verbose > 1) { diff --git a/ML_ADMQuantities_MP/src/make.code.defn b/ML_ADMQuantities_MP/src/make.code.defn index 559bbd8..4ac9711 100644 --- a/ML_ADMQuantities_MP/src/make.code.defn +++ b/ML_ADMQuantities_MP/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_ADMQuantities_MP.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_ADMQuantities_MP.cc Boundaries.cc diff --git a/ML_ADMQuantities_O2/schedule.ccl b/ML_ADMQuantities_O2/schedule.ccl index 336e8a1..0c73b79 100644 --- a/ML_ADMQuantities_O2/schedule.ccl +++ b/ML_ADMQuantities_O2/schedule.ccl @@ -33,12 +33,6 @@ schedule ML_ADMQuantities_O2_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_ADMQuantities_O2_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_ADMQuantities_O2_RegisterSymmetries in SymmetryRegister { LANG: C @@ -53,6 +47,20 @@ schedule group ML_ADMQuantities_O2_group in MoL_PseudoEvolution after MoL_PostSt schedule ML_ADMQuantities_O2 in ML_ADMQuantities_O2_group { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_ADMQuantities_O2::ML_Jadm + WRITES: ML_ADMQuantities_O2::ML_Madm } "ML_ADMQuantities_O2" schedule ML_ADMQuantities_O2_SelectBCs in ML_ADMQuantities_O2_bc_group @@ -90,6 +98,12 @@ schedule ML_ADMQuantities_O2_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_ADMQuantities_O2_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_ADMQuantities_O2_ApplyBCs in MoL_PostStep after ML_ADMQuantities_O2_SelectBoundConds { # no language specified diff --git a/ML_ADMQuantities_O2/src/ML_ADMQuantities_O2.cc b/ML_ADMQuantities_O2/src/ML_ADMQuantities_O2.cc index 529c3d0..84f71dd 100644 --- a/ML_ADMQuantities_O2/src/ML_ADMQuantities_O2.cc +++ b/ML_ADMQuantities_O2/src/ML_ADMQuantities_O2.cc @@ -43,8 +43,6 @@ static void ML_ADMQuantities_O2_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -75,9 +73,9 @@ static void ML_ADMQuantities_O2_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx = INV(dx); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody = INV(dy); @@ -98,7 +96,7 @@ static void ML_ADMQuantities_O2_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (ML_ADMQuantities_O2, + CCTK_LOOP3(ML_ADMQuantities_O2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -656,7 +654,7 @@ static void ML_ADMQuantities_O2_Body(cGH const * restrict const cctkGH, int cons Jadm3[index] = Jadm3L; Madm[index] = MadmL; } - CCTK_ENDLOOP3 (ML_ADMQuantities_O2); + CCTK_ENDLOOP3(ML_ADMQuantities_O2); } extern "C" void ML_ADMQuantities_O2(CCTK_ARGUMENTS) @@ -675,12 +673,23 @@ extern "C" void ML_ADMQuantities_O2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN::ML_curv","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv","ML_ADMQuantities_O2::ML_Jadm","ML_ADMQuantities_O2::ML_Madm"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv", + "ML_ADMQuantities_O2::ML_Jadm", + "ML_ADMQuantities_O2::ML_Madm"}; GenericFD_AssertGroupStorage(cctkGH, "ML_ADMQuantities_O2", 11, groups); GenericFD_EnsureStencilFits(cctkGH, "ML_ADMQuantities_O2", 1, 1, 1); - GenericFD_LoopOverInterior(cctkGH, &ML_ADMQuantities_O2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_ADMQuantities_O2_Body); if (verbose > 1) { diff --git a/ML_ADMQuantities_O2/src/make.code.defn b/ML_ADMQuantities_O2/src/make.code.defn index f8be0a5..f708d22 100644 --- a/ML_ADMQuantities_O2/src/make.code.defn +++ b/ML_ADMQuantities_O2/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_ADMQuantities_O2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_ADMQuantities_O2.cc Boundaries.cc diff --git a/ML_BSSN/schedule.ccl b/ML_BSSN/schedule.ccl index 381faeb..3e657c9 100644 --- a/ML_BSSN/schedule.ccl +++ b/ML_BSSN/schedule.ccl @@ -266,12 +266,6 @@ schedule ML_BSSN_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_RegisterSymmetries in SymmetryRegister { LANG: C @@ -284,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN::ML_curv + WRITES: ML_BSSN::ML_dtlapse + WRITES: ML_BSSN::ML_dtshift + WRITES: ML_BSSN::ML_Gamma + WRITES: ML_BSSN::ML_lapse + WRITES: ML_BSSN::ML_log_confac + WRITES: ML_BSSN::ML_metric + WRITES: ML_BSSN::ML_shift + WRITES: ML_BSSN::ML_trace_curv } "ML_BSSN_Minkowski" } @@ -293,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_trace_curv + WRITES: ML_BSSN::ML_curv + WRITES: ML_BSSN::ML_lapse + WRITES: ML_BSSN::ML_log_confac + WRITES: ML_BSSN::ML_metric + WRITES: ML_BSSN::ML_shift + WRITES: ML_BSSN::ML_trace_curv } "ML_BSSN_convertFromADMBase" } @@ -302,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_InitGamma AT initial BEFORE ML_BSSN_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN::ML_dtlapse + WRITES: ML_BSSN::ML_dtshift + WRITES: ML_BSSN::ML_Gamma } "ML_BSSN_InitGamma" } @@ -314,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + WRITES: ML_BSSN::ML_dtlapse + WRITES: ML_BSSN::ML_dtshift + WRITES: ML_BSSN::ML_Gamma } "ML_BSSN_convertFromADMBaseGamma" } schedule ML_BSSN_RHS1 IN ML_BSSN_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_RHS1" schedule ML_BSSN_RHS2 IN ML_BSSN_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN::ML_curvrhs } "ML_BSSN_RHS2" @@ -333,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_Dissipation IN ML_BSSN_evolCalcGroup AFTER (ML_BSSN_RHS1 ML_BSSN_RHS2) { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_curvrhs + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtlapserhs + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_dtshiftrhs + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_Gammarhs + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_lapserhs + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_log_confacrhs + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_metricrhs + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_shiftrhs + READS: ML_BSSN::ML_trace_curv + READS: ML_BSSN::ML_trace_curvrhs + WRITES: ML_BSSN::ML_curvrhs + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_Dissipation" } schedule ML_BSSN_Advect IN ML_BSSN_evolCalcGroup AFTER (ML_BSSN_RHS1 ML_BSSN_RHS2) { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_curvrhs + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtlapserhs + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_dtshiftrhs + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_Gammarhs + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_lapserhs + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_log_confacrhs + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_metricrhs + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_shiftrhs + READS: ML_BSSN::ML_trace_curv + READS: ML_BSSN::ML_trace_curvrhs + WRITES: ML_BSSN::ML_curvrhs + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_Advect" schedule ML_BSSN_InitRHS AT analysis BEFORE ML_BSSN_evolCalcGroup { LANG: C + WRITES: ML_BSSN::ML_curvrhs + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_InitRHS" @@ -352,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN::ML_curvrhs + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_RHSStaticBoundary" } schedule ML_BSSN_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_metric + WRITES: ML_BSSN::ML_curv + WRITES: ML_BSSN::ML_lapse } "ML_BSSN_enforce" @@ -366,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN::ML_curv + WRITES: ML_BSSN::ML_dtlapse + WRITES: ML_BSSN::ML_dtshift + WRITES: ML_BSSN::ML_Gamma + WRITES: ML_BSSN::ML_lapse + WRITES: ML_BSSN::ML_log_confac + WRITES: ML_BSSN::ML_metric + WRITES: ML_BSSN::ML_shift + WRITES: ML_BSSN::ML_trace_curv } "ML_BSSN_boundary" } schedule ML_BSSN_convertToADMBase IN ML_BSSN_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_convertToADMBase" @@ -382,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_convertToADMBaseDtLapseShift" } @@ -391,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_convertToADMBaseDtLapseShiftBoundary" } @@ -400,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_convertToADMBaseFakeDtLapseShift IN ML_BSSN_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_convertToADMBaseFakeDtLapseShift" } @@ -411,6 +605,17 @@ schedule group ML_BSSN_constraints1_group in MoL_PseudoEvolution after MoL_PostS schedule ML_BSSN_constraints1 in ML_BSSN_constraints1_group { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN::ML_Ham } "ML_BSSN_constraints1" schedule ML_BSSN_constraints1_SelectBCs in ML_BSSN_constraints1_bc_group @@ -443,6 +648,20 @@ schedule group ML_BSSN_constraints2_group in MoL_PseudoEvolution after MoL_PostS schedule ML_BSSN_constraints2 in ML_BSSN_constraints2_group { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN::ML_cons_detg + WRITES: ML_BSSN::ML_cons_Gamma + WRITES: ML_BSSN::ML_cons_traceA + WRITES: ML_BSSN::ML_mom } "ML_BSSN_constraints2" schedule ML_BSSN_constraints2_SelectBCs in ML_BSSN_constraints2_bc_group @@ -491,6 +710,12 @@ schedule ML_BSSN_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_ApplyBCs in MoL_PostStep after ML_BSSN_SelectBoundConds { # no language specified diff --git a/ML_BSSN/src/ML_BSSN_Advect.cc b/ML_BSSN/src/ML_BSSN_Advect.cc index 9dffd24..2543f21 100644 --- a/ML_BSSN/src/ML_BSSN_Advect.cc +++ b/ML_BSSN/src/ML_BSSN_Advect.cc @@ -65,8 +65,6 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_Advect, + LC_LOOP3VEC(ML_BSSN_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -2061,132 +2059,35 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir B3rhsL = kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B3,kmadd(beta2L,JacPDupwindNthAnti2B3,kmadd(beta3L,JacPDupwindNthAnti3B3,kmadd(JacPDupwindNthSymm1B3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B3,kfabs(beta2L),kmul(JacPDupwindNthSymm3B3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B3rhsL); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_Advect); + LC_ENDLOOP3VEC(ML_BSSN_Advect); } extern "C" void ML_BSSN_Advect(CCTK_ARGUMENTS) @@ -2205,7 +2106,25 @@ extern "C" void ML_BSSN_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_curvrhs","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshift","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gamma","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapse","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confac","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metric","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shift","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curv","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curv", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_Advect", 18, groups); switch(fdOrder) @@ -2227,7 +2146,7 @@ extern "C" void ML_BSSN_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_Dissipation.cc b/ML_BSSN/src/ML_BSSN_Dissipation.cc index ce66fd0..98378a0 100644 --- a/ML_BSSN/src/ML_BSSN_Dissipation.cc +++ b/ML_BSSN/src/ML_BSSN_Dissipation.cc @@ -65,8 +65,6 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_Dissipation, + LC_LOOP3VEC(ML_BSSN_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1236,132 +1234,35 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_Dissipation); } extern "C" void ML_BSSN_Dissipation(CCTK_ARGUMENTS) @@ -1380,7 +1281,25 @@ extern "C" void ML_BSSN_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_curvrhs","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshift","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gamma","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapse","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confac","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metric","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shift","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curv","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curv", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_Dissipation", 18, groups); switch(fdOrder) @@ -1402,7 +1321,7 @@ extern "C" void ML_BSSN_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_InitGamma.cc b/ML_BSSN/src/ML_BSSN_InitGamma.cc index 480696a..647de3b 100644 --- a/ML_BSSN/src/ML_BSSN_InitGamma.cc +++ b/ML_BSSN/src/ML_BSSN_InitGamma.cc @@ -29,8 +29,6 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_InitGamma, + LC_LOOP3VEC(ML_BSSN_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,60 +236,17 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_InitGamma); } extern "C" void ML_BSSN_InitGamma(CCTK_ARGUMENTS) @@ -310,7 +265,10 @@ extern "C" void ML_BSSN_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_InitGamma", 3, groups); switch(fdOrder) @@ -328,7 +286,7 @@ extern "C" void ML_BSSN_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_InitRHS.cc b/ML_BSSN/src/ML_BSSN_InitRHS.cc index 3c556f5..0f36ec8 100644 --- a/ML_BSSN/src/ML_BSSN_InitRHS.cc +++ b/ML_BSSN/src/ML_BSSN_InitRHS.cc @@ -29,8 +29,6 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_InitRHS, + LC_LOOP3VEC(ML_BSSN_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_InitRHS); } extern "C" void ML_BSSN_InitRHS(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curvrhs","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_InitRHS", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_Minkowski.cc b/ML_BSSN/src/ML_BSSN_Minkowski.cc index aaa94dc..f1033d0 100644 --- a/ML_BSSN/src/ML_BSSN_Minkowski.cc +++ b/ML_BSSN/src/ML_BSSN_Minkowski.cc @@ -29,8 +29,6 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_Minkowski, + LC_LOOP3VEC(ML_BSSN_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_Minkowski); } extern "C" void ML_BSSN_Minkowski(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_Minkowski", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_RHS1.cc b/ML_BSSN/src/ML_BSSN_RHS1.cc index 35f76e5..3775bb7 100644 --- a/ML_BSSN/src/ML_BSSN_RHS1.cc +++ b/ML_BSSN/src/ML_BSSN_RHS1.cc @@ -62,8 +62,6 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -100,9 +98,9 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -121,14 +119,14 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -142,9 +140,9 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -225,7 +223,7 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_RHS1, + LC_LOOP3VEC(ML_BSSN_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1311,13 +1309,13 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-24)),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmul(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-24)),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmul(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-24)),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmul(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1326,7 +1324,7 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(Pi,kmul(kadd(rho,trS),ToReal(4)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; @@ -1378,108 +1376,29 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_RHS1); } extern "C" void ML_BSSN_RHS1(CCTK_ARGUMENTS) @@ -1498,7 +1417,26 @@ extern "C" void ML_BSSN_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN::ML_curv","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshift","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gamma","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapse","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confac","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metric","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shift","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curv","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curv", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_RHS1", 19, groups); switch(fdOrder) @@ -1520,7 +1458,7 @@ extern "C" void ML_BSSN_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_RHS2.cc b/ML_BSSN/src/ML_BSSN_RHS2.cc index b918080..fa76e9b 100644 --- a/ML_BSSN/src/ML_BSSN_RHS2.cc +++ b/ML_BSSN/src/ML_BSSN_RHS2.cc @@ -41,8 +41,6 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_RHS2, + LC_LOOP3VEC(ML_BSSN_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1787,73 +1785,33 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxyL,kmul(Pi,ToReal(-8)),kmul(g12,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxzL,kmul(Pi,ToReal(-8)),kmul(g13,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTyzL,kmul(Pi,ToReal(-8)),kmul(g23,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24)))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_RHS2); } extern "C" void ML_BSSN_RHS2(CCTK_ARGUMENTS) @@ -1872,7 +1830,15 @@ extern "C" void ML_BSSN_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_curvrhs","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_RHS2", 8, groups); switch(fdOrder) @@ -1894,7 +1860,7 @@ extern "C" void ML_BSSN_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc b/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc index ed145c6..ba11ca7 100644 --- a/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc +++ b/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_RHSStaticBoundary); } extern "C" void ML_BSSN_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curvrhs","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_boundary.cc b/ML_BSSN/src/ML_BSSN_boundary.cc index 950546b..140da41 100644 --- a/ML_BSSN/src/ML_BSSN_boundary.cc +++ b/ML_BSSN/src/ML_BSSN_boundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_boundary, + LC_LOOP3VEC(ML_BSSN_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_boundary); + LC_ENDLOOP3VEC(ML_BSSN_boundary); } extern "C" void ML_BSSN_boundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_boundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_constraints1.cc b/ML_BSSN/src/ML_BSSN_constraints1.cc index b44a951..8f89811 100644 --- a/ML_BSSN/src/ML_BSSN_constraints1.cc +++ b/ML_BSSN/src/ML_BSSN_constraints1.cc @@ -41,8 +41,6 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_constraints1, + LC_LOOP3VEC(ML_BSSN_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1561,38 +1559,13 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(Pi,kmul(rho,ToReal(-16)),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(kmsub(SQR(trKL),ToReal(0.666666666666666666666666666667),SQR(Atm33)),SQR(Atm22)),SQR(Atm11))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_constraints1); } extern "C" void ML_BSSN_constraints1(CCTK_ARGUMENTS) @@ -1611,7 +1584,15 @@ extern "C" void ML_BSSN_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_Gamma","ML_BSSN::ML_Ham","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_Ham", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_constraints1", 8, groups); switch(fdOrder) @@ -1633,7 +1614,7 @@ extern "C" void ML_BSSN_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_constraints2.cc b/ML_BSSN/src/ML_BSSN_constraints2.cc index 09be780..ecc059b 100644 --- a/ML_BSSN/src/ML_BSSN_constraints2.cc +++ b/ML_BSSN/src/ML_BSSN_constraints2.cc @@ -50,8 +50,6 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -88,9 +86,9 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -109,14 +107,14 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -130,9 +128,9 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -213,7 +211,7 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_constraints2, + LC_LOOP3VEC(ML_BSSN_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -912,13 +910,13 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(Pi,kmul(S1,ToReal(-8)),kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmul(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(Pi,kmul(S2,ToReal(-8)),kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmul(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(Pi,kmul(S3,ToReal(-8)),kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmul(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -934,64 +932,18 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_constraints2); } extern "C" void ML_BSSN_constraints2(CCTK_ARGUMENTS) @@ -1010,7 +962,18 @@ extern "C" void ML_BSSN_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_cons_detg","ML_BSSN::ML_cons_Gamma","ML_BSSN::ML_cons_traceA","ML_BSSN::ML_curv","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_mom","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_cons_detg", + "ML_BSSN::ML_cons_Gamma", + "ML_BSSN::ML_cons_traceA", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_mom", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_constraints2", 11, groups); switch(fdOrder) @@ -1032,7 +995,7 @@ extern "C" void ML_BSSN_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc b/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc index 985d026..96db9f9 100644 --- a/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc +++ b/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -325,104 +323,28 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_convertFromADMBase); } extern "C" void ML_BSSN_convertFromADMBase(CCTK_ARGUMENTS) @@ -441,7 +363,17 @@ extern "C" void ML_BSSN_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN::ML_curv","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertFromADMBase", 10, groups); switch(fdOrder) @@ -459,7 +391,7 @@ extern "C" void ML_BSSN_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc b/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc index b32e95b..37a355b 100644 --- a/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc +++ b/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc @@ -47,8 +47,6 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -85,9 +83,9 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -106,14 +104,14 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -127,9 +125,9 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -210,7 +208,7 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -841,13 +839,13 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -858,60 +856,17 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_convertFromADMBaseGamma); } extern "C" void ML_BSSN_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -930,7 +885,17 @@ extern "C" void ML_BSSN_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_metric","ML_BSSN::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -952,7 +917,7 @@ extern "C" void ML_BSSN_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBase.cc b/ML_BSSN/src/ML_BSSN_convertToADMBase.cc index 6afda7b..de1d9df 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBase.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -289,96 +287,26 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_convertToADMBase); } extern "C" void ML_BSSN_convertToADMBase(CCTK_ARGUMENTS) @@ -397,7 +325,17 @@ extern "C" void ML_BSSN_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN::ML_curv","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertToADMBase", 10, groups); switch(fdOrder) @@ -415,7 +353,7 @@ extern "C" void ML_BSSN_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc index 9c0a861..8af669c 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc @@ -44,8 +44,6 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -851,48 +849,14 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC dtbetazL = kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -911,7 +875,19 @@ extern "C" void ML_BSSN_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -933,7 +909,7 @@ extern "C" void ML_BSSN_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc index 37ab562..67e687d 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc @@ -44,8 +44,6 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -285,48 +283,14 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -345,7 +309,17 @@ extern "C" void ML_BSSN_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -363,7 +337,7 @@ extern "C" void ML_BSSN_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc index f030741..5944c01 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc @@ -29,8 +29,6 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -270,48 +268,14 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -330,7 +294,17 @@ extern "C" void ML_BSSN_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -348,7 +322,7 @@ extern "C" void ML_BSSN_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_enforce.cc b/ML_BSSN/src/ML_BSSN_enforce.cc index ef5cd15..50ef72a 100644 --- a/ML_BSSN/src/ML_BSSN_enforce.cc +++ b/ML_BSSN/src/ML_BSSN_enforce.cc @@ -29,8 +29,6 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_enforce, + LC_LOOP3VEC(ML_BSSN_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -280,60 +278,17 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_enforce); + LC_ENDLOOP3VEC(ML_BSSN_enforce); } extern "C" void ML_BSSN_enforce(CCTK_ARGUMENTS) @@ -352,7 +307,10 @@ extern "C" void ML_BSSN_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_lapse","ML_BSSN::ML_metric"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_enforce", 3, groups); switch(fdOrder) @@ -370,7 +328,7 @@ extern "C" void ML_BSSN_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/make.code.defn b/ML_BSSN/src/make.code.defn index 5fdc3cf..39c368f 100644 --- a/ML_BSSN/src/make.code.defn +++ b/ML_BSSN/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_Minkowski.cc ML_BSSN_convertFromADMBase.cc ML_BSSN_InitGamma.cc ML_BSSN_convertFromADMBaseGamma.cc ML_BSSN_RHS1.cc ML_BSSN_RHS2.cc ML_BSSN_Dissipation.cc ML_BSSN_Advect.cc ML_BSSN_InitRHS.cc ML_BSSN_RHSStaticBoundary.cc ML_BSSN_enforce.cc ML_BSSN_boundary.cc ML_BSSN_convertToADMBase.cc ML_BSSN_convertToADMBaseDtLapseShift.cc ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_constraints1.cc ML_BSSN_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_Minkowski.cc ML_BSSN_convertFromADMBase.cc ML_BSSN_InitGamma.cc ML_BSSN_convertFromADMBaseGamma.cc ML_BSSN_RHS1.cc ML_BSSN_RHS2.cc ML_BSSN_Dissipation.cc ML_BSSN_Advect.cc ML_BSSN_InitRHS.cc ML_BSSN_RHSStaticBoundary.cc ML_BSSN_enforce.cc ML_BSSN_boundary.cc ML_BSSN_convertToADMBase.cc ML_BSSN_convertToADMBaseDtLapseShift.cc ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_constraints1.cc ML_BSSN_constraints2.cc Boundaries.cc diff --git a/ML_BSSN_MP/schedule.ccl b/ML_BSSN_MP/schedule.ccl index a320d73..76a405a 100644 --- a/ML_BSSN_MP/schedule.ccl +++ b/ML_BSSN_MP/schedule.ccl @@ -266,12 +266,6 @@ schedule ML_BSSN_MP_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_MP_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_MP_RegisterSymmetries in SymmetryRegister { LANG: C @@ -284,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_MP_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN_MP::ML_curv + WRITES: ML_BSSN_MP::ML_dtlapse + WRITES: ML_BSSN_MP::ML_dtshift + WRITES: ML_BSSN_MP::ML_Gamma + WRITES: ML_BSSN_MP::ML_lapse + WRITES: ML_BSSN_MP::ML_log_confac + WRITES: ML_BSSN_MP::ML_metric + WRITES: ML_BSSN_MP::ML_shift + WRITES: ML_BSSN_MP::ML_trace_curv } "ML_BSSN_MP_Minkowski" } @@ -293,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_MP_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_trace_curv + WRITES: ML_BSSN_MP::ML_curv + WRITES: ML_BSSN_MP::ML_lapse + WRITES: ML_BSSN_MP::ML_log_confac + WRITES: ML_BSSN_MP::ML_metric + WRITES: ML_BSSN_MP::ML_shift + WRITES: ML_BSSN_MP::ML_trace_curv } "ML_BSSN_MP_convertFromADMBase" } @@ -302,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_MP_InitGamma AT initial BEFORE ML_BSSN_MP_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN_MP::ML_dtlapse + WRITES: ML_BSSN_MP::ML_dtshift + WRITES: ML_BSSN_MP::ML_Gamma } "ML_BSSN_MP_InitGamma" } @@ -314,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + WRITES: ML_BSSN_MP::ML_dtlapse + WRITES: ML_BSSN_MP::ML_dtshift + WRITES: ML_BSSN_MP::ML_Gamma } "ML_BSSN_MP_convertFromADMBaseGamma" } schedule ML_BSSN_MP_RHS1 IN ML_BSSN_MP_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_dtlapse + READS: ML_BSSN_MP::ML_dtshift + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP::ML_dtlapserhs + WRITES: ML_BSSN_MP::ML_dtshiftrhs + WRITES: ML_BSSN_MP::ML_Gammarhs + WRITES: ML_BSSN_MP::ML_lapserhs + WRITES: ML_BSSN_MP::ML_log_confacrhs + WRITES: ML_BSSN_MP::ML_metricrhs + WRITES: ML_BSSN_MP::ML_shiftrhs + WRITES: ML_BSSN_MP::ML_trace_curvrhs } "ML_BSSN_MP_RHS1" schedule ML_BSSN_MP_RHS2 IN ML_BSSN_MP_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP::ML_curvrhs } "ML_BSSN_MP_RHS2" @@ -333,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_MP_Dissipation IN ML_BSSN_MP_evolCalcGroup AFTER (ML_BSSN_MP_RHS1 ML_BSSN_MP_RHS2) { LANG: C + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_curvrhs + READS: ML_BSSN_MP::ML_dtlapse + READS: ML_BSSN_MP::ML_dtlapserhs + READS: ML_BSSN_MP::ML_dtshift + READS: ML_BSSN_MP::ML_dtshiftrhs + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_Gammarhs + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_lapserhs + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_log_confacrhs + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_metricrhs + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_shiftrhs + READS: ML_BSSN_MP::ML_trace_curv + READS: ML_BSSN_MP::ML_trace_curvrhs + WRITES: ML_BSSN_MP::ML_curvrhs + WRITES: ML_BSSN_MP::ML_dtlapserhs + WRITES: ML_BSSN_MP::ML_dtshiftrhs + WRITES: ML_BSSN_MP::ML_Gammarhs + WRITES: ML_BSSN_MP::ML_lapserhs + WRITES: ML_BSSN_MP::ML_log_confacrhs + WRITES: ML_BSSN_MP::ML_metricrhs + WRITES: ML_BSSN_MP::ML_shiftrhs + WRITES: ML_BSSN_MP::ML_trace_curvrhs } "ML_BSSN_MP_Dissipation" } schedule ML_BSSN_MP_Advect IN ML_BSSN_MP_evolCalcGroup AFTER (ML_BSSN_MP_RHS1 ML_BSSN_MP_RHS2) { LANG: C + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_curvrhs + READS: ML_BSSN_MP::ML_dtlapse + READS: ML_BSSN_MP::ML_dtlapserhs + READS: ML_BSSN_MP::ML_dtshift + READS: ML_BSSN_MP::ML_dtshiftrhs + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_Gammarhs + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_lapserhs + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_log_confacrhs + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_metricrhs + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_shiftrhs + READS: ML_BSSN_MP::ML_trace_curv + READS: ML_BSSN_MP::ML_trace_curvrhs + WRITES: ML_BSSN_MP::ML_curvrhs + WRITES: ML_BSSN_MP::ML_dtlapserhs + WRITES: ML_BSSN_MP::ML_dtshiftrhs + WRITES: ML_BSSN_MP::ML_Gammarhs + WRITES: ML_BSSN_MP::ML_lapserhs + WRITES: ML_BSSN_MP::ML_log_confacrhs + WRITES: ML_BSSN_MP::ML_metricrhs + WRITES: ML_BSSN_MP::ML_shiftrhs + WRITES: ML_BSSN_MP::ML_trace_curvrhs } "ML_BSSN_MP_Advect" schedule ML_BSSN_MP_InitRHS AT analysis BEFORE ML_BSSN_MP_evolCalcGroup { LANG: C + WRITES: ML_BSSN_MP::ML_curvrhs + WRITES: ML_BSSN_MP::ML_dtlapserhs + WRITES: ML_BSSN_MP::ML_dtshiftrhs + WRITES: ML_BSSN_MP::ML_Gammarhs + WRITES: ML_BSSN_MP::ML_lapserhs + WRITES: ML_BSSN_MP::ML_log_confacrhs + WRITES: ML_BSSN_MP::ML_metricrhs + WRITES: ML_BSSN_MP::ML_shiftrhs + WRITES: ML_BSSN_MP::ML_trace_curvrhs } "ML_BSSN_MP_InitRHS" @@ -352,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_MP_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN_MP::ML_curvrhs + WRITES: ML_BSSN_MP::ML_dtlapserhs + WRITES: ML_BSSN_MP::ML_dtshiftrhs + WRITES: ML_BSSN_MP::ML_Gammarhs + WRITES: ML_BSSN_MP::ML_lapserhs + WRITES: ML_BSSN_MP::ML_log_confacrhs + WRITES: ML_BSSN_MP::ML_metricrhs + WRITES: ML_BSSN_MP::ML_shiftrhs + WRITES: ML_BSSN_MP::ML_trace_curvrhs } "ML_BSSN_MP_RHSStaticBoundary" } schedule ML_BSSN_MP_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_metric + WRITES: ML_BSSN_MP::ML_curv + WRITES: ML_BSSN_MP::ML_lapse } "ML_BSSN_MP_enforce" @@ -366,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_MP_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN_MP::ML_curv + WRITES: ML_BSSN_MP::ML_dtlapse + WRITES: ML_BSSN_MP::ML_dtshift + WRITES: ML_BSSN_MP::ML_Gamma + WRITES: ML_BSSN_MP::ML_lapse + WRITES: ML_BSSN_MP::ML_log_confac + WRITES: ML_BSSN_MP::ML_metric + WRITES: ML_BSSN_MP::ML_shift + WRITES: ML_BSSN_MP::ML_trace_curv } "ML_BSSN_MP_boundary" } schedule ML_BSSN_MP_convertToADMBase IN ML_BSSN_MP_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_MP_convertToADMBase" @@ -382,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP::ML_dtlapse + READS: ML_BSSN_MP::ML_dtshift + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_MP_convertToADMBaseDtLapseShift" } @@ -391,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_MP_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP::ML_dtlapse + READS: ML_BSSN_MP::ML_dtshift + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary" } @@ -400,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_MP_convertToADMBaseFakeDtLapseShift IN ML_BSSN_MP_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP::ML_dtlapse + READS: ML_BSSN_MP::ML_dtshift + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_MP_convertToADMBaseFakeDtLapseShift" } @@ -411,6 +605,17 @@ schedule group ML_BSSN_MP_constraints1_group in MoL_PseudoEvolution after MoL_Po schedule ML_BSSN_MP_constraints1 in ML_BSSN_MP_constraints1_group { LANG: C + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP::ML_Ham } "ML_BSSN_MP_constraints1" schedule ML_BSSN_MP_constraints1_SelectBCs in ML_BSSN_MP_constraints1_bc_group @@ -443,6 +648,20 @@ schedule group ML_BSSN_MP_constraints2_group in MoL_PseudoEvolution after MoL_Po schedule ML_BSSN_MP_constraints2 in ML_BSSN_MP_constraints2_group { LANG: C + READS: ML_BSSN_MP::ML_curv + READS: ML_BSSN_MP::ML_Gamma + READS: ML_BSSN_MP::ML_lapse + READS: ML_BSSN_MP::ML_log_confac + READS: ML_BSSN_MP::ML_metric + READS: ML_BSSN_MP::ML_shift + READS: ML_BSSN_MP::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP::ML_cons_detg + WRITES: ML_BSSN_MP::ML_cons_Gamma + WRITES: ML_BSSN_MP::ML_cons_traceA + WRITES: ML_BSSN_MP::ML_mom } "ML_BSSN_MP_constraints2" schedule ML_BSSN_MP_constraints2_SelectBCs in ML_BSSN_MP_constraints2_bc_group @@ -491,6 +710,12 @@ schedule ML_BSSN_MP_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_MP_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_MP_ApplyBCs in MoL_PostStep after ML_BSSN_MP_SelectBoundConds { # no language specified diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_Advect.cc b/ML_BSSN_MP/src/ML_BSSN_MP_Advect.cc index 9b737ad..bb81808 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_Advect.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_Advect.cc @@ -65,8 +65,6 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_Advect, + LC_LOOP3VEC(ML_BSSN_MP_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -2061,132 +2059,35 @@ static void ML_BSSN_MP_Advect_Body(cGH const * restrict const cctkGH, int const B3rhsL = kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B3,kmadd(beta2L,JacPDupwindNthAnti2B3,kmadd(beta3L,JacPDupwindNthAnti3B3,kmadd(JacPDupwindNthSymm1B3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B3,kfabs(beta2L),kmul(JacPDupwindNthSymm3B3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B3rhsL); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_Advect); + LC_ENDLOOP3VEC(ML_BSSN_MP_Advect); } extern "C" void ML_BSSN_MP_Advect(CCTK_ARGUMENTS) @@ -2205,7 +2106,25 @@ extern "C" void ML_BSSN_MP_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_curvrhs","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtlapserhs","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_dtshiftrhs","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_Gammarhs","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_lapserhs","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_log_confacrhs","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_metricrhs","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_shiftrhs","ML_BSSN_MP::ML_trace_curv","ML_BSSN_MP::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_curvrhs", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtlapserhs", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_dtshiftrhs", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_Gammarhs", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_lapserhs", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_log_confacrhs", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_metricrhs", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_shiftrhs", + "ML_BSSN_MP::ML_trace_curv", + "ML_BSSN_MP::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_Advect", 18, groups); switch(fdOrder) @@ -2227,7 +2146,7 @@ extern "C" void ML_BSSN_MP_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_Dissipation.cc b/ML_BSSN_MP/src/ML_BSSN_MP_Dissipation.cc index 148d9ac..9e85cf5 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_Dissipation.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_Dissipation.cc @@ -65,8 +65,6 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_Dissipation, + LC_LOOP3VEC(ML_BSSN_MP_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1236,132 +1234,35 @@ static void ML_BSSN_MP_Dissipation_Body(cGH const * restrict const cctkGH, int c B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_MP_Dissipation); } extern "C" void ML_BSSN_MP_Dissipation(CCTK_ARGUMENTS) @@ -1380,7 +1281,25 @@ extern "C" void ML_BSSN_MP_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_curvrhs","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtlapserhs","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_dtshiftrhs","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_Gammarhs","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_lapserhs","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_log_confacrhs","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_metricrhs","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_shiftrhs","ML_BSSN_MP::ML_trace_curv","ML_BSSN_MP::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_curvrhs", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtlapserhs", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_dtshiftrhs", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_Gammarhs", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_lapserhs", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_log_confacrhs", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_metricrhs", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_shiftrhs", + "ML_BSSN_MP::ML_trace_curv", + "ML_BSSN_MP::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_Dissipation", 18, groups); switch(fdOrder) @@ -1402,7 +1321,7 @@ extern "C" void ML_BSSN_MP_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_InitGamma.cc b/ML_BSSN_MP/src/ML_BSSN_MP_InitGamma.cc index 5d06ba9..f2ac982 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_InitGamma.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_InitGamma.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_InitGamma_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_InitGamma_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_InitGamma, + LC_LOOP3VEC(ML_BSSN_MP_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,60 +236,17 @@ static void ML_BSSN_MP_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_MP_InitGamma); } extern "C" void ML_BSSN_MP_InitGamma(CCTK_ARGUMENTS) @@ -310,7 +265,10 @@ extern "C" void ML_BSSN_MP_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_InitGamma", 3, groups); switch(fdOrder) @@ -328,7 +286,7 @@ extern "C" void ML_BSSN_MP_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_InitRHS.cc b/ML_BSSN_MP/src/ML_BSSN_MP_InitRHS.cc index e899dc2..8f86b3a 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_InitRHS.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_InitRHS.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_InitRHS_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_InitRHS_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_InitRHS, + LC_LOOP3VEC(ML_BSSN_MP_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_MP_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_MP_InitRHS); } extern "C" void ML_BSSN_MP_InitRHS(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_MP_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curvrhs","ML_BSSN_MP::ML_dtlapserhs","ML_BSSN_MP::ML_dtshiftrhs","ML_BSSN_MP::ML_Gammarhs","ML_BSSN_MP::ML_lapserhs","ML_BSSN_MP::ML_log_confacrhs","ML_BSSN_MP::ML_metricrhs","ML_BSSN_MP::ML_shiftrhs","ML_BSSN_MP::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curvrhs", + "ML_BSSN_MP::ML_dtlapserhs", + "ML_BSSN_MP::ML_dtshiftrhs", + "ML_BSSN_MP::ML_Gammarhs", + "ML_BSSN_MP::ML_lapserhs", + "ML_BSSN_MP::ML_log_confacrhs", + "ML_BSSN_MP::ML_metricrhs", + "ML_BSSN_MP::ML_shiftrhs", + "ML_BSSN_MP::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_InitRHS", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_MP_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_Minkowski.cc b/ML_BSSN_MP/src/ML_BSSN_MP_Minkowski.cc index 8b48c53..ad586eb 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_Minkowski.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_Minkowski.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_Minkowski_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_Minkowski_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_Minkowski, + LC_LOOP3VEC(ML_BSSN_MP_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_MP_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_MP_Minkowski); } extern "C" void ML_BSSN_MP_Minkowski(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_MP_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_Minkowski", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_MP_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_RHS1.cc b/ML_BSSN_MP/src/ML_BSSN_MP_RHS1.cc index 35a9896..36e1728 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_RHS1.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_RHS1.cc @@ -62,8 +62,6 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -100,9 +98,9 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -121,14 +119,14 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -142,9 +140,9 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -225,7 +223,7 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_RHS1, + LC_LOOP3VEC(ML_BSSN_MP_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1311,13 +1309,13 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-24)),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmul(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-24)),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmul(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-24)),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmul(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1326,7 +1324,7 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(Pi,kmul(kadd(rho,trS),ToReal(4)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; @@ -1378,108 +1376,29 @@ static void ML_BSSN_MP_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_MP_RHS1); } extern "C" void ML_BSSN_MP_RHS1(CCTK_ARGUMENTS) @@ -1498,7 +1417,26 @@ extern "C" void ML_BSSN_MP_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtlapserhs","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_dtshiftrhs","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_Gammarhs","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_lapserhs","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_log_confacrhs","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_metricrhs","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_shiftrhs","ML_BSSN_MP::ML_trace_curv","ML_BSSN_MP::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtlapserhs", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_dtshiftrhs", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_Gammarhs", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_lapserhs", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_log_confacrhs", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_metricrhs", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_shiftrhs", + "ML_BSSN_MP::ML_trace_curv", + "ML_BSSN_MP::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_RHS1", 19, groups); switch(fdOrder) @@ -1520,7 +1458,7 @@ extern "C" void ML_BSSN_MP_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_RHS2.cc b/ML_BSSN_MP/src/ML_BSSN_MP_RHS2.cc index b316cf6..ab1f104 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_RHS2.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_RHS2.cc @@ -41,8 +41,6 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_RHS2, + LC_LOOP3VEC(ML_BSSN_MP_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1787,73 +1785,33 @@ static void ML_BSSN_MP_RHS2_Body(cGH const * restrict const cctkGH, int const di kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxyL,kmul(Pi,ToReal(-8)),kmul(g12,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxzL,kmul(Pi,ToReal(-8)),kmul(g13,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTyzL,kmul(Pi,ToReal(-8)),kmul(g23,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24)))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_MP_RHS2); } extern "C" void ML_BSSN_MP_RHS2(CCTK_ARGUMENTS) @@ -1872,7 +1830,15 @@ extern "C" void ML_BSSN_MP_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_curvrhs","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_curvrhs", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_RHS2", 8, groups); switch(fdOrder) @@ -1894,7 +1860,7 @@ extern "C" void ML_BSSN_MP_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_RHSStaticBoundary.cc b/ML_BSSN_MP/src/ML_BSSN_MP_RHSStaticBoundary.cc index eb5f20b..2d72cf2 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_RHSStaticBoundary.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_RHSStaticBoundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_MP_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_MP_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_MP_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_MP_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_MP_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_MP_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_MP_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_MP_RHSStaticBoundary); } extern "C" void ML_BSSN_MP_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_MP_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curvrhs","ML_BSSN_MP::ML_dtlapserhs","ML_BSSN_MP::ML_dtshiftrhs","ML_BSSN_MP::ML_Gammarhs","ML_BSSN_MP::ML_lapserhs","ML_BSSN_MP::ML_log_confacrhs","ML_BSSN_MP::ML_metricrhs","ML_BSSN_MP::ML_shiftrhs","ML_BSSN_MP::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curvrhs", + "ML_BSSN_MP::ML_dtlapserhs", + "ML_BSSN_MP::ML_dtshiftrhs", + "ML_BSSN_MP::ML_Gammarhs", + "ML_BSSN_MP::ML_lapserhs", + "ML_BSSN_MP::ML_log_confacrhs", + "ML_BSSN_MP::ML_metricrhs", + "ML_BSSN_MP::ML_shiftrhs", + "ML_BSSN_MP::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_MP_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_MP_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_MP_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_boundary.cc b/ML_BSSN_MP/src/ML_BSSN_MP_boundary.cc index bc6f7c4..f7985cc 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_boundary.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_boundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_MP_boundary_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_MP_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_MP_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_MP_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_MP_boundary_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_boundary, + LC_LOOP3VEC(ML_BSSN_MP_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_MP_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_boundary); + LC_ENDLOOP3VEC(ML_BSSN_MP_boundary); } extern "C" void ML_BSSN_MP_boundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_MP_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_boundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_MP_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_MP_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_MP_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_constraints1.cc b/ML_BSSN_MP/src/ML_BSSN_MP_constraints1.cc index bd38e64..a01cac9 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_constraints1.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_constraints1.cc @@ -41,8 +41,6 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_constraints1, + LC_LOOP3VEC(ML_BSSN_MP_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1561,38 +1559,13 @@ static void ML_BSSN_MP_constraints1_Body(cGH const * restrict const cctkGH, int kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(Pi,kmul(rho,ToReal(-16)),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(kmsub(SQR(trKL),ToReal(0.666666666666666666666666666667),SQR(Atm33)),SQR(Atm22)),SQR(Atm11))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_MP_constraints1); } extern "C" void ML_BSSN_MP_constraints1(CCTK_ARGUMENTS) @@ -1611,7 +1584,15 @@ extern "C" void ML_BSSN_MP_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_Ham","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_Ham", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_constraints1", 8, groups); switch(fdOrder) @@ -1633,7 +1614,7 @@ extern "C" void ML_BSSN_MP_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_constraints2.cc b/ML_BSSN_MP/src/ML_BSSN_MP_constraints2.cc index dc92125..8288b08 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_constraints2.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_constraints2.cc @@ -50,8 +50,6 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -88,9 +86,9 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -109,14 +107,14 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -130,9 +128,9 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -213,7 +211,7 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_constraints2, + LC_LOOP3VEC(ML_BSSN_MP_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -912,13 +910,13 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(Pi,kmul(S1,ToReal(-8)),kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmul(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(Pi,kmul(S2,ToReal(-8)),kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmul(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(Pi,kmul(S3,ToReal(-8)),kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmul(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -934,64 +932,18 @@ static void ML_BSSN_MP_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_MP_constraints2); } extern "C" void ML_BSSN_MP_constraints2(CCTK_ARGUMENTS) @@ -1010,7 +962,18 @@ extern "C" void ML_BSSN_MP_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_cons_detg","ML_BSSN_MP::ML_cons_Gamma","ML_BSSN_MP::ML_cons_traceA","ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_mom","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_cons_detg", + "ML_BSSN_MP::ML_cons_Gamma", + "ML_BSSN_MP::ML_cons_traceA", + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_mom", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_constraints2", 11, groups); switch(fdOrder) @@ -1032,7 +995,7 @@ extern "C" void ML_BSSN_MP_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBase.cc b/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBase.cc index 87cfa6c..c158da7 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBase.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_MP_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -325,104 +323,28 @@ static void ML_BSSN_MP_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_MP_convertFromADMBase); } extern "C" void ML_BSSN_MP_convertFromADMBase(CCTK_ARGUMENTS) @@ -441,7 +363,17 @@ extern "C" void ML_BSSN_MP_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_convertFromADMBase", 10, groups); switch(fdOrder) @@ -459,7 +391,7 @@ extern "C" void ML_BSSN_MP_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBaseGamma.cc b/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBaseGamma.cc index cd2a72b..30273ee 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBaseGamma.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_convertFromADMBaseGamma.cc @@ -47,8 +47,6 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -85,9 +83,9 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -106,14 +104,14 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -127,9 +125,9 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -210,7 +208,7 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_MP_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -841,13 +839,13 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -858,60 +856,17 @@ static void ML_BSSN_MP_convertFromADMBaseGamma_Body(cGH const * restrict const c B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_MP_convertFromADMBaseGamma); } extern "C" void ML_BSSN_MP_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -930,7 +885,17 @@ extern "C" void ML_BSSN_MP_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -952,7 +917,7 @@ extern "C" void ML_BSSN_MP_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBase.cc b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBase.cc index 7ccc25d..a229cd8 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBase.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_MP_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -289,96 +287,26 @@ static void ML_BSSN_MP_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_MP_convertToADMBase); } extern "C" void ML_BSSN_MP_convertToADMBase(CCTK_ARGUMENTS) @@ -397,7 +325,17 @@ extern "C" void ML_BSSN_MP_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_convertToADMBase", 10, groups); switch(fdOrder) @@ -415,7 +353,7 @@ extern "C" void ML_BSSN_MP_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShift.cc b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShift.cc index 13a4680..74d185d 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShift.cc @@ -44,8 +44,6 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_MP_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -851,48 +849,14 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC dtbetazL = kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_MP_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_MP_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -911,7 +875,19 @@ extern "C" void ML_BSSN_MP_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_log_confac","ML_BSSN_MP::ML_metric","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_log_confac", + "ML_BSSN_MP::ML_metric", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -933,7 +909,7 @@ extern "C" void ML_BSSN_MP_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary.cc index d27dea7..5f6ec46 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary.cc @@ -44,8 +44,6 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -285,48 +283,14 @@ static void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -345,7 +309,17 @@ extern "C" void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -363,7 +337,7 @@ extern "C" void ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseFakeDtLapseShift.cc index defc539..ff7cd82 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_convertToADMBaseFakeDtLapseShift.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_MP_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -270,48 +268,14 @@ static void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_MP_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -330,7 +294,17 @@ extern "C" void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP::ML_dtlapse","ML_BSSN_MP::ML_dtshift","ML_BSSN_MP::ML_Gamma","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_shift","ML_BSSN_MP::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP::ML_dtlapse", + "ML_BSSN_MP::ML_dtshift", + "ML_BSSN_MP::ML_Gamma", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_shift", + "ML_BSSN_MP::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -348,7 +322,7 @@ extern "C" void ML_BSSN_MP_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/ML_BSSN_MP_enforce.cc b/ML_BSSN_MP/src/ML_BSSN_MP_enforce.cc index c8bee48..5c06425 100644 --- a/ML_BSSN_MP/src/ML_BSSN_MP_enforce.cc +++ b/ML_BSSN_MP/src/ML_BSSN_MP_enforce.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_enforce, + LC_LOOP3VEC(ML_BSSN_MP_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -280,60 +278,17 @@ static void ML_BSSN_MP_enforce_Body(cGH const * restrict const cctkGH, int const alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_enforce); + LC_ENDLOOP3VEC(ML_BSSN_MP_enforce); } extern "C" void ML_BSSN_MP_enforce(CCTK_ARGUMENTS) @@ -352,7 +307,10 @@ extern "C" void ML_BSSN_MP_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP::ML_curv","ML_BSSN_MP::ML_lapse","ML_BSSN_MP::ML_metric"}; + const char *const groups[] = { + "ML_BSSN_MP::ML_curv", + "ML_BSSN_MP::ML_lapse", + "ML_BSSN_MP::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_enforce", 3, groups); switch(fdOrder) @@ -370,7 +328,7 @@ extern "C" void ML_BSSN_MP_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP/src/make.code.defn b/ML_BSSN_MP/src/make.code.defn index a63207b..4cde6e2 100644 --- a/ML_BSSN_MP/src/make.code.defn +++ b/ML_BSSN_MP/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_MP_Minkowski.cc ML_BSSN_MP_convertFromADMBase.cc ML_BSSN_MP_InitGamma.cc ML_BSSN_MP_convertFromADMBaseGamma.cc ML_BSSN_MP_RHS1.cc ML_BSSN_MP_RHS2.cc ML_BSSN_MP_Dissipation.cc ML_BSSN_MP_Advect.cc ML_BSSN_MP_InitRHS.cc ML_BSSN_MP_RHSStaticBoundary.cc ML_BSSN_MP_enforce.cc ML_BSSN_MP_boundary.cc ML_BSSN_MP_convertToADMBase.cc ML_BSSN_MP_convertToADMBaseDtLapseShift.cc ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_MP_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_MP_constraints1.cc ML_BSSN_MP_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_MP_Minkowski.cc ML_BSSN_MP_convertFromADMBase.cc ML_BSSN_MP_InitGamma.cc ML_BSSN_MP_convertFromADMBaseGamma.cc ML_BSSN_MP_RHS1.cc ML_BSSN_MP_RHS2.cc ML_BSSN_MP_Dissipation.cc ML_BSSN_MP_Advect.cc ML_BSSN_MP_InitRHS.cc ML_BSSN_MP_RHSStaticBoundary.cc ML_BSSN_MP_enforce.cc ML_BSSN_MP_boundary.cc ML_BSSN_MP_convertToADMBase.cc ML_BSSN_MP_convertToADMBaseDtLapseShift.cc ML_BSSN_MP_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_MP_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_MP_constraints1.cc ML_BSSN_MP_constraints2.cc Boundaries.cc diff --git a/ML_BSSN_MP_O8/schedule.ccl b/ML_BSSN_MP_O8/schedule.ccl index 6c8476a..61acdbe 100644 --- a/ML_BSSN_MP_O8/schedule.ccl +++ b/ML_BSSN_MP_O8/schedule.ccl @@ -266,12 +266,6 @@ schedule ML_BSSN_MP_O8_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_MP_O8_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_MP_O8_RegisterSymmetries in SymmetryRegister { LANG: C @@ -284,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_MP_O8_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN_MP_O8::ML_curv + WRITES: ML_BSSN_MP_O8::ML_dtlapse + WRITES: ML_BSSN_MP_O8::ML_dtshift + WRITES: ML_BSSN_MP_O8::ML_Gamma + WRITES: ML_BSSN_MP_O8::ML_lapse + WRITES: ML_BSSN_MP_O8::ML_log_confac + WRITES: ML_BSSN_MP_O8::ML_metric + WRITES: ML_BSSN_MP_O8::ML_shift + WRITES: ML_BSSN_MP_O8::ML_trace_curv } "ML_BSSN_MP_O8_Minkowski" } @@ -293,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_MP_O8_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_trace_curv + WRITES: ML_BSSN_MP_O8::ML_curv + WRITES: ML_BSSN_MP_O8::ML_lapse + WRITES: ML_BSSN_MP_O8::ML_log_confac + WRITES: ML_BSSN_MP_O8::ML_metric + WRITES: ML_BSSN_MP_O8::ML_shift + WRITES: ML_BSSN_MP_O8::ML_trace_curv } "ML_BSSN_MP_O8_convertFromADMBase" } @@ -302,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_MP_O8_InitGamma AT initial BEFORE ML_BSSN_MP_O8_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN_MP_O8::ML_dtlapse + WRITES: ML_BSSN_MP_O8::ML_dtshift + WRITES: ML_BSSN_MP_O8::ML_Gamma } "ML_BSSN_MP_O8_InitGamma" } @@ -314,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + WRITES: ML_BSSN_MP_O8::ML_dtlapse + WRITES: ML_BSSN_MP_O8::ML_dtshift + WRITES: ML_BSSN_MP_O8::ML_Gamma } "ML_BSSN_MP_O8_convertFromADMBaseGamma" } schedule ML_BSSN_MP_O8_RHS1 IN ML_BSSN_MP_O8_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_dtlapse + READS: ML_BSSN_MP_O8::ML_dtshift + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP_O8::ML_dtlapserhs + WRITES: ML_BSSN_MP_O8::ML_dtshiftrhs + WRITES: ML_BSSN_MP_O8::ML_Gammarhs + WRITES: ML_BSSN_MP_O8::ML_lapserhs + WRITES: ML_BSSN_MP_O8::ML_log_confacrhs + WRITES: ML_BSSN_MP_O8::ML_metricrhs + WRITES: ML_BSSN_MP_O8::ML_shiftrhs + WRITES: ML_BSSN_MP_O8::ML_trace_curvrhs } "ML_BSSN_MP_O8_RHS1" schedule ML_BSSN_MP_O8_RHS2 IN ML_BSSN_MP_O8_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP_O8::ML_curvrhs } "ML_BSSN_MP_O8_RHS2" @@ -333,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_MP_O8_Dissipation IN ML_BSSN_MP_O8_evolCalcGroup AFTER (ML_BSSN_MP_O8_RHS1 ML_BSSN_MP_O8_RHS2) { LANG: C + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_curvrhs + READS: ML_BSSN_MP_O8::ML_dtlapse + READS: ML_BSSN_MP_O8::ML_dtlapserhs + READS: ML_BSSN_MP_O8::ML_dtshift + READS: ML_BSSN_MP_O8::ML_dtshiftrhs + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_Gammarhs + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_lapserhs + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_log_confacrhs + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_metricrhs + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_shiftrhs + READS: ML_BSSN_MP_O8::ML_trace_curv + READS: ML_BSSN_MP_O8::ML_trace_curvrhs + WRITES: ML_BSSN_MP_O8::ML_curvrhs + WRITES: ML_BSSN_MP_O8::ML_dtlapserhs + WRITES: ML_BSSN_MP_O8::ML_dtshiftrhs + WRITES: ML_BSSN_MP_O8::ML_Gammarhs + WRITES: ML_BSSN_MP_O8::ML_lapserhs + WRITES: ML_BSSN_MP_O8::ML_log_confacrhs + WRITES: ML_BSSN_MP_O8::ML_metricrhs + WRITES: ML_BSSN_MP_O8::ML_shiftrhs + WRITES: ML_BSSN_MP_O8::ML_trace_curvrhs } "ML_BSSN_MP_O8_Dissipation" } schedule ML_BSSN_MP_O8_Advect IN ML_BSSN_MP_O8_evolCalcGroup AFTER (ML_BSSN_MP_O8_RHS1 ML_BSSN_MP_O8_RHS2) { LANG: C + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_curvrhs + READS: ML_BSSN_MP_O8::ML_dtlapse + READS: ML_BSSN_MP_O8::ML_dtlapserhs + READS: ML_BSSN_MP_O8::ML_dtshift + READS: ML_BSSN_MP_O8::ML_dtshiftrhs + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_Gammarhs + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_lapserhs + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_log_confacrhs + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_metricrhs + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_shiftrhs + READS: ML_BSSN_MP_O8::ML_trace_curv + READS: ML_BSSN_MP_O8::ML_trace_curvrhs + WRITES: ML_BSSN_MP_O8::ML_curvrhs + WRITES: ML_BSSN_MP_O8::ML_dtlapserhs + WRITES: ML_BSSN_MP_O8::ML_dtshiftrhs + WRITES: ML_BSSN_MP_O8::ML_Gammarhs + WRITES: ML_BSSN_MP_O8::ML_lapserhs + WRITES: ML_BSSN_MP_O8::ML_log_confacrhs + WRITES: ML_BSSN_MP_O8::ML_metricrhs + WRITES: ML_BSSN_MP_O8::ML_shiftrhs + WRITES: ML_BSSN_MP_O8::ML_trace_curvrhs } "ML_BSSN_MP_O8_Advect" schedule ML_BSSN_MP_O8_InitRHS AT analysis BEFORE ML_BSSN_MP_O8_evolCalcGroup { LANG: C + WRITES: ML_BSSN_MP_O8::ML_curvrhs + WRITES: ML_BSSN_MP_O8::ML_dtlapserhs + WRITES: ML_BSSN_MP_O8::ML_dtshiftrhs + WRITES: ML_BSSN_MP_O8::ML_Gammarhs + WRITES: ML_BSSN_MP_O8::ML_lapserhs + WRITES: ML_BSSN_MP_O8::ML_log_confacrhs + WRITES: ML_BSSN_MP_O8::ML_metricrhs + WRITES: ML_BSSN_MP_O8::ML_shiftrhs + WRITES: ML_BSSN_MP_O8::ML_trace_curvrhs } "ML_BSSN_MP_O8_InitRHS" @@ -352,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_MP_O8_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN_MP_O8::ML_curvrhs + WRITES: ML_BSSN_MP_O8::ML_dtlapserhs + WRITES: ML_BSSN_MP_O8::ML_dtshiftrhs + WRITES: ML_BSSN_MP_O8::ML_Gammarhs + WRITES: ML_BSSN_MP_O8::ML_lapserhs + WRITES: ML_BSSN_MP_O8::ML_log_confacrhs + WRITES: ML_BSSN_MP_O8::ML_metricrhs + WRITES: ML_BSSN_MP_O8::ML_shiftrhs + WRITES: ML_BSSN_MP_O8::ML_trace_curvrhs } "ML_BSSN_MP_O8_RHSStaticBoundary" } schedule ML_BSSN_MP_O8_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_metric + WRITES: ML_BSSN_MP_O8::ML_curv + WRITES: ML_BSSN_MP_O8::ML_lapse } "ML_BSSN_MP_O8_enforce" @@ -366,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_MP_O8_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN_MP_O8::ML_curv + WRITES: ML_BSSN_MP_O8::ML_dtlapse + WRITES: ML_BSSN_MP_O8::ML_dtshift + WRITES: ML_BSSN_MP_O8::ML_Gamma + WRITES: ML_BSSN_MP_O8::ML_lapse + WRITES: ML_BSSN_MP_O8::ML_log_confac + WRITES: ML_BSSN_MP_O8::ML_metric + WRITES: ML_BSSN_MP_O8::ML_shift + WRITES: ML_BSSN_MP_O8::ML_trace_curv } "ML_BSSN_MP_O8_boundary" } schedule ML_BSSN_MP_O8_convertToADMBase IN ML_BSSN_MP_O8_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_MP_O8_convertToADMBase" @@ -382,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP_O8::ML_dtlapse + READS: ML_BSSN_MP_O8::ML_dtshift + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_MP_O8_convertToADMBaseDtLapseShift" } @@ -391,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_MP_O8_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP_O8::ML_dtlapse + READS: ML_BSSN_MP_O8::ML_dtshift + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary" } @@ -400,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift IN ML_BSSN_MP_O8_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_MP_O8::ML_dtlapse + READS: ML_BSSN_MP_O8::ML_dtshift + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift" } @@ -411,6 +605,17 @@ schedule group ML_BSSN_MP_O8_constraints1_group in MoL_PseudoEvolution after MoL schedule ML_BSSN_MP_O8_constraints1 in ML_BSSN_MP_O8_constraints1_group { LANG: C + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP_O8::ML_Ham } "ML_BSSN_MP_O8_constraints1" schedule ML_BSSN_MP_O8_constraints1_SelectBCs in ML_BSSN_MP_O8_constraints1_bc_group @@ -443,6 +648,20 @@ schedule group ML_BSSN_MP_O8_constraints2_group in MoL_PseudoEvolution after MoL schedule ML_BSSN_MP_O8_constraints2 in ML_BSSN_MP_O8_constraints2_group { LANG: C + READS: ML_BSSN_MP_O8::ML_curv + READS: ML_BSSN_MP_O8::ML_Gamma + READS: ML_BSSN_MP_O8::ML_lapse + READS: ML_BSSN_MP_O8::ML_log_confac + READS: ML_BSSN_MP_O8::ML_metric + READS: ML_BSSN_MP_O8::ML_shift + READS: ML_BSSN_MP_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_MP_O8::ML_cons_detg + WRITES: ML_BSSN_MP_O8::ML_cons_Gamma + WRITES: ML_BSSN_MP_O8::ML_cons_traceA + WRITES: ML_BSSN_MP_O8::ML_mom } "ML_BSSN_MP_O8_constraints2" schedule ML_BSSN_MP_O8_constraints2_SelectBCs in ML_BSSN_MP_O8_constraints2_bc_group @@ -491,6 +710,12 @@ schedule ML_BSSN_MP_O8_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_MP_O8_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_MP_O8_ApplyBCs in MoL_PostStep after ML_BSSN_MP_O8_SelectBoundConds { # no language specified diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Advect.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Advect.cc index 21f4606..68b8762 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Advect.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Advect.cc @@ -65,8 +65,6 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_Advect, + LC_LOOP3VEC(ML_BSSN_MP_O8_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -2061,132 +2059,35 @@ static void ML_BSSN_MP_O8_Advect_Body(cGH const * restrict const cctkGH, int con B3rhsL = kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B3,kmadd(beta2L,JacPDupwindNthAnti2B3,kmadd(beta3L,JacPDupwindNthAnti3B3,kmadd(JacPDupwindNthSymm1B3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B3,kfabs(beta2L),kmul(JacPDupwindNthSymm3B3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B3rhsL); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_Advect); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_Advect); } extern "C" void ML_BSSN_MP_O8_Advect(CCTK_ARGUMENTS) @@ -2205,7 +2106,25 @@ extern "C" void ML_BSSN_MP_O8_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_curvrhs","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtlapserhs","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_dtshiftrhs","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_Gammarhs","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_lapserhs","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_log_confacrhs","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_metricrhs","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_shiftrhs","ML_BSSN_MP_O8::ML_trace_curv","ML_BSSN_MP_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_curvrhs", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtlapserhs", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_dtshiftrhs", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_Gammarhs", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_lapserhs", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_log_confacrhs", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_metricrhs", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_shiftrhs", + "ML_BSSN_MP_O8::ML_trace_curv", + "ML_BSSN_MP_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_Advect", 18, groups); switch(fdOrder) @@ -2227,7 +2146,7 @@ extern "C" void ML_BSSN_MP_O8_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Dissipation.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Dissipation.cc index 01a4295..e881266 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Dissipation.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Dissipation.cc @@ -65,8 +65,6 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_Dissipation, + LC_LOOP3VEC(ML_BSSN_MP_O8_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1236,132 +1234,35 @@ static void ML_BSSN_MP_O8_Dissipation_Body(cGH const * restrict const cctkGH, in B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_Dissipation); } extern "C" void ML_BSSN_MP_O8_Dissipation(CCTK_ARGUMENTS) @@ -1380,7 +1281,25 @@ extern "C" void ML_BSSN_MP_O8_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_curvrhs","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtlapserhs","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_dtshiftrhs","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_Gammarhs","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_lapserhs","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_log_confacrhs","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_metricrhs","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_shiftrhs","ML_BSSN_MP_O8::ML_trace_curv","ML_BSSN_MP_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_curvrhs", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtlapserhs", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_dtshiftrhs", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_Gammarhs", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_lapserhs", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_log_confacrhs", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_metricrhs", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_shiftrhs", + "ML_BSSN_MP_O8::ML_trace_curv", + "ML_BSSN_MP_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_Dissipation", 18, groups); switch(fdOrder) @@ -1402,7 +1321,7 @@ extern "C" void ML_BSSN_MP_O8_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitGamma.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitGamma.cc index 8ecb0ce..541b40c 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitGamma.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitGamma.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_O8_InitGamma_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_O8_InitGamma_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_O8_InitGamma_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_O8_InitGamma_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_O8_InitGamma_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_InitGamma, + LC_LOOP3VEC(ML_BSSN_MP_O8_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,60 +236,17 @@ static void ML_BSSN_MP_O8_InitGamma_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_InitGamma); } extern "C" void ML_BSSN_MP_O8_InitGamma(CCTK_ARGUMENTS) @@ -310,7 +265,10 @@ extern "C" void ML_BSSN_MP_O8_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_InitGamma", 3, groups); switch(fdOrder) @@ -328,7 +286,7 @@ extern "C" void ML_BSSN_MP_O8_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitRHS.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitRHS.cc index c3e6789..078f49b 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitRHS.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_InitRHS.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_O8_InitRHS_Body(cGH const * restrict const cctkGH, int co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_O8_InitRHS_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_O8_InitRHS_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_O8_InitRHS_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_O8_InitRHS_Body(cGH const * restrict const cctkGH, int co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_InitRHS, + LC_LOOP3VEC(ML_BSSN_MP_O8_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_MP_O8_InitRHS_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_InitRHS); } extern "C" void ML_BSSN_MP_O8_InitRHS(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_MP_O8_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curvrhs","ML_BSSN_MP_O8::ML_dtlapserhs","ML_BSSN_MP_O8::ML_dtshiftrhs","ML_BSSN_MP_O8::ML_Gammarhs","ML_BSSN_MP_O8::ML_lapserhs","ML_BSSN_MP_O8::ML_log_confacrhs","ML_BSSN_MP_O8::ML_metricrhs","ML_BSSN_MP_O8::ML_shiftrhs","ML_BSSN_MP_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curvrhs", + "ML_BSSN_MP_O8::ML_dtlapserhs", + "ML_BSSN_MP_O8::ML_dtshiftrhs", + "ML_BSSN_MP_O8::ML_Gammarhs", + "ML_BSSN_MP_O8::ML_lapserhs", + "ML_BSSN_MP_O8::ML_log_confacrhs", + "ML_BSSN_MP_O8::ML_metricrhs", + "ML_BSSN_MP_O8::ML_shiftrhs", + "ML_BSSN_MP_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_InitRHS", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_MP_O8_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Minkowski.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Minkowski.cc index d78daed..5d55170 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Minkowski.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_Minkowski.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_O8_Minkowski_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_O8_Minkowski_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_O8_Minkowski_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_O8_Minkowski_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_O8_Minkowski_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_Minkowski, + LC_LOOP3VEC(ML_BSSN_MP_O8_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_MP_O8_Minkowski_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_Minkowski); } extern "C" void ML_BSSN_MP_O8_Minkowski(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_MP_O8_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_Minkowski", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_MP_O8_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS1.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS1.cc index 4fce6b1..f235204 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS1.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS1.cc @@ -62,8 +62,6 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -100,9 +98,9 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -121,14 +119,14 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -142,9 +140,9 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -225,7 +223,7 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_RHS1, + LC_LOOP3VEC(ML_BSSN_MP_O8_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1311,13 +1309,13 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-24)),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmul(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-24)),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmul(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-24)),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmul(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1326,7 +1324,7 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(Pi,kmul(kadd(rho,trS),ToReal(4)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; @@ -1378,108 +1376,29 @@ static void ML_BSSN_MP_O8_RHS1_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_RHS1); } extern "C" void ML_BSSN_MP_O8_RHS1(CCTK_ARGUMENTS) @@ -1498,7 +1417,26 @@ extern "C" void ML_BSSN_MP_O8_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtlapserhs","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_dtshiftrhs","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_Gammarhs","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_lapserhs","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_log_confacrhs","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_metricrhs","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_shiftrhs","ML_BSSN_MP_O8::ML_trace_curv","ML_BSSN_MP_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtlapserhs", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_dtshiftrhs", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_Gammarhs", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_lapserhs", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_log_confacrhs", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_metricrhs", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_shiftrhs", + "ML_BSSN_MP_O8::ML_trace_curv", + "ML_BSSN_MP_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_RHS1", 19, groups); switch(fdOrder) @@ -1520,7 +1458,7 @@ extern "C" void ML_BSSN_MP_O8_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS2.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS2.cc index e8feed5..cdc4bad 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS2.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHS2.cc @@ -41,8 +41,6 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_RHS2, + LC_LOOP3VEC(ML_BSSN_MP_O8_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1787,73 +1785,33 @@ static void ML_BSSN_MP_O8_RHS2_Body(cGH const * restrict const cctkGH, int const kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxyL,kmul(Pi,ToReal(-8)),kmul(g12,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxzL,kmul(Pi,ToReal(-8)),kmul(g13,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTyzL,kmul(Pi,ToReal(-8)),kmul(g23,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24)))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_RHS2); } extern "C" void ML_BSSN_MP_O8_RHS2(CCTK_ARGUMENTS) @@ -1872,7 +1830,15 @@ extern "C" void ML_BSSN_MP_O8_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_curvrhs","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_curvrhs", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_RHS2", 8, groups); switch(fdOrder) @@ -1894,7 +1860,7 @@ extern "C" void ML_BSSN_MP_O8_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHSStaticBoundary.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHSStaticBoundary.cc index 25ff711..56504f3 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHSStaticBoundary.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_RHSStaticBoundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_MP_O8_RHSStaticBoundary_Body(cGH const * restrict const cctk DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_MP_O8_RHSStaticBoundary_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_MP_O8_RHSStaticBoundary_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_MP_O8_RHSStaticBoundary_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_MP_O8_RHSStaticBoundary_Body(cGH const * restrict const cctk /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_MP_O8_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_MP_O8_RHSStaticBoundary_Body(cGH const * restrict const cctk CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_RHSStaticBoundary); } extern "C" void ML_BSSN_MP_O8_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_MP_O8_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curvrhs","ML_BSSN_MP_O8::ML_dtlapserhs","ML_BSSN_MP_O8::ML_dtshiftrhs","ML_BSSN_MP_O8::ML_Gammarhs","ML_BSSN_MP_O8::ML_lapserhs","ML_BSSN_MP_O8::ML_log_confacrhs","ML_BSSN_MP_O8::ML_metricrhs","ML_BSSN_MP_O8::ML_shiftrhs","ML_BSSN_MP_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curvrhs", + "ML_BSSN_MP_O8::ML_dtlapserhs", + "ML_BSSN_MP_O8::ML_dtshiftrhs", + "ML_BSSN_MP_O8::ML_Gammarhs", + "ML_BSSN_MP_O8::ML_lapserhs", + "ML_BSSN_MP_O8::ML_log_confacrhs", + "ML_BSSN_MP_O8::ML_metricrhs", + "ML_BSSN_MP_O8::ML_shiftrhs", + "ML_BSSN_MP_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_MP_O8_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_MP_O8_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_MP_O8_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_boundary.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_boundary.cc index 9ff4193..f0f3b15 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_boundary.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_boundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_MP_O8_boundary_Body(cGH const * restrict const cctkGH, int c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_MP_O8_boundary_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_MP_O8_boundary_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_MP_O8_boundary_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_MP_O8_boundary_Body(cGH const * restrict const cctkGH, int c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_boundary, + LC_LOOP3VEC(ML_BSSN_MP_O8_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_MP_O8_boundary_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_boundary); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_boundary); } extern "C" void ML_BSSN_MP_O8_boundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_MP_O8_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_boundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_MP_O8_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_MP_O8_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_MP_O8_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints1.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints1.cc index 1a31056..6130f29 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints1.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints1.cc @@ -41,8 +41,6 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_constraints1, + LC_LOOP3VEC(ML_BSSN_MP_O8_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1561,38 +1559,13 @@ static void ML_BSSN_MP_O8_constraints1_Body(cGH const * restrict const cctkGH, i kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(Pi,kmul(rho,ToReal(-16)),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(kmsub(SQR(trKL),ToReal(0.666666666666666666666666666667),SQR(Atm33)),SQR(Atm22)),SQR(Atm11))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_constraints1); } extern "C" void ML_BSSN_MP_O8_constraints1(CCTK_ARGUMENTS) @@ -1611,7 +1584,15 @@ extern "C" void ML_BSSN_MP_O8_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_Ham","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_Ham", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_constraints1", 8, groups); switch(fdOrder) @@ -1633,7 +1614,7 @@ extern "C" void ML_BSSN_MP_O8_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints2.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints2.cc index 33b366c..216a96a 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints2.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_constraints2.cc @@ -50,8 +50,6 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -88,9 +86,9 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -109,14 +107,14 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -130,9 +128,9 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -213,7 +211,7 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_constraints2, + LC_LOOP3VEC(ML_BSSN_MP_O8_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -912,13 +910,13 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(Pi,kmul(S1,ToReal(-8)),kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmul(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(Pi,kmul(S2,ToReal(-8)),kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmul(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(Pi,kmul(S3,ToReal(-8)),kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmul(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -934,64 +932,18 @@ static void ML_BSSN_MP_O8_constraints2_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_constraints2); } extern "C" void ML_BSSN_MP_O8_constraints2(CCTK_ARGUMENTS) @@ -1010,7 +962,18 @@ extern "C" void ML_BSSN_MP_O8_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_cons_detg","ML_BSSN_MP_O8::ML_cons_Gamma","ML_BSSN_MP_O8::ML_cons_traceA","ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_mom","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_cons_detg", + "ML_BSSN_MP_O8::ML_cons_Gamma", + "ML_BSSN_MP_O8::ML_cons_traceA", + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_mom", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_constraints2", 11, groups); switch(fdOrder) @@ -1032,7 +995,7 @@ extern "C" void ML_BSSN_MP_O8_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBase.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBase.cc index b39c08b..2cf4397 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBase.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_MP_O8_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -325,104 +323,28 @@ static void ML_BSSN_MP_O8_convertFromADMBase_Body(cGH const * restrict const cct CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_convertFromADMBase); } extern "C" void ML_BSSN_MP_O8_convertFromADMBase(CCTK_ARGUMENTS) @@ -441,7 +363,17 @@ extern "C" void ML_BSSN_MP_O8_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_convertFromADMBase", 10, groups); switch(fdOrder) @@ -459,7 +391,7 @@ extern "C" void ML_BSSN_MP_O8_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBaseGamma.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBaseGamma.cc index f4aae76..a5a0820 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBaseGamma.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertFromADMBaseGamma.cc @@ -47,8 +47,6 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -85,9 +83,9 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -106,14 +104,14 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -127,9 +125,9 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -210,7 +208,7 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_MP_O8_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -841,13 +839,13 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -858,60 +856,17 @@ static void ML_BSSN_MP_O8_convertFromADMBaseGamma_Body(cGH const * restrict cons B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_convertFromADMBaseGamma); } extern "C" void ML_BSSN_MP_O8_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -930,7 +885,17 @@ extern "C" void ML_BSSN_MP_O8_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -952,7 +917,7 @@ extern "C" void ML_BSSN_MP_O8_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBase.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBase.cc index 5ef8835..f5b2043 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBase.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_MP_O8_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -289,96 +287,26 @@ static void ML_BSSN_MP_O8_convertToADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_convertToADMBase); } extern "C" void ML_BSSN_MP_O8_convertToADMBase(CCTK_ARGUMENTS) @@ -397,7 +325,17 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_convertToADMBase", 10, groups); switch(fdOrder) @@ -415,7 +353,7 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShift.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShift.cc index 3393a0f..ceecbd6 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShift.cc @@ -44,8 +44,6 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_MP_O8_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -851,48 +849,14 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict CCTK_REAL_VEC dtbetazL = kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -911,7 +875,19 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_log_confac","ML_BSSN_MP_O8::ML_metric","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_log_confac", + "ML_BSSN_MP_O8::ML_metric", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -933,7 +909,7 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_MP_O8_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary.cc index 2ba4094..2d6b9cf 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary.cc @@ -44,8 +44,6 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -285,48 +283,14 @@ static void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -345,7 +309,17 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENT return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -363,7 +337,7 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENT break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift.cc index 968e324..e299e55 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -270,48 +268,14 @@ static void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * rest kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -330,7 +294,17 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_MP_O8::ML_dtlapse","ML_BSSN_MP_O8::ML_dtshift","ML_BSSN_MP_O8::ML_Gamma","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_shift","ML_BSSN_MP_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_MP_O8::ML_dtlapse", + "ML_BSSN_MP_O8::ML_dtshift", + "ML_BSSN_MP_O8::ML_Gamma", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_shift", + "ML_BSSN_MP_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -348,7 +322,7 @@ extern "C" void ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_enforce.cc b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_enforce.cc index 7e46af4..03a2d5a 100644 --- a/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_enforce.cc +++ b/ML_BSSN_MP_O8/src/ML_BSSN_MP_O8_enforce.cc @@ -29,8 +29,6 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_MP_O8_enforce, + LC_LOOP3VEC(ML_BSSN_MP_O8_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -280,60 +278,17 @@ static void ML_BSSN_MP_O8_enforce_Body(cGH const * restrict const cctkGH, int co alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_MP_O8_enforce); + LC_ENDLOOP3VEC(ML_BSSN_MP_O8_enforce); } extern "C" void ML_BSSN_MP_O8_enforce(CCTK_ARGUMENTS) @@ -352,7 +307,10 @@ extern "C" void ML_BSSN_MP_O8_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_MP_O8::ML_curv","ML_BSSN_MP_O8::ML_lapse","ML_BSSN_MP_O8::ML_metric"}; + const char *const groups[] = { + "ML_BSSN_MP_O8::ML_curv", + "ML_BSSN_MP_O8::ML_lapse", + "ML_BSSN_MP_O8::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_MP_O8_enforce", 3, groups); switch(fdOrder) @@ -370,7 +328,7 @@ extern "C" void ML_BSSN_MP_O8_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_MP_O8_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_MP_O8_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN_MP_O8/src/make.code.defn b/ML_BSSN_MP_O8/src/make.code.defn index 16b178f..7492ae5 100644 --- a/ML_BSSN_MP_O8/src/make.code.defn +++ b/ML_BSSN_MP_O8/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_MP_O8_Minkowski.cc ML_BSSN_MP_O8_convertFromADMBase.cc ML_BSSN_MP_O8_InitGamma.cc ML_BSSN_MP_O8_convertFromADMBaseGamma.cc ML_BSSN_MP_O8_RHS1.cc ML_BSSN_MP_O8_RHS2.cc ML_BSSN_MP_O8_Dissipation.cc ML_BSSN_MP_O8_Advect.cc ML_BSSN_MP_O8_InitRHS.cc ML_BSSN_MP_O8_RHSStaticBoundary.cc ML_BSSN_MP_O8_enforce.cc ML_BSSN_MP_O8_boundary.cc ML_BSSN_MP_O8_convertToADMBase.cc ML_BSSN_MP_O8_convertToADMBaseDtLapseShift.cc ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_MP_O8_constraints1.cc ML_BSSN_MP_O8_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_MP_O8_Minkowski.cc ML_BSSN_MP_O8_convertFromADMBase.cc ML_BSSN_MP_O8_InitGamma.cc ML_BSSN_MP_O8_convertFromADMBaseGamma.cc ML_BSSN_MP_O8_RHS1.cc ML_BSSN_MP_O8_RHS2.cc ML_BSSN_MP_O8_Dissipation.cc ML_BSSN_MP_O8_Advect.cc ML_BSSN_MP_O8_InitRHS.cc ML_BSSN_MP_O8_RHSStaticBoundary.cc ML_BSSN_MP_O8_enforce.cc ML_BSSN_MP_O8_boundary.cc ML_BSSN_MP_O8_convertToADMBase.cc ML_BSSN_MP_O8_convertToADMBaseDtLapseShift.cc ML_BSSN_MP_O8_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_MP_O8_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_MP_O8_constraints1.cc ML_BSSN_MP_O8_constraints2.cc Boundaries.cc diff --git a/ML_BSSN_O2/schedule.ccl b/ML_BSSN_O2/schedule.ccl index 9b7231b..b607ca6 100644 --- a/ML_BSSN_O2/schedule.ccl +++ b/ML_BSSN_O2/schedule.ccl @@ -266,12 +266,6 @@ schedule ML_BSSN_O2_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_O2_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_O2_RegisterSymmetries in SymmetryRegister { LANG: C @@ -284,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_O2_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN_O2::ML_curv + WRITES: ML_BSSN_O2::ML_dtlapse + WRITES: ML_BSSN_O2::ML_dtshift + WRITES: ML_BSSN_O2::ML_Gamma + WRITES: ML_BSSN_O2::ML_lapse + WRITES: ML_BSSN_O2::ML_log_confac + WRITES: ML_BSSN_O2::ML_metric + WRITES: ML_BSSN_O2::ML_shift + WRITES: ML_BSSN_O2::ML_trace_curv } "ML_BSSN_O2_Minkowski" } @@ -293,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_O2_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_trace_curv + WRITES: ML_BSSN_O2::ML_curv + WRITES: ML_BSSN_O2::ML_lapse + WRITES: ML_BSSN_O2::ML_log_confac + WRITES: ML_BSSN_O2::ML_metric + WRITES: ML_BSSN_O2::ML_shift + WRITES: ML_BSSN_O2::ML_trace_curv } "ML_BSSN_O2_convertFromADMBase" } @@ -302,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_O2_InitGamma AT initial BEFORE ML_BSSN_O2_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN_O2::ML_dtlapse + WRITES: ML_BSSN_O2::ML_dtshift + WRITES: ML_BSSN_O2::ML_Gamma } "ML_BSSN_O2_InitGamma" } @@ -314,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + WRITES: ML_BSSN_O2::ML_dtlapse + WRITES: ML_BSSN_O2::ML_dtshift + WRITES: ML_BSSN_O2::ML_Gamma } "ML_BSSN_O2_convertFromADMBaseGamma" } schedule ML_BSSN_O2_RHS1 IN ML_BSSN_O2_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_dtlapse + READS: ML_BSSN_O2::ML_dtshift + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O2::ML_dtlapserhs + WRITES: ML_BSSN_O2::ML_dtshiftrhs + WRITES: ML_BSSN_O2::ML_Gammarhs + WRITES: ML_BSSN_O2::ML_lapserhs + WRITES: ML_BSSN_O2::ML_log_confacrhs + WRITES: ML_BSSN_O2::ML_metricrhs + WRITES: ML_BSSN_O2::ML_shiftrhs + WRITES: ML_BSSN_O2::ML_trace_curvrhs } "ML_BSSN_O2_RHS1" schedule ML_BSSN_O2_RHS2 IN ML_BSSN_O2_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O2::ML_curvrhs } "ML_BSSN_O2_RHS2" @@ -333,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_O2_Dissipation IN ML_BSSN_O2_evolCalcGroup AFTER (ML_BSSN_O2_RHS1 ML_BSSN_O2_RHS2) { LANG: C + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_curvrhs + READS: ML_BSSN_O2::ML_dtlapse + READS: ML_BSSN_O2::ML_dtlapserhs + READS: ML_BSSN_O2::ML_dtshift + READS: ML_BSSN_O2::ML_dtshiftrhs + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_Gammarhs + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_lapserhs + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_log_confacrhs + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_metricrhs + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_shiftrhs + READS: ML_BSSN_O2::ML_trace_curv + READS: ML_BSSN_O2::ML_trace_curvrhs + WRITES: ML_BSSN_O2::ML_curvrhs + WRITES: ML_BSSN_O2::ML_dtlapserhs + WRITES: ML_BSSN_O2::ML_dtshiftrhs + WRITES: ML_BSSN_O2::ML_Gammarhs + WRITES: ML_BSSN_O2::ML_lapserhs + WRITES: ML_BSSN_O2::ML_log_confacrhs + WRITES: ML_BSSN_O2::ML_metricrhs + WRITES: ML_BSSN_O2::ML_shiftrhs + WRITES: ML_BSSN_O2::ML_trace_curvrhs } "ML_BSSN_O2_Dissipation" } schedule ML_BSSN_O2_Advect IN ML_BSSN_O2_evolCalcGroup AFTER (ML_BSSN_O2_RHS1 ML_BSSN_O2_RHS2) { LANG: C + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_curvrhs + READS: ML_BSSN_O2::ML_dtlapse + READS: ML_BSSN_O2::ML_dtlapserhs + READS: ML_BSSN_O2::ML_dtshift + READS: ML_BSSN_O2::ML_dtshiftrhs + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_Gammarhs + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_lapserhs + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_log_confacrhs + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_metricrhs + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_shiftrhs + READS: ML_BSSN_O2::ML_trace_curv + READS: ML_BSSN_O2::ML_trace_curvrhs + WRITES: ML_BSSN_O2::ML_curvrhs + WRITES: ML_BSSN_O2::ML_dtlapserhs + WRITES: ML_BSSN_O2::ML_dtshiftrhs + WRITES: ML_BSSN_O2::ML_Gammarhs + WRITES: ML_BSSN_O2::ML_lapserhs + WRITES: ML_BSSN_O2::ML_log_confacrhs + WRITES: ML_BSSN_O2::ML_metricrhs + WRITES: ML_BSSN_O2::ML_shiftrhs + WRITES: ML_BSSN_O2::ML_trace_curvrhs } "ML_BSSN_O2_Advect" schedule ML_BSSN_O2_InitRHS AT analysis BEFORE ML_BSSN_O2_evolCalcGroup { LANG: C + WRITES: ML_BSSN_O2::ML_curvrhs + WRITES: ML_BSSN_O2::ML_dtlapserhs + WRITES: ML_BSSN_O2::ML_dtshiftrhs + WRITES: ML_BSSN_O2::ML_Gammarhs + WRITES: ML_BSSN_O2::ML_lapserhs + WRITES: ML_BSSN_O2::ML_log_confacrhs + WRITES: ML_BSSN_O2::ML_metricrhs + WRITES: ML_BSSN_O2::ML_shiftrhs + WRITES: ML_BSSN_O2::ML_trace_curvrhs } "ML_BSSN_O2_InitRHS" @@ -352,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_O2_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN_O2::ML_curvrhs + WRITES: ML_BSSN_O2::ML_dtlapserhs + WRITES: ML_BSSN_O2::ML_dtshiftrhs + WRITES: ML_BSSN_O2::ML_Gammarhs + WRITES: ML_BSSN_O2::ML_lapserhs + WRITES: ML_BSSN_O2::ML_log_confacrhs + WRITES: ML_BSSN_O2::ML_metricrhs + WRITES: ML_BSSN_O2::ML_shiftrhs + WRITES: ML_BSSN_O2::ML_trace_curvrhs } "ML_BSSN_O2_RHSStaticBoundary" } schedule ML_BSSN_O2_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_metric + WRITES: ML_BSSN_O2::ML_curv + WRITES: ML_BSSN_O2::ML_lapse } "ML_BSSN_O2_enforce" @@ -366,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_O2_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN_O2::ML_curv + WRITES: ML_BSSN_O2::ML_dtlapse + WRITES: ML_BSSN_O2::ML_dtshift + WRITES: ML_BSSN_O2::ML_Gamma + WRITES: ML_BSSN_O2::ML_lapse + WRITES: ML_BSSN_O2::ML_log_confac + WRITES: ML_BSSN_O2::ML_metric + WRITES: ML_BSSN_O2::ML_shift + WRITES: ML_BSSN_O2::ML_trace_curv } "ML_BSSN_O2_boundary" } schedule ML_BSSN_O2_convertToADMBase IN ML_BSSN_O2_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_O2_convertToADMBase" @@ -382,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O2::ML_dtlapse + READS: ML_BSSN_O2::ML_dtshift + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_O2_convertToADMBaseDtLapseShift" } @@ -391,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_O2_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O2::ML_dtlapse + READS: ML_BSSN_O2::ML_dtshift + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary" } @@ -400,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_O2_convertToADMBaseFakeDtLapseShift IN ML_BSSN_O2_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O2::ML_dtlapse + READS: ML_BSSN_O2::ML_dtshift + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_O2_convertToADMBaseFakeDtLapseShift" } @@ -411,6 +605,17 @@ schedule group ML_BSSN_O2_constraints1_group in MoL_PseudoEvolution after MoL_Po schedule ML_BSSN_O2_constraints1 in ML_BSSN_O2_constraints1_group { LANG: C + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O2::ML_Ham } "ML_BSSN_O2_constraints1" schedule ML_BSSN_O2_constraints1_SelectBCs in ML_BSSN_O2_constraints1_bc_group @@ -443,6 +648,20 @@ schedule group ML_BSSN_O2_constraints2_group in MoL_PseudoEvolution after MoL_Po schedule ML_BSSN_O2_constraints2 in ML_BSSN_O2_constraints2_group { LANG: C + READS: ML_BSSN_O2::ML_curv + READS: ML_BSSN_O2::ML_Gamma + READS: ML_BSSN_O2::ML_lapse + READS: ML_BSSN_O2::ML_log_confac + READS: ML_BSSN_O2::ML_metric + READS: ML_BSSN_O2::ML_shift + READS: ML_BSSN_O2::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O2::ML_cons_detg + WRITES: ML_BSSN_O2::ML_cons_Gamma + WRITES: ML_BSSN_O2::ML_cons_traceA + WRITES: ML_BSSN_O2::ML_mom } "ML_BSSN_O2_constraints2" schedule ML_BSSN_O2_constraints2_SelectBCs in ML_BSSN_O2_constraints2_bc_group @@ -491,6 +710,12 @@ schedule ML_BSSN_O2_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_O2_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_O2_ApplyBCs in MoL_PostStep after ML_BSSN_O2_SelectBoundConds { # no language specified diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_Advect.cc b/ML_BSSN_O2/src/ML_BSSN_O2_Advect.cc index 7948f81..e0c80a1 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_Advect.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_Advect.cc @@ -65,8 +65,6 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_Advect, + LC_LOOP3VEC(ML_BSSN_O2_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -2061,132 +2059,35 @@ static void ML_BSSN_O2_Advect_Body(cGH const * restrict const cctkGH, int const B3rhsL = kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B3,kmadd(beta2L,JacPDupwindNthAnti2B3,kmadd(beta3L,JacPDupwindNthAnti3B3,kmadd(JacPDupwindNthSymm1B3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B3,kfabs(beta2L),kmul(JacPDupwindNthSymm3B3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B3rhsL); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_Advect); + LC_ENDLOOP3VEC(ML_BSSN_O2_Advect); } extern "C" void ML_BSSN_O2_Advect(CCTK_ARGUMENTS) @@ -2205,7 +2106,25 @@ extern "C" void ML_BSSN_O2_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_curvrhs","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtlapserhs","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_dtshiftrhs","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_Gammarhs","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_lapserhs","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_log_confacrhs","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_metricrhs","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_shiftrhs","ML_BSSN_O2::ML_trace_curv","ML_BSSN_O2::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_curvrhs", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtlapserhs", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_dtshiftrhs", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_Gammarhs", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_lapserhs", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_log_confacrhs", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_metricrhs", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_shiftrhs", + "ML_BSSN_O2::ML_trace_curv", + "ML_BSSN_O2::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_Advect", 18, groups); switch(fdOrder) @@ -2227,7 +2146,7 @@ extern "C" void ML_BSSN_O2_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_Dissipation.cc b/ML_BSSN_O2/src/ML_BSSN_O2_Dissipation.cc index 4d6ff63..255a4d1 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_Dissipation.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_Dissipation.cc @@ -65,8 +65,6 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_Dissipation, + LC_LOOP3VEC(ML_BSSN_O2_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1236,132 +1234,35 @@ static void ML_BSSN_O2_Dissipation_Body(cGH const * restrict const cctkGH, int c B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_O2_Dissipation); } extern "C" void ML_BSSN_O2_Dissipation(CCTK_ARGUMENTS) @@ -1380,7 +1281,25 @@ extern "C" void ML_BSSN_O2_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_curvrhs","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtlapserhs","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_dtshiftrhs","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_Gammarhs","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_lapserhs","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_log_confacrhs","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_metricrhs","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_shiftrhs","ML_BSSN_O2::ML_trace_curv","ML_BSSN_O2::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_curvrhs", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtlapserhs", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_dtshiftrhs", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_Gammarhs", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_lapserhs", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_log_confacrhs", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_metricrhs", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_shiftrhs", + "ML_BSSN_O2::ML_trace_curv", + "ML_BSSN_O2::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_Dissipation", 18, groups); switch(fdOrder) @@ -1402,7 +1321,7 @@ extern "C" void ML_BSSN_O2_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_InitGamma.cc b/ML_BSSN_O2/src/ML_BSSN_O2_InitGamma.cc index fb5d15e..6ac41e5 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_InitGamma.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_InitGamma.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O2_InitGamma_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O2_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O2_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O2_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O2_InitGamma_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_InitGamma, + LC_LOOP3VEC(ML_BSSN_O2_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,60 +236,17 @@ static void ML_BSSN_O2_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O2_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_O2_InitGamma); } extern "C" void ML_BSSN_O2_InitGamma(CCTK_ARGUMENTS) @@ -310,7 +265,10 @@ extern "C" void ML_BSSN_O2_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_InitGamma", 3, groups); switch(fdOrder) @@ -328,7 +286,7 @@ extern "C" void ML_BSSN_O2_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_InitRHS.cc b/ML_BSSN_O2/src/ML_BSSN_O2_InitRHS.cc index 2426f8a..9e86ac6 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_InitRHS.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_InitRHS.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O2_InitRHS_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O2_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O2_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O2_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O2_InitRHS_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_InitRHS, + LC_LOOP3VEC(ML_BSSN_O2_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_O2_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_O2_InitRHS); } extern "C" void ML_BSSN_O2_InitRHS(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_O2_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curvrhs","ML_BSSN_O2::ML_dtlapserhs","ML_BSSN_O2::ML_dtshiftrhs","ML_BSSN_O2::ML_Gammarhs","ML_BSSN_O2::ML_lapserhs","ML_BSSN_O2::ML_log_confacrhs","ML_BSSN_O2::ML_metricrhs","ML_BSSN_O2::ML_shiftrhs","ML_BSSN_O2::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curvrhs", + "ML_BSSN_O2::ML_dtlapserhs", + "ML_BSSN_O2::ML_dtshiftrhs", + "ML_BSSN_O2::ML_Gammarhs", + "ML_BSSN_O2::ML_lapserhs", + "ML_BSSN_O2::ML_log_confacrhs", + "ML_BSSN_O2::ML_metricrhs", + "ML_BSSN_O2::ML_shiftrhs", + "ML_BSSN_O2::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_InitRHS", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_O2_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_Minkowski.cc b/ML_BSSN_O2/src/ML_BSSN_O2_Minkowski.cc index 2939849..3a9e014 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_Minkowski.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_Minkowski.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O2_Minkowski_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O2_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O2_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O2_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O2_Minkowski_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_Minkowski, + LC_LOOP3VEC(ML_BSSN_O2_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_O2_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O2_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_O2_Minkowski); } extern "C" void ML_BSSN_O2_Minkowski(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_O2_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_Minkowski", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_O2_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_RHS1.cc b/ML_BSSN_O2/src/ML_BSSN_O2_RHS1.cc index 9ace51e..89602bb 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_RHS1.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_RHS1.cc @@ -62,8 +62,6 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -100,9 +98,9 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -121,14 +119,14 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -142,9 +140,9 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -225,7 +223,7 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_RHS1, + LC_LOOP3VEC(ML_BSSN_O2_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1311,13 +1309,13 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-24)),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmul(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-24)),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmul(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-24)),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmul(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1326,7 +1324,7 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(Pi,kmul(kadd(rho,trS),ToReal(4)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; @@ -1378,108 +1376,29 @@ static void ML_BSSN_O2_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_O2_RHS1); } extern "C" void ML_BSSN_O2_RHS1(CCTK_ARGUMENTS) @@ -1498,7 +1417,26 @@ extern "C" void ML_BSSN_O2_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtlapserhs","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_dtshiftrhs","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_Gammarhs","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_lapserhs","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_log_confacrhs","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_metricrhs","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_shiftrhs","ML_BSSN_O2::ML_trace_curv","ML_BSSN_O2::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtlapserhs", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_dtshiftrhs", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_Gammarhs", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_lapserhs", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_log_confacrhs", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_metricrhs", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_shiftrhs", + "ML_BSSN_O2::ML_trace_curv", + "ML_BSSN_O2::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_RHS1", 19, groups); switch(fdOrder) @@ -1520,7 +1458,7 @@ extern "C" void ML_BSSN_O2_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_RHS2.cc b/ML_BSSN_O2/src/ML_BSSN_O2_RHS2.cc index 25bd662..d92ff3d 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_RHS2.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_RHS2.cc @@ -41,8 +41,6 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_RHS2, + LC_LOOP3VEC(ML_BSSN_O2_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1787,73 +1785,33 @@ static void ML_BSSN_O2_RHS2_Body(cGH const * restrict const cctkGH, int const di kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxyL,kmul(Pi,ToReal(-8)),kmul(g12,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxzL,kmul(Pi,ToReal(-8)),kmul(g13,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTyzL,kmul(Pi,ToReal(-8)),kmul(g23,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24)))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_O2_RHS2); } extern "C" void ML_BSSN_O2_RHS2(CCTK_ARGUMENTS) @@ -1872,7 +1830,15 @@ extern "C" void ML_BSSN_O2_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_curvrhs","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_curvrhs", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_RHS2", 8, groups); switch(fdOrder) @@ -1894,7 +1860,7 @@ extern "C" void ML_BSSN_O2_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_RHSStaticBoundary.cc b/ML_BSSN_O2/src/ML_BSSN_O2_RHSStaticBoundary.cc index e88d9bb..49724d8 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_RHSStaticBoundary.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_RHSStaticBoundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_O2_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_O2_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_O2_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_O2_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_O2_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_O2_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_O2_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_O2_RHSStaticBoundary); } extern "C" void ML_BSSN_O2_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_O2_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curvrhs","ML_BSSN_O2::ML_dtlapserhs","ML_BSSN_O2::ML_dtshiftrhs","ML_BSSN_O2::ML_Gammarhs","ML_BSSN_O2::ML_lapserhs","ML_BSSN_O2::ML_log_confacrhs","ML_BSSN_O2::ML_metricrhs","ML_BSSN_O2::ML_shiftrhs","ML_BSSN_O2::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curvrhs", + "ML_BSSN_O2::ML_dtlapserhs", + "ML_BSSN_O2::ML_dtshiftrhs", + "ML_BSSN_O2::ML_Gammarhs", + "ML_BSSN_O2::ML_lapserhs", + "ML_BSSN_O2::ML_log_confacrhs", + "ML_BSSN_O2::ML_metricrhs", + "ML_BSSN_O2::ML_shiftrhs", + "ML_BSSN_O2::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_O2_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_O2_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_O2_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_boundary.cc b/ML_BSSN_O2/src/ML_BSSN_O2_boundary.cc index b8d060f..a985413 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_boundary.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_boundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_O2_boundary_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_O2_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_O2_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_O2_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_O2_boundary_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_boundary, + LC_LOOP3VEC(ML_BSSN_O2_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_O2_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O2_boundary); + LC_ENDLOOP3VEC(ML_BSSN_O2_boundary); } extern "C" void ML_BSSN_O2_boundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_O2_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_boundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_O2_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_O2_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_O2_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_constraints1.cc b/ML_BSSN_O2/src/ML_BSSN_O2_constraints1.cc index 916f883..20233ae 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_constraints1.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_constraints1.cc @@ -41,8 +41,6 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_constraints1, + LC_LOOP3VEC(ML_BSSN_O2_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1561,38 +1559,13 @@ static void ML_BSSN_O2_constraints1_Body(cGH const * restrict const cctkGH, int kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(Pi,kmul(rho,ToReal(-16)),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(kmsub(SQR(trKL),ToReal(0.666666666666666666666666666667),SQR(Atm33)),SQR(Atm22)),SQR(Atm11))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_O2_constraints1); } extern "C" void ML_BSSN_O2_constraints1(CCTK_ARGUMENTS) @@ -1611,7 +1584,15 @@ extern "C" void ML_BSSN_O2_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_Ham","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_Ham", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_constraints1", 8, groups); switch(fdOrder) @@ -1633,7 +1614,7 @@ extern "C" void ML_BSSN_O2_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_constraints2.cc b/ML_BSSN_O2/src/ML_BSSN_O2_constraints2.cc index 5405fdf..e180c4c 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_constraints2.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_constraints2.cc @@ -50,8 +50,6 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -88,9 +86,9 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -109,14 +107,14 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -130,9 +128,9 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -213,7 +211,7 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_constraints2, + LC_LOOP3VEC(ML_BSSN_O2_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -912,13 +910,13 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(Pi,kmul(S1,ToReal(-8)),kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmul(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(Pi,kmul(S2,ToReal(-8)),kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmul(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(Pi,kmul(S3,ToReal(-8)),kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmul(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -934,64 +932,18 @@ static void ML_BSSN_O2_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_O2_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_O2_constraints2); } extern "C" void ML_BSSN_O2_constraints2(CCTK_ARGUMENTS) @@ -1010,7 +962,18 @@ extern "C" void ML_BSSN_O2_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_cons_detg","ML_BSSN_O2::ML_cons_Gamma","ML_BSSN_O2::ML_cons_traceA","ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_mom","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_cons_detg", + "ML_BSSN_O2::ML_cons_Gamma", + "ML_BSSN_O2::ML_cons_traceA", + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_mom", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_constraints2", 11, groups); switch(fdOrder) @@ -1032,7 +995,7 @@ extern "C" void ML_BSSN_O2_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBase.cc b/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBase.cc index f51ab09..a2b3d26 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBase.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_O2_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -325,104 +323,28 @@ static void ML_BSSN_O2_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_O2_convertFromADMBase); } extern "C" void ML_BSSN_O2_convertFromADMBase(CCTK_ARGUMENTS) @@ -441,7 +363,17 @@ extern "C" void ML_BSSN_O2_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_convertFromADMBase", 10, groups); switch(fdOrder) @@ -459,7 +391,7 @@ extern "C" void ML_BSSN_O2_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBaseGamma.cc b/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBaseGamma.cc index f5f5c08..8a4134d 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBaseGamma.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_convertFromADMBaseGamma.cc @@ -47,8 +47,6 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -85,9 +83,9 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -106,14 +104,14 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -127,9 +125,9 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -210,7 +208,7 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_O2_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -841,13 +839,13 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -858,60 +856,17 @@ static void ML_BSSN_O2_convertFromADMBaseGamma_Body(cGH const * restrict const c B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O2_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_O2_convertFromADMBaseGamma); } extern "C" void ML_BSSN_O2_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -930,7 +885,17 @@ extern "C" void ML_BSSN_O2_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -952,7 +917,7 @@ extern "C" void ML_BSSN_O2_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBase.cc b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBase.cc index c4dfa8c..7b47457 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBase.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_O2_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -289,96 +287,26 @@ static void ML_BSSN_O2_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_O2_convertToADMBase); } extern "C" void ML_BSSN_O2_convertToADMBase(CCTK_ARGUMENTS) @@ -397,7 +325,17 @@ extern "C" void ML_BSSN_O2_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_convertToADMBase", 10, groups); switch(fdOrder) @@ -415,7 +353,7 @@ extern "C" void ML_BSSN_O2_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShift.cc b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShift.cc index 69db185..1882fa9 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShift.cc @@ -44,8 +44,6 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_O2_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -851,48 +849,14 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC dtbetazL = kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_O2_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_O2_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -911,7 +875,19 @@ extern "C" void ML_BSSN_O2_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_log_confac","ML_BSSN_O2::ML_metric","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_log_confac", + "ML_BSSN_O2::ML_metric", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -933,7 +909,7 @@ extern "C" void ML_BSSN_O2_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O2_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O2_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary.cc index d3c9f3e..ccbdae5 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary.cc @@ -44,8 +44,6 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -285,48 +283,14 @@ static void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -345,7 +309,17 @@ extern "C" void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -363,7 +337,7 @@ extern "C" void ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseFakeDtLapseShift.cc index 1f606a1..572ec21 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_convertToADMBaseFakeDtLapseShift.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_O2_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -270,48 +268,14 @@ static void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_O2_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_O2_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -330,7 +294,17 @@ extern "C" void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O2::ML_dtlapse","ML_BSSN_O2::ML_dtshift","ML_BSSN_O2::ML_Gamma","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_shift","ML_BSSN_O2::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O2::ML_dtlapse", + "ML_BSSN_O2::ML_dtshift", + "ML_BSSN_O2::ML_Gamma", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_shift", + "ML_BSSN_O2::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -348,7 +322,7 @@ extern "C" void ML_BSSN_O2_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/ML_BSSN_O2_enforce.cc b/ML_BSSN_O2/src/ML_BSSN_O2_enforce.cc index 199e71a..7ad6d3b 100644 --- a/ML_BSSN_O2/src/ML_BSSN_O2_enforce.cc +++ b/ML_BSSN_O2/src/ML_BSSN_O2_enforce.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O2_enforce, + LC_LOOP3VEC(ML_BSSN_O2_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -280,60 +278,17 @@ static void ML_BSSN_O2_enforce_Body(cGH const * restrict const cctkGH, int const alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_O2_enforce); + LC_ENDLOOP3VEC(ML_BSSN_O2_enforce); } extern "C" void ML_BSSN_O2_enforce(CCTK_ARGUMENTS) @@ -352,7 +307,10 @@ extern "C" void ML_BSSN_O2_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O2::ML_curv","ML_BSSN_O2::ML_lapse","ML_BSSN_O2::ML_metric"}; + const char *const groups[] = { + "ML_BSSN_O2::ML_curv", + "ML_BSSN_O2::ML_lapse", + "ML_BSSN_O2::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O2_enforce", 3, groups); switch(fdOrder) @@ -370,7 +328,7 @@ extern "C" void ML_BSSN_O2_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O2_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O2_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN_O2/src/make.code.defn b/ML_BSSN_O2/src/make.code.defn index a98a680..f71475a 100644 --- a/ML_BSSN_O2/src/make.code.defn +++ b/ML_BSSN_O2/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_O2_Minkowski.cc ML_BSSN_O2_convertFromADMBase.cc ML_BSSN_O2_InitGamma.cc ML_BSSN_O2_convertFromADMBaseGamma.cc ML_BSSN_O2_RHS1.cc ML_BSSN_O2_RHS2.cc ML_BSSN_O2_Dissipation.cc ML_BSSN_O2_Advect.cc ML_BSSN_O2_InitRHS.cc ML_BSSN_O2_RHSStaticBoundary.cc ML_BSSN_O2_enforce.cc ML_BSSN_O2_boundary.cc ML_BSSN_O2_convertToADMBase.cc ML_BSSN_O2_convertToADMBaseDtLapseShift.cc ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_O2_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_O2_constraints1.cc ML_BSSN_O2_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_O2_Minkowski.cc ML_BSSN_O2_convertFromADMBase.cc ML_BSSN_O2_InitGamma.cc ML_BSSN_O2_convertFromADMBaseGamma.cc ML_BSSN_O2_RHS1.cc ML_BSSN_O2_RHS2.cc ML_BSSN_O2_Dissipation.cc ML_BSSN_O2_Advect.cc ML_BSSN_O2_InitRHS.cc ML_BSSN_O2_RHSStaticBoundary.cc ML_BSSN_O2_enforce.cc ML_BSSN_O2_boundary.cc ML_BSSN_O2_convertToADMBase.cc ML_BSSN_O2_convertToADMBaseDtLapseShift.cc ML_BSSN_O2_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_O2_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_O2_constraints1.cc ML_BSSN_O2_constraints2.cc Boundaries.cc diff --git a/ML_BSSN_O8/schedule.ccl b/ML_BSSN_O8/schedule.ccl index b41eecb..d734ed8 100644 --- a/ML_BSSN_O8/schedule.ccl +++ b/ML_BSSN_O8/schedule.ccl @@ -266,12 +266,6 @@ schedule ML_BSSN_O8_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_O8_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_O8_RegisterSymmetries in SymmetryRegister { LANG: C @@ -284,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_O8_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN_O8::ML_curv + WRITES: ML_BSSN_O8::ML_dtlapse + WRITES: ML_BSSN_O8::ML_dtshift + WRITES: ML_BSSN_O8::ML_Gamma + WRITES: ML_BSSN_O8::ML_lapse + WRITES: ML_BSSN_O8::ML_log_confac + WRITES: ML_BSSN_O8::ML_metric + WRITES: ML_BSSN_O8::ML_shift + WRITES: ML_BSSN_O8::ML_trace_curv } "ML_BSSN_O8_Minkowski" } @@ -293,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_O8_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_trace_curv + WRITES: ML_BSSN_O8::ML_curv + WRITES: ML_BSSN_O8::ML_lapse + WRITES: ML_BSSN_O8::ML_log_confac + WRITES: ML_BSSN_O8::ML_metric + WRITES: ML_BSSN_O8::ML_shift + WRITES: ML_BSSN_O8::ML_trace_curv } "ML_BSSN_O8_convertFromADMBase" } @@ -302,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_O8_InitGamma AT initial BEFORE ML_BSSN_O8_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN_O8::ML_dtlapse + WRITES: ML_BSSN_O8::ML_dtshift + WRITES: ML_BSSN_O8::ML_Gamma } "ML_BSSN_O8_InitGamma" } @@ -314,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + WRITES: ML_BSSN_O8::ML_dtlapse + WRITES: ML_BSSN_O8::ML_dtshift + WRITES: ML_BSSN_O8::ML_Gamma } "ML_BSSN_O8_convertFromADMBaseGamma" } schedule ML_BSSN_O8_RHS1 IN ML_BSSN_O8_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_dtlapse + READS: ML_BSSN_O8::ML_dtshift + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O8::ML_dtlapserhs + WRITES: ML_BSSN_O8::ML_dtshiftrhs + WRITES: ML_BSSN_O8::ML_Gammarhs + WRITES: ML_BSSN_O8::ML_lapserhs + WRITES: ML_BSSN_O8::ML_log_confacrhs + WRITES: ML_BSSN_O8::ML_metricrhs + WRITES: ML_BSSN_O8::ML_shiftrhs + WRITES: ML_BSSN_O8::ML_trace_curvrhs } "ML_BSSN_O8_RHS1" schedule ML_BSSN_O8_RHS2 IN ML_BSSN_O8_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O8::ML_curvrhs } "ML_BSSN_O8_RHS2" @@ -333,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_O8_Dissipation IN ML_BSSN_O8_evolCalcGroup AFTER (ML_BSSN_O8_RHS1 ML_BSSN_O8_RHS2) { LANG: C + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_curvrhs + READS: ML_BSSN_O8::ML_dtlapse + READS: ML_BSSN_O8::ML_dtlapserhs + READS: ML_BSSN_O8::ML_dtshift + READS: ML_BSSN_O8::ML_dtshiftrhs + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_Gammarhs + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_lapserhs + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_log_confacrhs + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_metricrhs + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_shiftrhs + READS: ML_BSSN_O8::ML_trace_curv + READS: ML_BSSN_O8::ML_trace_curvrhs + WRITES: ML_BSSN_O8::ML_curvrhs + WRITES: ML_BSSN_O8::ML_dtlapserhs + WRITES: ML_BSSN_O8::ML_dtshiftrhs + WRITES: ML_BSSN_O8::ML_Gammarhs + WRITES: ML_BSSN_O8::ML_lapserhs + WRITES: ML_BSSN_O8::ML_log_confacrhs + WRITES: ML_BSSN_O8::ML_metricrhs + WRITES: ML_BSSN_O8::ML_shiftrhs + WRITES: ML_BSSN_O8::ML_trace_curvrhs } "ML_BSSN_O8_Dissipation" } schedule ML_BSSN_O8_Advect IN ML_BSSN_O8_evolCalcGroup AFTER (ML_BSSN_O8_RHS1 ML_BSSN_O8_RHS2) { LANG: C + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_curvrhs + READS: ML_BSSN_O8::ML_dtlapse + READS: ML_BSSN_O8::ML_dtlapserhs + READS: ML_BSSN_O8::ML_dtshift + READS: ML_BSSN_O8::ML_dtshiftrhs + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_Gammarhs + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_lapserhs + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_log_confacrhs + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_metricrhs + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_shiftrhs + READS: ML_BSSN_O8::ML_trace_curv + READS: ML_BSSN_O8::ML_trace_curvrhs + WRITES: ML_BSSN_O8::ML_curvrhs + WRITES: ML_BSSN_O8::ML_dtlapserhs + WRITES: ML_BSSN_O8::ML_dtshiftrhs + WRITES: ML_BSSN_O8::ML_Gammarhs + WRITES: ML_BSSN_O8::ML_lapserhs + WRITES: ML_BSSN_O8::ML_log_confacrhs + WRITES: ML_BSSN_O8::ML_metricrhs + WRITES: ML_BSSN_O8::ML_shiftrhs + WRITES: ML_BSSN_O8::ML_trace_curvrhs } "ML_BSSN_O8_Advect" schedule ML_BSSN_O8_InitRHS AT analysis BEFORE ML_BSSN_O8_evolCalcGroup { LANG: C + WRITES: ML_BSSN_O8::ML_curvrhs + WRITES: ML_BSSN_O8::ML_dtlapserhs + WRITES: ML_BSSN_O8::ML_dtshiftrhs + WRITES: ML_BSSN_O8::ML_Gammarhs + WRITES: ML_BSSN_O8::ML_lapserhs + WRITES: ML_BSSN_O8::ML_log_confacrhs + WRITES: ML_BSSN_O8::ML_metricrhs + WRITES: ML_BSSN_O8::ML_shiftrhs + WRITES: ML_BSSN_O8::ML_trace_curvrhs } "ML_BSSN_O8_InitRHS" @@ -352,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_O8_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN_O8::ML_curvrhs + WRITES: ML_BSSN_O8::ML_dtlapserhs + WRITES: ML_BSSN_O8::ML_dtshiftrhs + WRITES: ML_BSSN_O8::ML_Gammarhs + WRITES: ML_BSSN_O8::ML_lapserhs + WRITES: ML_BSSN_O8::ML_log_confacrhs + WRITES: ML_BSSN_O8::ML_metricrhs + WRITES: ML_BSSN_O8::ML_shiftrhs + WRITES: ML_BSSN_O8::ML_trace_curvrhs } "ML_BSSN_O8_RHSStaticBoundary" } schedule ML_BSSN_O8_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_metric + WRITES: ML_BSSN_O8::ML_curv + WRITES: ML_BSSN_O8::ML_lapse } "ML_BSSN_O8_enforce" @@ -366,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_O8_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN_O8::ML_curv + WRITES: ML_BSSN_O8::ML_dtlapse + WRITES: ML_BSSN_O8::ML_dtshift + WRITES: ML_BSSN_O8::ML_Gamma + WRITES: ML_BSSN_O8::ML_lapse + WRITES: ML_BSSN_O8::ML_log_confac + WRITES: ML_BSSN_O8::ML_metric + WRITES: ML_BSSN_O8::ML_shift + WRITES: ML_BSSN_O8::ML_trace_curv } "ML_BSSN_O8_boundary" } schedule ML_BSSN_O8_convertToADMBase IN ML_BSSN_O8_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_O8_convertToADMBase" @@ -382,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O8::ML_dtlapse + READS: ML_BSSN_O8::ML_dtshift + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_O8_convertToADMBaseDtLapseShift" } @@ -391,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_O8_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O8::ML_dtlapse + READS: ML_BSSN_O8::ML_dtshift + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary" } @@ -400,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_O8_convertToADMBaseFakeDtLapseShift IN ML_BSSN_O8_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_O8::ML_dtlapse + READS: ML_BSSN_O8::ML_dtshift + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_O8_convertToADMBaseFakeDtLapseShift" } @@ -411,6 +605,17 @@ schedule group ML_BSSN_O8_constraints1_group in MoL_PseudoEvolution after MoL_Po schedule ML_BSSN_O8_constraints1 in ML_BSSN_O8_constraints1_group { LANG: C + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O8::ML_Ham } "ML_BSSN_O8_constraints1" schedule ML_BSSN_O8_constraints1_SelectBCs in ML_BSSN_O8_constraints1_bc_group @@ -443,6 +648,20 @@ schedule group ML_BSSN_O8_constraints2_group in MoL_PseudoEvolution after MoL_Po schedule ML_BSSN_O8_constraints2 in ML_BSSN_O8_constraints2_group { LANG: C + READS: ML_BSSN_O8::ML_curv + READS: ML_BSSN_O8::ML_Gamma + READS: ML_BSSN_O8::ML_lapse + READS: ML_BSSN_O8::ML_log_confac + READS: ML_BSSN_O8::ML_metric + READS: ML_BSSN_O8::ML_shift + READS: ML_BSSN_O8::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_O8::ML_cons_detg + WRITES: ML_BSSN_O8::ML_cons_Gamma + WRITES: ML_BSSN_O8::ML_cons_traceA + WRITES: ML_BSSN_O8::ML_mom } "ML_BSSN_O8_constraints2" schedule ML_BSSN_O8_constraints2_SelectBCs in ML_BSSN_O8_constraints2_bc_group @@ -491,6 +710,12 @@ schedule ML_BSSN_O8_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_O8_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_O8_ApplyBCs in MoL_PostStep after ML_BSSN_O8_SelectBoundConds { # no language specified diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_Advect.cc b/ML_BSSN_O8/src/ML_BSSN_O8_Advect.cc index 7520755..0ecbeec 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_Advect.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_Advect.cc @@ -65,8 +65,6 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_Advect, + LC_LOOP3VEC(ML_BSSN_O8_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -2061,132 +2059,35 @@ static void ML_BSSN_O8_Advect_Body(cGH const * restrict const cctkGH, int const B3rhsL = kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B3,kmadd(beta2L,JacPDupwindNthAnti2B3,kmadd(beta3L,JacPDupwindNthAnti3B3,kmadd(JacPDupwindNthSymm1B3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B3,kfabs(beta2L),kmul(JacPDupwindNthSymm3B3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B3rhsL); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_Advect); + LC_ENDLOOP3VEC(ML_BSSN_O8_Advect); } extern "C" void ML_BSSN_O8_Advect(CCTK_ARGUMENTS) @@ -2205,7 +2106,25 @@ extern "C" void ML_BSSN_O8_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_curvrhs","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtlapserhs","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_dtshiftrhs","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_Gammarhs","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_lapserhs","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_log_confacrhs","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_metricrhs","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_shiftrhs","ML_BSSN_O8::ML_trace_curv","ML_BSSN_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_curvrhs", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtlapserhs", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_dtshiftrhs", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_Gammarhs", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_lapserhs", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_log_confacrhs", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_metricrhs", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_shiftrhs", + "ML_BSSN_O8::ML_trace_curv", + "ML_BSSN_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_Advect", 18, groups); switch(fdOrder) @@ -2227,7 +2146,7 @@ extern "C" void ML_BSSN_O8_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_Dissipation.cc b/ML_BSSN_O8/src/ML_BSSN_O8_Dissipation.cc index 8d8ec24..7965398 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_Dissipation.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_Dissipation.cc @@ -65,8 +65,6 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_Dissipation, + LC_LOOP3VEC(ML_BSSN_O8_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1236,132 +1234,35 @@ static void ML_BSSN_O8_Dissipation_Body(cGH const * restrict const cctkGH, int c B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_O8_Dissipation); } extern "C" void ML_BSSN_O8_Dissipation(CCTK_ARGUMENTS) @@ -1380,7 +1281,25 @@ extern "C" void ML_BSSN_O8_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_curvrhs","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtlapserhs","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_dtshiftrhs","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_Gammarhs","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_lapserhs","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_log_confacrhs","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_metricrhs","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_shiftrhs","ML_BSSN_O8::ML_trace_curv","ML_BSSN_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_curvrhs", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtlapserhs", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_dtshiftrhs", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_Gammarhs", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_lapserhs", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_log_confacrhs", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_metricrhs", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_shiftrhs", + "ML_BSSN_O8::ML_trace_curv", + "ML_BSSN_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_Dissipation", 18, groups); switch(fdOrder) @@ -1402,7 +1321,7 @@ extern "C" void ML_BSSN_O8_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc b/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc index 373bf4a..4bfcc04 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_InitGamma, + LC_LOOP3VEC(ML_BSSN_O8_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,60 +236,17 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O8_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_O8_InitGamma); } extern "C" void ML_BSSN_O8_InitGamma(CCTK_ARGUMENTS) @@ -310,7 +265,10 @@ extern "C" void ML_BSSN_O8_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_InitGamma", 3, groups); switch(fdOrder) @@ -328,7 +286,7 @@ extern "C" void ML_BSSN_O8_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_InitRHS.cc b/ML_BSSN_O8/src/ML_BSSN_O8_InitRHS.cc index 52e9e15..7b85d13 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_InitRHS.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_InitRHS.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O8_InitRHS_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O8_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O8_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O8_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O8_InitRHS_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_InitRHS, + LC_LOOP3VEC(ML_BSSN_O8_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_O8_InitRHS_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_O8_InitRHS); } extern "C" void ML_BSSN_O8_InitRHS(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_O8_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curvrhs","ML_BSSN_O8::ML_dtlapserhs","ML_BSSN_O8::ML_dtshiftrhs","ML_BSSN_O8::ML_Gammarhs","ML_BSSN_O8::ML_lapserhs","ML_BSSN_O8::ML_log_confacrhs","ML_BSSN_O8::ML_metricrhs","ML_BSSN_O8::ML_shiftrhs","ML_BSSN_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curvrhs", + "ML_BSSN_O8::ML_dtlapserhs", + "ML_BSSN_O8::ML_dtshiftrhs", + "ML_BSSN_O8::ML_Gammarhs", + "ML_BSSN_O8::ML_lapserhs", + "ML_BSSN_O8::ML_log_confacrhs", + "ML_BSSN_O8::ML_metricrhs", + "ML_BSSN_O8::ML_shiftrhs", + "ML_BSSN_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_InitRHS", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_O8_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_Minkowski.cc b/ML_BSSN_O8/src/ML_BSSN_O8_Minkowski.cc index 4281ef3..e96858a 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_Minkowski.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_Minkowski.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O8_Minkowski_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O8_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O8_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O8_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O8_Minkowski_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_Minkowski, + LC_LOOP3VEC(ML_BSSN_O8_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_O8_Minkowski_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O8_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_O8_Minkowski); } extern "C" void ML_BSSN_O8_Minkowski(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_O8_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_Minkowski", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_O8_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_RHS1.cc b/ML_BSSN_O8/src/ML_BSSN_O8_RHS1.cc index ec383b2..968fd06 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_RHS1.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_RHS1.cc @@ -62,8 +62,6 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -100,9 +98,9 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -121,14 +119,14 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -142,9 +140,9 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -225,7 +223,7 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_RHS1, + LC_LOOP3VEC(ML_BSSN_O8_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1311,13 +1309,13 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-24)),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmul(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-24)),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmul(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-24)),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmul(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1326,7 +1324,7 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(Pi,kmul(kadd(rho,trS),ToReal(4)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; @@ -1378,108 +1376,29 @@ static void ML_BSSN_O8_RHS1_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_O8_RHS1); } extern "C" void ML_BSSN_O8_RHS1(CCTK_ARGUMENTS) @@ -1498,7 +1417,26 @@ extern "C" void ML_BSSN_O8_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtlapserhs","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_dtshiftrhs","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_Gammarhs","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_lapserhs","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_log_confacrhs","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_metricrhs","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_shiftrhs","ML_BSSN_O8::ML_trace_curv","ML_BSSN_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtlapserhs", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_dtshiftrhs", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_Gammarhs", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_lapserhs", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_log_confacrhs", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_metricrhs", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_shiftrhs", + "ML_BSSN_O8::ML_trace_curv", + "ML_BSSN_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_RHS1", 19, groups); switch(fdOrder) @@ -1520,7 +1458,7 @@ extern "C" void ML_BSSN_O8_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_RHS2.cc b/ML_BSSN_O8/src/ML_BSSN_O8_RHS2.cc index 7da65bc..b57d3e9 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_RHS2.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_RHS2.cc @@ -41,8 +41,6 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_RHS2, + LC_LOOP3VEC(ML_BSSN_O8_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1787,73 +1785,33 @@ static void ML_BSSN_O8_RHS2_Body(cGH const * restrict const cctkGH, int const di kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxyL,kmul(Pi,ToReal(-8)),kmul(g12,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxzL,kmul(Pi,ToReal(-8)),kmul(g13,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTyzL,kmul(Pi,ToReal(-8)),kmul(g23,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24)))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_O8_RHS2); } extern "C" void ML_BSSN_O8_RHS2(CCTK_ARGUMENTS) @@ -1872,7 +1830,15 @@ extern "C" void ML_BSSN_O8_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_curvrhs","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_curvrhs", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_RHS2", 8, groups); switch(fdOrder) @@ -1894,7 +1860,7 @@ extern "C" void ML_BSSN_O8_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_RHSStaticBoundary.cc b/ML_BSSN_O8/src/ML_BSSN_O8_RHSStaticBoundary.cc index 2a01a6d..387b7c4 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_RHSStaticBoundary.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_RHSStaticBoundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_O8_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_O8_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_O8_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_O8_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_O8_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_O8_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_O8_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_O8_RHSStaticBoundary); } extern "C" void ML_BSSN_O8_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_O8_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curvrhs","ML_BSSN_O8::ML_dtlapserhs","ML_BSSN_O8::ML_dtshiftrhs","ML_BSSN_O8::ML_Gammarhs","ML_BSSN_O8::ML_lapserhs","ML_BSSN_O8::ML_log_confacrhs","ML_BSSN_O8::ML_metricrhs","ML_BSSN_O8::ML_shiftrhs","ML_BSSN_O8::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curvrhs", + "ML_BSSN_O8::ML_dtlapserhs", + "ML_BSSN_O8::ML_dtshiftrhs", + "ML_BSSN_O8::ML_Gammarhs", + "ML_BSSN_O8::ML_lapserhs", + "ML_BSSN_O8::ML_log_confacrhs", + "ML_BSSN_O8::ML_metricrhs", + "ML_BSSN_O8::ML_shiftrhs", + "ML_BSSN_O8::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_O8_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_O8_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_O8_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_boundary.cc b/ML_BSSN_O8/src/ML_BSSN_O8_boundary.cc index acdbe38..ea2dfd5 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_boundary.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_boundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_O8_boundary_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_O8_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_O8_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_O8_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_O8_boundary_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_boundary, + LC_LOOP3VEC(ML_BSSN_O8_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_O8_boundary_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O8_boundary); + LC_ENDLOOP3VEC(ML_BSSN_O8_boundary); } extern "C" void ML_BSSN_O8_boundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_O8_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_boundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_O8_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_O8_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_O8_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_constraints1.cc b/ML_BSSN_O8/src/ML_BSSN_O8_constraints1.cc index 5b5924f..a4d0133 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_constraints1.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_constraints1.cc @@ -41,8 +41,6 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_constraints1, + LC_LOOP3VEC(ML_BSSN_O8_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1561,38 +1559,13 @@ static void ML_BSSN_O8_constraints1_Body(cGH const * restrict const cctkGH, int kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(Pi,kmul(rho,ToReal(-16)),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(kmsub(SQR(trKL),ToReal(0.666666666666666666666666666667),SQR(Atm33)),SQR(Atm22)),SQR(Atm11))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_O8_constraints1); } extern "C" void ML_BSSN_O8_constraints1(CCTK_ARGUMENTS) @@ -1611,7 +1584,15 @@ extern "C" void ML_BSSN_O8_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_Ham","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_Ham", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_constraints1", 8, groups); switch(fdOrder) @@ -1633,7 +1614,7 @@ extern "C" void ML_BSSN_O8_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_constraints2.cc b/ML_BSSN_O8/src/ML_BSSN_O8_constraints2.cc index bc28b91..3e84842 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_constraints2.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_constraints2.cc @@ -50,8 +50,6 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -88,9 +86,9 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -109,14 +107,14 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -130,9 +128,9 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -213,7 +211,7 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_constraints2, + LC_LOOP3VEC(ML_BSSN_O8_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -912,13 +910,13 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(Pi,kmul(S1,ToReal(-8)),kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmul(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(Pi,kmul(S2,ToReal(-8)),kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmul(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(Pi,kmul(S3,ToReal(-8)),kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmul(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -934,64 +932,18 @@ static void ML_BSSN_O8_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_O8_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_O8_constraints2); } extern "C" void ML_BSSN_O8_constraints2(CCTK_ARGUMENTS) @@ -1010,7 +962,18 @@ extern "C" void ML_BSSN_O8_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_cons_detg","ML_BSSN_O8::ML_cons_Gamma","ML_BSSN_O8::ML_cons_traceA","ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_mom","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_cons_detg", + "ML_BSSN_O8::ML_cons_Gamma", + "ML_BSSN_O8::ML_cons_traceA", + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_mom", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_constraints2", 11, groups); switch(fdOrder) @@ -1032,7 +995,7 @@ extern "C" void ML_BSSN_O8_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBase.cc b/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBase.cc index 9d1ea69..5203f5b 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBase.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_O8_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -325,104 +323,28 @@ static void ML_BSSN_O8_convertFromADMBase_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_O8_convertFromADMBase); } extern "C" void ML_BSSN_O8_convertFromADMBase(CCTK_ARGUMENTS) @@ -441,7 +363,17 @@ extern "C" void ML_BSSN_O8_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_convertFromADMBase", 10, groups); switch(fdOrder) @@ -459,7 +391,7 @@ extern "C" void ML_BSSN_O8_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBaseGamma.cc b/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBaseGamma.cc index 5cdac0a..8386f9b 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBaseGamma.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_convertFromADMBaseGamma.cc @@ -47,8 +47,6 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -85,9 +83,9 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -106,14 +104,14 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -127,9 +125,9 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -210,7 +208,7 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_O8_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -841,13 +839,13 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -858,60 +856,17 @@ static void ML_BSSN_O8_convertFromADMBaseGamma_Body(cGH const * restrict const c B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_O8_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_O8_convertFromADMBaseGamma); } extern "C" void ML_BSSN_O8_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -930,7 +885,17 @@ extern "C" void ML_BSSN_O8_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -952,7 +917,7 @@ extern "C" void ML_BSSN_O8_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBase.cc b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBase.cc index 268fc11..98afb4e 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBase.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_O8_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -289,96 +287,26 @@ static void ML_BSSN_O8_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_O8_convertToADMBase); } extern "C" void ML_BSSN_O8_convertToADMBase(CCTK_ARGUMENTS) @@ -397,7 +325,17 @@ extern "C" void ML_BSSN_O8_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_convertToADMBase", 10, groups); switch(fdOrder) @@ -415,7 +353,7 @@ extern "C" void ML_BSSN_O8_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShift.cc b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShift.cc index 406b9d9..3d8e868 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShift.cc @@ -44,8 +44,6 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_O8_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -851,48 +849,14 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShift_Body(cGH const * restrict co CCTK_REAL_VEC dtbetazL = kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_O8_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_O8_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -911,7 +875,19 @@ extern "C" void ML_BSSN_O8_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_log_confac","ML_BSSN_O8::ML_metric","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_log_confac", + "ML_BSSN_O8::ML_metric", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -933,7 +909,7 @@ extern "C" void ML_BSSN_O8_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_O8_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_O8_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary.cc index e4b8e35..54fba2b 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary.cc @@ -44,8 +44,6 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -285,48 +283,14 @@ static void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * res kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -345,7 +309,17 @@ extern "C" void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -363,7 +337,7 @@ extern "C" void ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseFakeDtLapseShift.cc index fd3aec9..c6f5687 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_convertToADMBaseFakeDtLapseShift.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_O8_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -270,48 +268,14 @@ static void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body(cGH const * restric kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_O8_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_O8_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -330,7 +294,17 @@ extern "C" void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_O8::ML_dtlapse","ML_BSSN_O8::ML_dtshift","ML_BSSN_O8::ML_Gamma","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_shift","ML_BSSN_O8::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_O8::ML_dtlapse", + "ML_BSSN_O8::ML_dtshift", + "ML_BSSN_O8::ML_Gamma", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_shift", + "ML_BSSN_O8::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -348,7 +322,7 @@ extern "C" void ML_BSSN_O8_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_enforce.cc b/ML_BSSN_O8/src/ML_BSSN_O8_enforce.cc index 8e71835..df3de65 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_enforce.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_enforce.cc @@ -29,8 +29,6 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_O8_enforce, + LC_LOOP3VEC(ML_BSSN_O8_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -280,60 +278,17 @@ static void ML_BSSN_O8_enforce_Body(cGH const * restrict const cctkGH, int const alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_O8_enforce); + LC_ENDLOOP3VEC(ML_BSSN_O8_enforce); } extern "C" void ML_BSSN_O8_enforce(CCTK_ARGUMENTS) @@ -352,7 +307,10 @@ extern "C" void ML_BSSN_O8_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_O8::ML_curv","ML_BSSN_O8::ML_lapse","ML_BSSN_O8::ML_metric"}; + const char *const groups[] = { + "ML_BSSN_O8::ML_curv", + "ML_BSSN_O8::ML_lapse", + "ML_BSSN_O8::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_O8_enforce", 3, groups); switch(fdOrder) @@ -370,7 +328,7 @@ extern "C" void ML_BSSN_O8_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_O8_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_O8_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN_O8/src/make.code.defn b/ML_BSSN_O8/src/make.code.defn index bd625ef..0cf766f 100644 --- a/ML_BSSN_O8/src/make.code.defn +++ b/ML_BSSN_O8/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_O8_Minkowski.cc ML_BSSN_O8_convertFromADMBase.cc ML_BSSN_O8_InitGamma.cc ML_BSSN_O8_convertFromADMBaseGamma.cc ML_BSSN_O8_RHS1.cc ML_BSSN_O8_RHS2.cc ML_BSSN_O8_Dissipation.cc ML_BSSN_O8_Advect.cc ML_BSSN_O8_InitRHS.cc ML_BSSN_O8_RHSStaticBoundary.cc ML_BSSN_O8_enforce.cc ML_BSSN_O8_boundary.cc ML_BSSN_O8_convertToADMBase.cc ML_BSSN_O8_convertToADMBaseDtLapseShift.cc ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_O8_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_O8_constraints1.cc ML_BSSN_O8_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_O8_Minkowski.cc ML_BSSN_O8_convertFromADMBase.cc ML_BSSN_O8_InitGamma.cc ML_BSSN_O8_convertFromADMBaseGamma.cc ML_BSSN_O8_RHS1.cc ML_BSSN_O8_RHS2.cc ML_BSSN_O8_Dissipation.cc ML_BSSN_O8_Advect.cc ML_BSSN_O8_InitRHS.cc ML_BSSN_O8_RHSStaticBoundary.cc ML_BSSN_O8_enforce.cc ML_BSSN_O8_boundary.cc ML_BSSN_O8_convertToADMBase.cc ML_BSSN_O8_convertToADMBaseDtLapseShift.cc ML_BSSN_O8_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_O8_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_O8_constraints1.cc ML_BSSN_O8_constraints2.cc Boundaries.cc diff --git a/ML_BSSN_UPW/schedule.ccl b/ML_BSSN_UPW/schedule.ccl index 3f7dfd9..6b69c93 100644 --- a/ML_BSSN_UPW/schedule.ccl +++ b/ML_BSSN_UPW/schedule.ccl @@ -266,12 +266,6 @@ schedule ML_BSSN_UPW_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_UPW_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_UPW_RegisterSymmetries in SymmetryRegister { LANG: C @@ -284,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_UPW_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN_UPW::ML_curv + WRITES: ML_BSSN_UPW::ML_dtlapse + WRITES: ML_BSSN_UPW::ML_dtshift + WRITES: ML_BSSN_UPW::ML_Gamma + WRITES: ML_BSSN_UPW::ML_lapse + WRITES: ML_BSSN_UPW::ML_log_confac + WRITES: ML_BSSN_UPW::ML_metric + WRITES: ML_BSSN_UPW::ML_shift + WRITES: ML_BSSN_UPW::ML_trace_curv } "ML_BSSN_UPW_Minkowski" } @@ -293,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_UPW_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_trace_curv + WRITES: ML_BSSN_UPW::ML_curv + WRITES: ML_BSSN_UPW::ML_lapse + WRITES: ML_BSSN_UPW::ML_log_confac + WRITES: ML_BSSN_UPW::ML_metric + WRITES: ML_BSSN_UPW::ML_shift + WRITES: ML_BSSN_UPW::ML_trace_curv } "ML_BSSN_UPW_convertFromADMBase" } @@ -302,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_UPW_InitGamma AT initial BEFORE ML_BSSN_UPW_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN_UPW::ML_dtlapse + WRITES: ML_BSSN_UPW::ML_dtshift + WRITES: ML_BSSN_UPW::ML_Gamma } "ML_BSSN_UPW_InitGamma" } @@ -314,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + WRITES: ML_BSSN_UPW::ML_dtlapse + WRITES: ML_BSSN_UPW::ML_dtshift + WRITES: ML_BSSN_UPW::ML_Gamma } "ML_BSSN_UPW_convertFromADMBaseGamma" } schedule ML_BSSN_UPW_RHS1 IN ML_BSSN_UPW_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_dtlapse + READS: ML_BSSN_UPW::ML_dtshift + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_UPW::ML_dtlapserhs + WRITES: ML_BSSN_UPW::ML_dtshiftrhs + WRITES: ML_BSSN_UPW::ML_Gammarhs + WRITES: ML_BSSN_UPW::ML_lapserhs + WRITES: ML_BSSN_UPW::ML_log_confacrhs + WRITES: ML_BSSN_UPW::ML_metricrhs + WRITES: ML_BSSN_UPW::ML_shiftrhs + WRITES: ML_BSSN_UPW::ML_trace_curvrhs } "ML_BSSN_UPW_RHS1" schedule ML_BSSN_UPW_RHS2 IN ML_BSSN_UPW_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_UPW::ML_curvrhs } "ML_BSSN_UPW_RHS2" @@ -333,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_UPW_Dissipation IN ML_BSSN_UPW_evolCalcGroup AFTER (ML_BSSN_UPW_RHS1 ML_BSSN_UPW_RHS2) { LANG: C + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_curvrhs + READS: ML_BSSN_UPW::ML_dtlapse + READS: ML_BSSN_UPW::ML_dtlapserhs + READS: ML_BSSN_UPW::ML_dtshift + READS: ML_BSSN_UPW::ML_dtshiftrhs + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_Gammarhs + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_lapserhs + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_log_confacrhs + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_metricrhs + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_shiftrhs + READS: ML_BSSN_UPW::ML_trace_curv + READS: ML_BSSN_UPW::ML_trace_curvrhs + WRITES: ML_BSSN_UPW::ML_curvrhs + WRITES: ML_BSSN_UPW::ML_dtlapserhs + WRITES: ML_BSSN_UPW::ML_dtshiftrhs + WRITES: ML_BSSN_UPW::ML_Gammarhs + WRITES: ML_BSSN_UPW::ML_lapserhs + WRITES: ML_BSSN_UPW::ML_log_confacrhs + WRITES: ML_BSSN_UPW::ML_metricrhs + WRITES: ML_BSSN_UPW::ML_shiftrhs + WRITES: ML_BSSN_UPW::ML_trace_curvrhs } "ML_BSSN_UPW_Dissipation" } schedule ML_BSSN_UPW_Advect IN ML_BSSN_UPW_evolCalcGroup AFTER (ML_BSSN_UPW_RHS1 ML_BSSN_UPW_RHS2) { LANG: C + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_curvrhs + READS: ML_BSSN_UPW::ML_dtlapse + READS: ML_BSSN_UPW::ML_dtlapserhs + READS: ML_BSSN_UPW::ML_dtshift + READS: ML_BSSN_UPW::ML_dtshiftrhs + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_Gammarhs + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_lapserhs + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_log_confacrhs + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_metricrhs + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_shiftrhs + READS: ML_BSSN_UPW::ML_trace_curv + READS: ML_BSSN_UPW::ML_trace_curvrhs + WRITES: ML_BSSN_UPW::ML_curvrhs + WRITES: ML_BSSN_UPW::ML_dtlapserhs + WRITES: ML_BSSN_UPW::ML_dtshiftrhs + WRITES: ML_BSSN_UPW::ML_Gammarhs + WRITES: ML_BSSN_UPW::ML_lapserhs + WRITES: ML_BSSN_UPW::ML_log_confacrhs + WRITES: ML_BSSN_UPW::ML_metricrhs + WRITES: ML_BSSN_UPW::ML_shiftrhs + WRITES: ML_BSSN_UPW::ML_trace_curvrhs } "ML_BSSN_UPW_Advect" schedule ML_BSSN_UPW_InitRHS AT analysis BEFORE ML_BSSN_UPW_evolCalcGroup { LANG: C + WRITES: ML_BSSN_UPW::ML_curvrhs + WRITES: ML_BSSN_UPW::ML_dtlapserhs + WRITES: ML_BSSN_UPW::ML_dtshiftrhs + WRITES: ML_BSSN_UPW::ML_Gammarhs + WRITES: ML_BSSN_UPW::ML_lapserhs + WRITES: ML_BSSN_UPW::ML_log_confacrhs + WRITES: ML_BSSN_UPW::ML_metricrhs + WRITES: ML_BSSN_UPW::ML_shiftrhs + WRITES: ML_BSSN_UPW::ML_trace_curvrhs } "ML_BSSN_UPW_InitRHS" @@ -352,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_UPW_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN_UPW::ML_curvrhs + WRITES: ML_BSSN_UPW::ML_dtlapserhs + WRITES: ML_BSSN_UPW::ML_dtshiftrhs + WRITES: ML_BSSN_UPW::ML_Gammarhs + WRITES: ML_BSSN_UPW::ML_lapserhs + WRITES: ML_BSSN_UPW::ML_log_confacrhs + WRITES: ML_BSSN_UPW::ML_metricrhs + WRITES: ML_BSSN_UPW::ML_shiftrhs + WRITES: ML_BSSN_UPW::ML_trace_curvrhs } "ML_BSSN_UPW_RHSStaticBoundary" } schedule ML_BSSN_UPW_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_metric + WRITES: ML_BSSN_UPW::ML_curv + WRITES: ML_BSSN_UPW::ML_lapse } "ML_BSSN_UPW_enforce" @@ -366,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_UPW_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN_UPW::ML_curv + WRITES: ML_BSSN_UPW::ML_dtlapse + WRITES: ML_BSSN_UPW::ML_dtshift + WRITES: ML_BSSN_UPW::ML_Gamma + WRITES: ML_BSSN_UPW::ML_lapse + WRITES: ML_BSSN_UPW::ML_log_confac + WRITES: ML_BSSN_UPW::ML_metric + WRITES: ML_BSSN_UPW::ML_shift + WRITES: ML_BSSN_UPW::ML_trace_curv } "ML_BSSN_UPW_boundary" } schedule ML_BSSN_UPW_convertToADMBase IN ML_BSSN_UPW_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_UPW_convertToADMBase" @@ -382,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_UPW::ML_dtlapse + READS: ML_BSSN_UPW::ML_dtshift + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_UPW_convertToADMBaseDtLapseShift" } @@ -391,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_UPW_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_UPW::ML_dtlapse + READS: ML_BSSN_UPW::ML_dtshift + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary" } @@ -400,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift IN ML_BSSN_UPW_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN_UPW::ML_dtlapse + READS: ML_BSSN_UPW::ML_dtshift + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift" } @@ -411,6 +605,17 @@ schedule group ML_BSSN_UPW_constraints1_group in MoL_PseudoEvolution after MoL_P schedule ML_BSSN_UPW_constraints1 in ML_BSSN_UPW_constraints1_group { LANG: C + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_UPW::ML_Ham } "ML_BSSN_UPW_constraints1" schedule ML_BSSN_UPW_constraints1_SelectBCs in ML_BSSN_UPW_constraints1_bc_group @@ -443,6 +648,20 @@ schedule group ML_BSSN_UPW_constraints2_group in MoL_PseudoEvolution after MoL_P schedule ML_BSSN_UPW_constraints2 in ML_BSSN_UPW_constraints2_group { LANG: C + READS: ML_BSSN_UPW::ML_curv + READS: ML_BSSN_UPW::ML_Gamma + READS: ML_BSSN_UPW::ML_lapse + READS: ML_BSSN_UPW::ML_log_confac + READS: ML_BSSN_UPW::ML_metric + READS: ML_BSSN_UPW::ML_shift + READS: ML_BSSN_UPW::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN_UPW::ML_cons_detg + WRITES: ML_BSSN_UPW::ML_cons_Gamma + WRITES: ML_BSSN_UPW::ML_cons_traceA + WRITES: ML_BSSN_UPW::ML_mom } "ML_BSSN_UPW_constraints2" schedule ML_BSSN_UPW_constraints2_SelectBCs in ML_BSSN_UPW_constraints2_bc_group @@ -491,6 +710,12 @@ schedule ML_BSSN_UPW_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_UPW_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_UPW_ApplyBCs in MoL_PostStep after ML_BSSN_UPW_SelectBoundConds { # no language specified diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_Advect.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_Advect.cc index d9304d7..1fc6d96 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_Advect.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_Advect.cc @@ -65,8 +65,6 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_Advect, + LC_LOOP3VEC(ML_BSSN_UPW_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1236,132 +1234,35 @@ static void ML_BSSN_UPW_Advect_Body(cGH const * restrict const cctkGH, int const B3rhsL = kmadd(kmadd(beta1L,kmadd(ksub(JacPDupwindNth1B3,JacPDupwindNth1Xt3),ToReal(ShiftAdvectionCoeff),JacPDupwindNth1Xt3),kmadd(beta2L,kmadd(ksub(JacPDupwindNth2B3,JacPDupwindNth2Xt3),ToReal(ShiftAdvectionCoeff),JacPDupwindNth2Xt3),kmul(beta3L,kmadd(ksub(JacPDupwindNth3B3,JacPDupwindNth3Xt3),ToReal(ShiftAdvectionCoeff),JacPDupwindNth3Xt3)))),ToReal(ShiftBCoeff),B3rhsL); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_Advect); + LC_ENDLOOP3VEC(ML_BSSN_UPW_Advect); } extern "C" void ML_BSSN_UPW_Advect(CCTK_ARGUMENTS) @@ -1380,7 +1281,25 @@ extern "C" void ML_BSSN_UPW_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_curvrhs","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtlapserhs","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_dtshiftrhs","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_Gammarhs","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_lapserhs","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_log_confacrhs","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_metricrhs","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_shiftrhs","ML_BSSN_UPW::ML_trace_curv","ML_BSSN_UPW::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_curvrhs", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtlapserhs", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_dtshiftrhs", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_Gammarhs", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_lapserhs", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_log_confacrhs", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_metricrhs", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_shiftrhs", + "ML_BSSN_UPW::ML_trace_curv", + "ML_BSSN_UPW::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_Advect", 18, groups); switch(fdOrder) @@ -1402,7 +1321,7 @@ extern "C" void ML_BSSN_UPW_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_Dissipation.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_Dissipation.cc index 0144265..f0b0b60 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_Dissipation.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_Dissipation.cc @@ -65,8 +65,6 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_Dissipation, + LC_LOOP3VEC(ML_BSSN_UPW_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1236,132 +1234,35 @@ static void ML_BSSN_UPW_Dissipation_Body(cGH const * restrict const cctkGH, int B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_UPW_Dissipation); } extern "C" void ML_BSSN_UPW_Dissipation(CCTK_ARGUMENTS) @@ -1380,7 +1281,25 @@ extern "C" void ML_BSSN_UPW_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_curvrhs","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtlapserhs","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_dtshiftrhs","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_Gammarhs","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_lapserhs","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_log_confacrhs","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_metricrhs","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_shiftrhs","ML_BSSN_UPW::ML_trace_curv","ML_BSSN_UPW::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_curvrhs", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtlapserhs", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_dtshiftrhs", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_Gammarhs", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_lapserhs", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_log_confacrhs", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_metricrhs", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_shiftrhs", + "ML_BSSN_UPW::ML_trace_curv", + "ML_BSSN_UPW::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_Dissipation", 18, groups); switch(fdOrder) @@ -1402,7 +1321,7 @@ extern "C" void ML_BSSN_UPW_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_InitGamma.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_InitGamma.cc index 8130a3f..eb95c47 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_InitGamma.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_InitGamma.cc @@ -29,8 +29,6 @@ static void ML_BSSN_UPW_InitGamma_Body(cGH const * restrict const cctkGH, int co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_UPW_InitGamma_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_UPW_InitGamma_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_UPW_InitGamma_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_UPW_InitGamma_Body(cGH const * restrict const cctkGH, int co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_InitGamma, + LC_LOOP3VEC(ML_BSSN_UPW_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,60 +236,17 @@ static void ML_BSSN_UPW_InitGamma_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_UPW_InitGamma); } extern "C" void ML_BSSN_UPW_InitGamma(CCTK_ARGUMENTS) @@ -310,7 +265,10 @@ extern "C" void ML_BSSN_UPW_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_InitGamma", 3, groups); switch(fdOrder) @@ -328,7 +286,7 @@ extern "C" void ML_BSSN_UPW_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_InitRHS.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_InitRHS.cc index 2318559..dc02a4a 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_InitRHS.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_InitRHS.cc @@ -29,8 +29,6 @@ static void ML_BSSN_UPW_InitRHS_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_UPW_InitRHS_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_UPW_InitRHS_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_UPW_InitRHS_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_UPW_InitRHS_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_InitRHS, + LC_LOOP3VEC(ML_BSSN_UPW_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_UPW_InitRHS_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_UPW_InitRHS); } extern "C" void ML_BSSN_UPW_InitRHS(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_UPW_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curvrhs","ML_BSSN_UPW::ML_dtlapserhs","ML_BSSN_UPW::ML_dtshiftrhs","ML_BSSN_UPW::ML_Gammarhs","ML_BSSN_UPW::ML_lapserhs","ML_BSSN_UPW::ML_log_confacrhs","ML_BSSN_UPW::ML_metricrhs","ML_BSSN_UPW::ML_shiftrhs","ML_BSSN_UPW::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curvrhs", + "ML_BSSN_UPW::ML_dtlapserhs", + "ML_BSSN_UPW::ML_dtshiftrhs", + "ML_BSSN_UPW::ML_Gammarhs", + "ML_BSSN_UPW::ML_lapserhs", + "ML_BSSN_UPW::ML_log_confacrhs", + "ML_BSSN_UPW::ML_metricrhs", + "ML_BSSN_UPW::ML_shiftrhs", + "ML_BSSN_UPW::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_InitRHS", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_UPW_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_Minkowski.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_Minkowski.cc index 80840ba..a77438e 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_Minkowski.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_Minkowski.cc @@ -29,8 +29,6 @@ static void ML_BSSN_UPW_Minkowski_Body(cGH const * restrict const cctkGH, int co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_UPW_Minkowski_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_UPW_Minkowski_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_UPW_Minkowski_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_UPW_Minkowski_Body(cGH const * restrict const cctkGH, int co /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_Minkowski, + LC_LOOP3VEC(ML_BSSN_UPW_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_UPW_Minkowski_Body(cGH const * restrict const cctkGH, int co CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_UPW_Minkowski); } extern "C" void ML_BSSN_UPW_Minkowski(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_UPW_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_Minkowski", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_UPW_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS1.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS1.cc index 3b7b6a7..be7f83d 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS1.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS1.cc @@ -62,8 +62,6 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -100,9 +98,9 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -121,14 +119,14 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -142,9 +140,9 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -225,7 +223,7 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_RHS1, + LC_LOOP3VEC(ML_BSSN_UPW_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1311,13 +1309,13 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-24)),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmul(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-24)),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmul(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-24)),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmul(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1326,7 +1324,7 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(Pi,kmul(kadd(rho,trS),ToReal(4)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; @@ -1378,108 +1376,29 @@ static void ML_BSSN_UPW_RHS1_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_UPW_RHS1); } extern "C" void ML_BSSN_UPW_RHS1(CCTK_ARGUMENTS) @@ -1498,7 +1417,26 @@ extern "C" void ML_BSSN_UPW_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtlapserhs","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_dtshiftrhs","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_Gammarhs","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_lapserhs","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_log_confacrhs","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_metricrhs","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_shiftrhs","ML_BSSN_UPW::ML_trace_curv","ML_BSSN_UPW::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtlapserhs", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_dtshiftrhs", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_Gammarhs", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_lapserhs", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_log_confacrhs", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_metricrhs", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_shiftrhs", + "ML_BSSN_UPW::ML_trace_curv", + "ML_BSSN_UPW::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_RHS1", 19, groups); switch(fdOrder) @@ -1520,7 +1458,7 @@ extern "C" void ML_BSSN_UPW_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS2.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS2.cc index 2ca500f..4178637 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS2.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_RHS2.cc @@ -41,8 +41,6 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_RHS2, + LC_LOOP3VEC(ML_BSSN_UPW_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1787,73 +1785,33 @@ static void ML_BSSN_UPW_RHS2_Body(cGH const * restrict const cctkGH, int const d kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxyL,kmul(Pi,ToReal(-8)),kmul(g12,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxzL,kmul(Pi,ToReal(-8)),kmul(g13,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTyzL,kmul(Pi,ToReal(-8)),kmul(g23,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24)))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_UPW_RHS2); } extern "C" void ML_BSSN_UPW_RHS2(CCTK_ARGUMENTS) @@ -1872,7 +1830,15 @@ extern "C" void ML_BSSN_UPW_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_curvrhs","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_curvrhs", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_RHS2", 8, groups); switch(fdOrder) @@ -1894,7 +1860,7 @@ extern "C" void ML_BSSN_UPW_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_RHSStaticBoundary.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_RHSStaticBoundary.cc index 25eff8a..1e24ff9 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_RHSStaticBoundary.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_RHSStaticBoundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_UPW_RHSStaticBoundary_Body(cGH const * restrict const cctkGH DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_UPW_RHSStaticBoundary_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_UPW_RHSStaticBoundary_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_UPW_RHSStaticBoundary_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_UPW_RHSStaticBoundary_Body(cGH const * restrict const cctkGH /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_UPW_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_UPW_RHSStaticBoundary_Body(cGH const * restrict const cctkGH CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_UPW_RHSStaticBoundary); } extern "C" void ML_BSSN_UPW_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_UPW_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curvrhs","ML_BSSN_UPW::ML_dtlapserhs","ML_BSSN_UPW::ML_dtshiftrhs","ML_BSSN_UPW::ML_Gammarhs","ML_BSSN_UPW::ML_lapserhs","ML_BSSN_UPW::ML_log_confacrhs","ML_BSSN_UPW::ML_metricrhs","ML_BSSN_UPW::ML_shiftrhs","ML_BSSN_UPW::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curvrhs", + "ML_BSSN_UPW::ML_dtlapserhs", + "ML_BSSN_UPW::ML_dtshiftrhs", + "ML_BSSN_UPW::ML_Gammarhs", + "ML_BSSN_UPW::ML_lapserhs", + "ML_BSSN_UPW::ML_log_confacrhs", + "ML_BSSN_UPW::ML_metricrhs", + "ML_BSSN_UPW::ML_shiftrhs", + "ML_BSSN_UPW::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_UPW_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_UPW_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_UPW_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_boundary.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_boundary.cc index 4ec5c67..4d6f0de 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_boundary.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_boundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_UPW_boundary_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_UPW_boundary_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_UPW_boundary_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_UPW_boundary_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_UPW_boundary_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_boundary, + LC_LOOP3VEC(ML_BSSN_UPW_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_UPW_boundary_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_boundary); + LC_ENDLOOP3VEC(ML_BSSN_UPW_boundary); } extern "C" void ML_BSSN_UPW_boundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_UPW_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_boundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_UPW_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_UPW_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_UPW_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints1.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints1.cc index 7c0aab7..929992f 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints1.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints1.cc @@ -41,8 +41,6 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_constraints1, + LC_LOOP3VEC(ML_BSSN_UPW_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1561,38 +1559,13 @@ static void ML_BSSN_UPW_constraints1_Body(cGH const * restrict const cctkGH, int kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(Pi,kmul(rho,ToReal(-16)),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(kmsub(SQR(trKL),ToReal(0.666666666666666666666666666667),SQR(Atm33)),SQR(Atm22)),SQR(Atm11))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_UPW_constraints1); } extern "C" void ML_BSSN_UPW_constraints1(CCTK_ARGUMENTS) @@ -1611,7 +1584,15 @@ extern "C" void ML_BSSN_UPW_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_Ham","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_Ham", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_constraints1", 8, groups); switch(fdOrder) @@ -1633,7 +1614,7 @@ extern "C" void ML_BSSN_UPW_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints2.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints2.cc index a198a37..e15db36 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints2.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_constraints2.cc @@ -50,8 +50,6 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -88,9 +86,9 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -109,14 +107,14 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -130,9 +128,9 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -213,7 +211,7 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_constraints2, + LC_LOOP3VEC(ML_BSSN_UPW_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -912,13 +910,13 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(Pi,kmul(S1,ToReal(-8)),kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmul(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(Pi,kmul(S2,ToReal(-8)),kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmul(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(Pi,kmul(S3,ToReal(-8)),kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmul(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -934,64 +932,18 @@ static void ML_BSSN_UPW_constraints2_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_UPW_constraints2); } extern "C" void ML_BSSN_UPW_constraints2(CCTK_ARGUMENTS) @@ -1010,7 +962,18 @@ extern "C" void ML_BSSN_UPW_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_cons_detg","ML_BSSN_UPW::ML_cons_Gamma","ML_BSSN_UPW::ML_cons_traceA","ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_mom","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_cons_detg", + "ML_BSSN_UPW::ML_cons_Gamma", + "ML_BSSN_UPW::ML_cons_traceA", + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_mom", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_constraints2", 11, groups); switch(fdOrder) @@ -1032,7 +995,7 @@ extern "C" void ML_BSSN_UPW_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBase.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBase.cc index 8e0a726..acfc692 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBase.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_UPW_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -325,104 +323,28 @@ static void ML_BSSN_UPW_convertFromADMBase_Body(cGH const * restrict const cctkG CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_UPW_convertFromADMBase); } extern "C" void ML_BSSN_UPW_convertFromADMBase(CCTK_ARGUMENTS) @@ -441,7 +363,17 @@ extern "C" void ML_BSSN_UPW_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_convertFromADMBase", 10, groups); switch(fdOrder) @@ -459,7 +391,7 @@ extern "C" void ML_BSSN_UPW_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBaseGamma.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBaseGamma.cc index 16302eb..332de3c 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBaseGamma.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertFromADMBaseGamma.cc @@ -47,8 +47,6 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -85,9 +83,9 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -106,14 +104,14 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -127,9 +125,9 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -210,7 +208,7 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_UPW_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -709,13 +707,13 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNth1beta1,kmadd(beta2L,JacPDupwindNth2beta1,kmul(beta3L,JacPDupwindNth3beta1))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNth1beta1,kmadd(beta2L,JacPDupwindNth2beta1,kmul(beta3L,JacPDupwindNth3beta1))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNth1beta2,kmadd(beta2L,JacPDupwindNth2beta2,kmul(beta3L,JacPDupwindNth3beta2))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNth1beta2,kmadd(beta2L,JacPDupwindNth2beta2,kmul(beta3L,JacPDupwindNth3beta2))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNth1beta3,kmadd(beta2L,JacPDupwindNth2beta3,kmul(beta3L,JacPDupwindNth3beta3))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNth1beta3,kmadd(beta2L,JacPDupwindNth2beta3,kmul(beta3L,JacPDupwindNth3beta3))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -726,60 +724,17 @@ static void ML_BSSN_UPW_convertFromADMBaseGamma_Body(cGH const * restrict const B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_UPW_convertFromADMBaseGamma); } extern "C" void ML_BSSN_UPW_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -798,7 +753,17 @@ extern "C" void ML_BSSN_UPW_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -820,7 +785,7 @@ extern "C" void ML_BSSN_UPW_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBase.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBase.cc index dfc421d..af49a26 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBase.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_UPW_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -289,96 +287,26 @@ static void ML_BSSN_UPW_convertToADMBase_Body(cGH const * restrict const cctkGH, CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_UPW_convertToADMBase); } extern "C" void ML_BSSN_UPW_convertToADMBase(CCTK_ARGUMENTS) @@ -397,7 +325,17 @@ extern "C" void ML_BSSN_UPW_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_convertToADMBase", 10, groups); switch(fdOrder) @@ -415,7 +353,7 @@ extern "C" void ML_BSSN_UPW_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShift.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShift.cc index ca386eb..4671a53 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShift.cc @@ -44,8 +44,6 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_UPW_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -719,48 +717,14 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC dtbetazL = kmadd(kmadd(beta1L,JacPDupwindNth1beta3,kmadd(beta2L,JacPDupwindNth2beta3,kmul(beta3L,JacPDupwindNth3beta3))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_UPW_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_UPW_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -779,7 +743,19 @@ extern "C" void ML_BSSN_UPW_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_log_confac","ML_BSSN_UPW::ML_metric","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_log_confac", + "ML_BSSN_UPW::ML_metric", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -801,7 +777,7 @@ extern "C" void ML_BSSN_UPW_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_UPW_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary.cc index 995d721..723f9b0 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary.cc @@ -44,8 +44,6 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -285,48 +283,14 @@ static void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * re kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -345,7 +309,17 @@ extern "C" void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -363,7 +337,7 @@ extern "C" void ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift.cc index 824bf86..d33767f 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift.cc @@ -29,8 +29,6 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -270,48 +268,14 @@ static void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body(cGH const * restri kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -330,7 +294,17 @@ extern "C" void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN_UPW::ML_dtlapse","ML_BSSN_UPW::ML_dtshift","ML_BSSN_UPW::ML_Gamma","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_shift","ML_BSSN_UPW::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN_UPW::ML_dtlapse", + "ML_BSSN_UPW::ML_dtshift", + "ML_BSSN_UPW::ML_Gamma", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_shift", + "ML_BSSN_UPW::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -348,7 +322,7 @@ extern "C" void ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/ML_BSSN_UPW_enforce.cc b/ML_BSSN_UPW/src/ML_BSSN_UPW_enforce.cc index 479dd7f..2f87e25 100644 --- a/ML_BSSN_UPW/src/ML_BSSN_UPW_enforce.cc +++ b/ML_BSSN_UPW/src/ML_BSSN_UPW_enforce.cc @@ -29,8 +29,6 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_UPW_enforce, + LC_LOOP3VEC(ML_BSSN_UPW_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -280,60 +278,17 @@ static void ML_BSSN_UPW_enforce_Body(cGH const * restrict const cctkGH, int cons alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_UPW_enforce); + LC_ENDLOOP3VEC(ML_BSSN_UPW_enforce); } extern "C" void ML_BSSN_UPW_enforce(CCTK_ARGUMENTS) @@ -352,7 +307,10 @@ extern "C" void ML_BSSN_UPW_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN_UPW::ML_curv","ML_BSSN_UPW::ML_lapse","ML_BSSN_UPW::ML_metric"}; + const char *const groups[] = { + "ML_BSSN_UPW::ML_curv", + "ML_BSSN_UPW::ML_lapse", + "ML_BSSN_UPW::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_UPW_enforce", 3, groups); switch(fdOrder) @@ -370,7 +328,7 @@ extern "C" void ML_BSSN_UPW_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_UPW_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_UPW_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN_UPW/src/make.code.defn b/ML_BSSN_UPW/src/make.code.defn index ba8a337..f858947 100644 --- a/ML_BSSN_UPW/src/make.code.defn +++ b/ML_BSSN_UPW/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_UPW_Minkowski.cc ML_BSSN_UPW_convertFromADMBase.cc ML_BSSN_UPW_InitGamma.cc ML_BSSN_UPW_convertFromADMBaseGamma.cc ML_BSSN_UPW_RHS1.cc ML_BSSN_UPW_RHS2.cc ML_BSSN_UPW_Dissipation.cc ML_BSSN_UPW_Advect.cc ML_BSSN_UPW_InitRHS.cc ML_BSSN_UPW_RHSStaticBoundary.cc ML_BSSN_UPW_enforce.cc ML_BSSN_UPW_boundary.cc ML_BSSN_UPW_convertToADMBase.cc ML_BSSN_UPW_convertToADMBaseDtLapseShift.cc ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_UPW_constraints1.cc ML_BSSN_UPW_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_UPW_Minkowski.cc ML_BSSN_UPW_convertFromADMBase.cc ML_BSSN_UPW_InitGamma.cc ML_BSSN_UPW_convertFromADMBaseGamma.cc ML_BSSN_UPW_RHS1.cc ML_BSSN_UPW_RHS2.cc ML_BSSN_UPW_Dissipation.cc ML_BSSN_UPW_Advect.cc ML_BSSN_UPW_InitRHS.cc ML_BSSN_UPW_RHSStaticBoundary.cc ML_BSSN_UPW_enforce.cc ML_BSSN_UPW_boundary.cc ML_BSSN_UPW_convertToADMBase.cc ML_BSSN_UPW_convertToADMBaseDtLapseShift.cc ML_BSSN_UPW_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_UPW_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_UPW_constraints1.cc ML_BSSN_UPW_constraints2.cc Boundaries.cc diff --git a/ML_WaveToy/schedule.ccl b/ML_WaveToy/schedule.ccl index 9517af0..20b055e 100644 --- a/ML_WaveToy/schedule.ccl +++ b/ML_WaveToy/schedule.ccl @@ -43,12 +43,6 @@ schedule ML_WaveToy_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_WaveToy_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_WaveToy_RegisterSymmetries in SymmetryRegister { LANG: C @@ -58,11 +52,17 @@ schedule ML_WaveToy_RegisterSymmetries in SymmetryRegister schedule WT_Gaussian AT initial { LANG: C + WRITES: ML_WaveToy::WT_rho + WRITES: ML_WaveToy::WT_u } "WT_Gaussian" schedule WT_RHS IN MoL_CalcRHS { LANG: C + READS: ML_WaveToy::WT_rho + READS: ML_WaveToy::WT_u + WRITES: ML_WaveToy::WT_rhorhs + WRITES: ML_WaveToy::WT_urhs } "WT_RHS" schedule WT_RHS AT analysis @@ -70,6 +70,10 @@ schedule WT_RHS AT analysis LANG: C SYNC: WT_rhorhs SYNC: WT_urhs + READS: ML_WaveToy::WT_rho + READS: ML_WaveToy::WT_u + WRITES: ML_WaveToy::WT_rhorhs + WRITES: ML_WaveToy::WT_urhs } "WT_RHS" schedule ML_WaveToy_SelectBoundConds in MoL_PostStep @@ -86,6 +90,12 @@ schedule ML_WaveToy_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_WaveToy_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_WaveToy_ApplyBCs in MoL_PostStep after ML_WaveToy_SelectBoundConds { # no language specified diff --git a/ML_WaveToy/src/WT_Gaussian.cc b/ML_WaveToy/src/WT_Gaussian.cc index bd7116c..c88259e 100644 --- a/ML_WaveToy/src/WT_Gaussian.cc +++ b/ML_WaveToy/src/WT_Gaussian.cc @@ -28,8 +28,6 @@ static void WT_Gaussian_Body(cGH const * restrict const cctkGH, int const dir, i DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -60,9 +58,9 @@ static void WT_Gaussian_Body(cGH const * restrict const cctkGH, int const dir, i CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); + CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx*dy); + CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx*dz); + CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy*dz); CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); @@ -77,7 +75,7 @@ static void WT_Gaussian_Body(cGH const * restrict const cctkGH, int const dir, i /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (WT_Gaussian, + CCTK_LOOP3(WT_Gaussian, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -100,7 +98,7 @@ static void WT_Gaussian_Body(cGH const * restrict const cctkGH, int const dir, i rho[index] = rhoL; u[index] = uL; } - CCTK_ENDLOOP3 (WT_Gaussian); + CCTK_ENDLOOP3(WT_Gaussian); } extern "C" void WT_Gaussian(CCTK_ARGUMENTS) @@ -119,11 +117,13 @@ extern "C" void WT_Gaussian(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_WaveToy::WT_rho","ML_WaveToy::WT_u"}; + const char *const groups[] = { + "ML_WaveToy::WT_rho", + "ML_WaveToy::WT_u"}; GenericFD_AssertGroupStorage(cctkGH, "WT_Gaussian", 2, groups); - GenericFD_LoopOverEverything(cctkGH, &WT_Gaussian_Body); + GenericFD_LoopOverEverything(cctkGH, WT_Gaussian_Body); if (verbose > 1) { diff --git a/ML_WaveToy/src/WT_RHS.cc b/ML_WaveToy/src/WT_RHS.cc index 6fe1d8e..2dd39f9 100644 --- a/ML_WaveToy/src/WT_RHS.cc +++ b/ML_WaveToy/src/WT_RHS.cc @@ -43,8 +43,6 @@ static void WT_RHS_Body(cGH const * restrict const cctkGH, int const dir, int co DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -75,9 +73,9 @@ static void WT_RHS_Body(cGH const * restrict const cctkGH, int const dir, int co CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); + CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx*dy); + CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx*dz); + CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy*dz); CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); @@ -92,7 +90,7 @@ static void WT_RHS_Body(cGH const * restrict const cctkGH, int const dir, int co /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (WT_RHS, + CCTK_LOOP3(WT_RHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -121,7 +119,7 @@ static void WT_RHS_Body(cGH const * restrict const cctkGH, int const dir, int co rhorhs[index] = rhorhsL; urhs[index] = urhsL; } - CCTK_ENDLOOP3 (WT_RHS); + CCTK_ENDLOOP3(WT_RHS); } extern "C" void WT_RHS(CCTK_ARGUMENTS) @@ -140,12 +138,16 @@ extern "C" void WT_RHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_WaveToy::WT_rho","ML_WaveToy::WT_rhorhs","ML_WaveToy::WT_u","ML_WaveToy::WT_urhs"}; + const char *const groups[] = { + "ML_WaveToy::WT_rho", + "ML_WaveToy::WT_rhorhs", + "ML_WaveToy::WT_u", + "ML_WaveToy::WT_urhs"}; GenericFD_AssertGroupStorage(cctkGH, "WT_RHS", 4, groups); GenericFD_EnsureStencilFits(cctkGH, "WT_RHS", 2, 2, 2); - GenericFD_LoopOverInterior(cctkGH, &WT_RHS_Body); + GenericFD_LoopOverInterior(cctkGH, WT_RHS_Body); if (verbose > 1) { diff --git a/ML_WaveToy/src/make.code.defn b/ML_WaveToy/src/make.code.defn index fbd9e15..a36e2d9 100644 --- a/ML_WaveToy/src/make.code.defn +++ b/ML_WaveToy/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc WT_Gaussian.cc WT_RHS.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc WT_Gaussian.cc WT_RHS.cc Boundaries.cc diff --git a/ML_hydro/schedule.ccl b/ML_hydro/schedule.ccl index 0d40717..055b0ff 100644 --- a/ML_hydro/schedule.ccl +++ b/ML_hydro/schedule.ccl @@ -125,12 +125,6 @@ schedule ML_hydro_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_hydro_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_hydro_RegisterSymmetries in SymmetryRegister { LANG: C @@ -143,6 +137,9 @@ if (CCTK_EQUALS(initial_data, "vacuum")) schedule hydro_vacuum IN ADMBase_InitialData { LANG: C + WRITES: ML_hydro::eps_group + WRITES: ML_hydro::rho_group + WRITES: ML_hydro::vel_group } "hydro_vacuum" } @@ -152,17 +149,41 @@ if (CCTK_EQUALS(initial_data, "sound wave")) schedule hydro_soundWave IN ADMBase_InitialData { LANG: C + READS: grid::coordinates + WRITES: ML_hydro::eps_group + WRITES: ML_hydro::rho_group + WRITES: ML_hydro::vel_group } "hydro_soundWave" } schedule hydro_prim2con AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ML_hydro::eps_group + READS: ML_hydro::mass_group + READS: ML_hydro::rho_group + READS: ML_hydro::vel_group + READS: ML_hydro::vol_group + WRITES: ML_hydro::ene_group + WRITES: ML_hydro::mass_group + WRITES: ML_hydro::mom_group + WRITES: ML_hydro::vol_group } "hydro_prim2con" schedule hydro_con2prim IN hydro_con2primGroup { LANG: C + READS: ML_hydro::ene_group + READS: ML_hydro::eps_group + READS: ML_hydro::mass_group + READS: ML_hydro::mom_group + READS: ML_hydro::rho_group + READS: ML_hydro::vel_group + READS: ML_hydro::vol_group + WRITES: ML_hydro::eps_group + WRITES: ML_hydro::press_group + WRITES: ML_hydro::rho_group + WRITES: ML_hydro::vel_group } "hydro_con2prim" schedule hydro_RHS IN hydro_evolCalcGroup AFTER hydro_fluxes @@ -171,6 +192,12 @@ schedule hydro_RHS IN hydro_evolCalcGroup AFTER hydro_fluxes SYNC: ene_grouprhs SYNC: mass_grouprhs SYNC: mom_grouprhs + READS: ML_hydro::eneflux_group + READS: ML_hydro::massflux_group + READS: ML_hydro::momflux_group + WRITES: ML_hydro::ene_grouprhs + WRITES: ML_hydro::mass_grouprhs + WRITES: ML_hydro::mom_grouprhs } "hydro_RHS" schedule ML_hydro_SelectBoundConds in MoL_PostStep @@ -188,6 +215,12 @@ schedule ML_hydro_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_hydro_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_hydro_ApplyBCs in MoL_PostStep after ML_hydro_SelectBoundConds { # no language specified diff --git a/ML_hydro/src/hydro_RHS.cc b/ML_hydro/src/hydro_RHS.cc index c47b671..3f41d0f 100644 --- a/ML_hydro/src/hydro_RHS.cc +++ b/ML_hydro/src/hydro_RHS.cc @@ -46,8 +46,6 @@ static void hydro_RHS_Body(cGH const * restrict const cctkGH, int const dir, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -78,9 +76,9 @@ static void hydro_RHS_Body(cGH const * restrict const cctkGH, int const dir, int CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody2 = INV(SQR(dy)); CCTK_REAL const p1odz2 = INV(SQR(dz)); @@ -95,7 +93,7 @@ static void hydro_RHS_Body(cGH const * restrict const cctkGH, int const dir, int /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (hydro_RHS, + CCTK_LOOP3(hydro_RHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -162,7 +160,7 @@ static void hydro_RHS_Body(cGH const * restrict const cctkGH, int const dir, int mom2rhs[index] = mom2rhsL; mom3rhs[index] = mom3rhsL; } - CCTK_ENDLOOP3 (hydro_RHS); + CCTK_ENDLOOP3(hydro_RHS); } extern "C" void hydro_RHS(CCTK_ARGUMENTS) @@ -181,12 +179,18 @@ extern "C" void hydro_RHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_hydro::eneflux_group","ML_hydro::ene_grouprhs","ML_hydro::massflux_group","ML_hydro::mass_grouprhs","ML_hydro::momflux_group","ML_hydro::mom_grouprhs"}; + const char *const groups[] = { + "ML_hydro::eneflux_group", + "ML_hydro::ene_grouprhs", + "ML_hydro::massflux_group", + "ML_hydro::mass_grouprhs", + "ML_hydro::momflux_group", + "ML_hydro::mom_grouprhs"}; GenericFD_AssertGroupStorage(cctkGH, "hydro_RHS", 6, groups); GenericFD_EnsureStencilFits(cctkGH, "hydro_RHS", 1, 1, 1); - GenericFD_LoopOverInterior(cctkGH, &hydro_RHS_Body); + GenericFD_LoopOverInterior(cctkGH, hydro_RHS_Body); if (verbose > 1) { diff --git a/ML_hydro/src/hydro_con2prim.cc b/ML_hydro/src/hydro_con2prim.cc index d669e07..716321d 100644 --- a/ML_hydro/src/hydro_con2prim.cc +++ b/ML_hydro/src/hydro_con2prim.cc @@ -28,8 +28,6 @@ static void hydro_con2prim_Body(cGH const * restrict const cctkGH, int const dir DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -60,9 +58,9 @@ static void hydro_con2prim_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody2 = INV(SQR(dy)); CCTK_REAL const p1odz2 = INV(SQR(dz)); @@ -77,7 +75,7 @@ static void hydro_con2prim_Body(cGH const * restrict const cctkGH, int const dir /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (hydro_con2prim, + CCTK_LOOP3(hydro_con2prim, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -124,7 +122,7 @@ static void hydro_con2prim_Body(cGH const * restrict const cctkGH, int const dir vel2[index] = vel2L; vel3[index] = vel3L; } - CCTK_ENDLOOP3 (hydro_con2prim); + CCTK_ENDLOOP3(hydro_con2prim); } extern "C" void hydro_con2prim(CCTK_ARGUMENTS) @@ -143,11 +141,19 @@ extern "C" void hydro_con2prim(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_hydro::ene_group","ML_hydro::eps_group","ML_hydro::mass_group","ML_hydro::mom_group","ML_hydro::press_group","ML_hydro::rho_group","ML_hydro::vel_group","ML_hydro::vol_group"}; + const char *const groups[] = { + "ML_hydro::ene_group", + "ML_hydro::eps_group", + "ML_hydro::mass_group", + "ML_hydro::mom_group", + "ML_hydro::press_group", + "ML_hydro::rho_group", + "ML_hydro::vel_group", + "ML_hydro::vol_group"}; GenericFD_AssertGroupStorage(cctkGH, "hydro_con2prim", 8, groups); - GenericFD_LoopOverEverything(cctkGH, &hydro_con2prim_Body); + GenericFD_LoopOverEverything(cctkGH, hydro_con2prim_Body); if (verbose > 1) { diff --git a/ML_hydro/src/hydro_prim2con.cc b/ML_hydro/src/hydro_prim2con.cc index cf7aabf..d9aa747 100644 --- a/ML_hydro/src/hydro_prim2con.cc +++ b/ML_hydro/src/hydro_prim2con.cc @@ -28,8 +28,6 @@ static void hydro_prim2con_Body(cGH const * restrict const cctkGH, int const dir DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -60,9 +58,9 @@ static void hydro_prim2con_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody2 = INV(SQR(dy)); CCTK_REAL const p1odz2 = INV(SQR(dz)); @@ -77,7 +75,7 @@ static void hydro_prim2con_Body(cGH const * restrict const cctkGH, int const dir /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (hydro_prim2con, + CCTK_LOOP3(hydro_prim2con, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -120,7 +118,7 @@ static void hydro_prim2con_Body(cGH const * restrict const cctkGH, int const dir mom3[index] = mom3L; vol[index] = volL; } - CCTK_ENDLOOP3 (hydro_prim2con); + CCTK_ENDLOOP3(hydro_prim2con); } extern "C" void hydro_prim2con(CCTK_ARGUMENTS) @@ -139,11 +137,18 @@ extern "C" void hydro_prim2con(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_hydro::ene_group","ML_hydro::eps_group","ML_hydro::mass_group","ML_hydro::mom_group","ML_hydro::rho_group","ML_hydro::vel_group","ML_hydro::vol_group"}; + const char *const groups[] = { + "ML_hydro::ene_group", + "ML_hydro::eps_group", + "ML_hydro::mass_group", + "ML_hydro::mom_group", + "ML_hydro::rho_group", + "ML_hydro::vel_group", + "ML_hydro::vol_group"}; GenericFD_AssertGroupStorage(cctkGH, "hydro_prim2con", 7, groups); - GenericFD_LoopOverEverything(cctkGH, &hydro_prim2con_Body); + GenericFD_LoopOverEverything(cctkGH, hydro_prim2con_Body); if (verbose > 1) { diff --git a/ML_hydro/src/hydro_soundWave.cc b/ML_hydro/src/hydro_soundWave.cc index 6b8c0be..64f5d15 100644 --- a/ML_hydro/src/hydro_soundWave.cc +++ b/ML_hydro/src/hydro_soundWave.cc @@ -28,8 +28,6 @@ static void hydro_soundWave_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -60,9 +58,9 @@ static void hydro_soundWave_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody2 = INV(SQR(dy)); CCTK_REAL const p1odz2 = INV(SQR(dz)); @@ -77,7 +75,7 @@ static void hydro_soundWave_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (hydro_soundWave, + CCTK_LOOP3(hydro_soundWave, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -95,11 +93,11 @@ static void hydro_soundWave_Body(cGH const * restrict const cctkGH, int const di /* Calculate temporaries and grid functions */ CCTK_REAL rhoL = 1.; - CCTK_REAL vel1L = Sin(2*xL*Pi*INV(ToReal(L)))*ToReal(A); + CCTK_REAL vel1L = sin(2*xL*Pi*INV(ToReal(L)))*ToReal(A); - CCTK_REAL vel2L = Sin(2*xL*Pi*INV(ToReal(L)))*ToReal(A); + CCTK_REAL vel2L = sin(2*xL*Pi*INV(ToReal(L)))*ToReal(A); - CCTK_REAL vel3L = Sin(2*xL*Pi*INV(ToReal(L)))*ToReal(A); + CCTK_REAL vel3L = sin(2*xL*Pi*INV(ToReal(L)))*ToReal(A); CCTK_REAL epsL = 1.; @@ -110,7 +108,7 @@ static void hydro_soundWave_Body(cGH const * restrict const cctkGH, int const di vel2[index] = vel2L; vel3[index] = vel3L; } - CCTK_ENDLOOP3 (hydro_soundWave); + CCTK_ENDLOOP3(hydro_soundWave); } extern "C" void hydro_soundWave(CCTK_ARGUMENTS) @@ -129,11 +127,15 @@ extern "C" void hydro_soundWave(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_hydro::eps_group","grid::coordinates","ML_hydro::rho_group","ML_hydro::vel_group"}; + const char *const groups[] = { + "ML_hydro::eps_group", + "grid::coordinates", + "ML_hydro::rho_group", + "ML_hydro::vel_group"}; GenericFD_AssertGroupStorage(cctkGH, "hydro_soundWave", 4, groups); - GenericFD_LoopOverEverything(cctkGH, &hydro_soundWave_Body); + GenericFD_LoopOverEverything(cctkGH, hydro_soundWave_Body); if (verbose > 1) { diff --git a/ML_hydro/src/hydro_vacuum.cc b/ML_hydro/src/hydro_vacuum.cc index f795107..5878b58 100644 --- a/ML_hydro/src/hydro_vacuum.cc +++ b/ML_hydro/src/hydro_vacuum.cc @@ -28,8 +28,6 @@ static void hydro_vacuum_Body(cGH const * restrict const cctkGH, int const dir, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -60,9 +58,9 @@ static void hydro_vacuum_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL const p1o2dx = 0.5*INV(dx); CCTK_REAL const p1o2dy = 0.5*INV(dy); CCTK_REAL const p1o2dz = 0.5*INV(dz); - CCTK_REAL const p1o4dxdy = 0.25*INV(dx)*INV(dy); - CCTK_REAL const p1o4dxdz = 0.25*INV(dx)*INV(dz); - CCTK_REAL const p1o4dydz = 0.25*INV(dy)*INV(dz); + CCTK_REAL const p1o4dxdy = 0.25*INV(dx*dy); + CCTK_REAL const p1o4dxdz = 0.25*INV(dx*dz); + CCTK_REAL const p1o4dydz = 0.25*INV(dy*dz); CCTK_REAL const p1odx2 = INV(SQR(dx)); CCTK_REAL const p1ody2 = INV(SQR(dy)); CCTK_REAL const p1odz2 = INV(SQR(dz)); @@ -77,7 +75,7 @@ static void hydro_vacuum_Body(cGH const * restrict const cctkGH, int const dir, /* Loop over the grid points */ #pragma omp parallel - CCTK_LOOP3 (hydro_vacuum, + CCTK_LOOP3(hydro_vacuum, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) { @@ -109,7 +107,7 @@ static void hydro_vacuum_Body(cGH const * restrict const cctkGH, int const dir, vel2[index] = vel2L; vel3[index] = vel3L; } - CCTK_ENDLOOP3 (hydro_vacuum); + CCTK_ENDLOOP3(hydro_vacuum); } extern "C" void hydro_vacuum(CCTK_ARGUMENTS) @@ -128,11 +126,14 @@ extern "C" void hydro_vacuum(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_hydro::eps_group","ML_hydro::rho_group","ML_hydro::vel_group"}; + const char *const groups[] = { + "ML_hydro::eps_group", + "ML_hydro::rho_group", + "ML_hydro::vel_group"}; GenericFD_AssertGroupStorage(cctkGH, "hydro_vacuum", 3, groups); - GenericFD_LoopOverEverything(cctkGH, &hydro_vacuum_Body); + GenericFD_LoopOverEverything(cctkGH, hydro_vacuum_Body); if (verbose > 1) { diff --git a/ML_hydro/src/make.code.defn b/ML_hydro/src/make.code.defn index f5bbd24..f7c60fa 100644 --- a/ML_hydro/src/make.code.defn +++ b/ML_hydro/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc hydro_vacuum.cc hydro_soundWave.cc hydro_prim2con.cc hydro_con2prim.cc hydro_RHS.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc hydro_vacuum.cc hydro_soundWave.cc hydro_prim2con.cc hydro_con2prim.cc hydro_RHS.cc Boundaries.cc |