diff options
Diffstat (limited to 'ML_BSSN')
-rw-r--r-- | ML_BSSN/schedule.ccl | 237 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_Advect.cc | 203 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_Dissipation.cc | 203 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_InitGamma.cc | 98 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_InitRHS.cc | 194 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_Minkowski.cc | 194 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_RHS1.cc | 182 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_RHS2.cc | 112 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc | 194 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_boundary.cc | 194 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_constraints1.cc | 75 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_constraints2.cc | 117 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_convertFromADMBase.cc | 160 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc | 111 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_convertToADMBase.cc | 150 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc | 92 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc | 90 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc | 90 | ||||
-rw-r--r-- | ML_BSSN/src/ML_BSSN_enforce.cc | 98 | ||||
-rw-r--r-- | ML_BSSN/src/make.code.defn | 2 |
20 files changed, 1011 insertions, 1785 deletions
diff --git a/ML_BSSN/schedule.ccl b/ML_BSSN/schedule.ccl index 381faeb..3e657c9 100644 --- a/ML_BSSN/schedule.ccl +++ b/ML_BSSN/schedule.ccl @@ -266,12 +266,6 @@ schedule ML_BSSN_Startup at STARTUP OPTIONS: meta } "create banner" -schedule ML_BSSN_RegisterVars in MoL_Register -{ - LANG: C - OPTIONS: meta -} "Register Variables for MoL" - schedule ML_BSSN_RegisterSymmetries in SymmetryRegister { LANG: C @@ -284,6 +278,15 @@ if (CCTK_EQUALS(my_initial_data, "Minkowski")) schedule ML_BSSN_Minkowski IN ADMBase_InitialData { LANG: C + WRITES: ML_BSSN::ML_curv + WRITES: ML_BSSN::ML_dtlapse + WRITES: ML_BSSN::ML_dtshift + WRITES: ML_BSSN::ML_Gamma + WRITES: ML_BSSN::ML_lapse + WRITES: ML_BSSN::ML_log_confac + WRITES: ML_BSSN::ML_metric + WRITES: ML_BSSN::ML_shift + WRITES: ML_BSSN::ML_trace_curv } "ML_BSSN_Minkowski" } @@ -293,6 +296,18 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_convertFromADMBase AT initial AFTER ADMBase_PostInitial { LANG: C + READS: ADMBase::curv + READS: ADMBase::lapse + READS: ADMBase::metric + READS: ADMBase::shift + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_trace_curv + WRITES: ML_BSSN::ML_curv + WRITES: ML_BSSN::ML_lapse + WRITES: ML_BSSN::ML_log_confac + WRITES: ML_BSSN::ML_metric + WRITES: ML_BSSN::ML_shift + WRITES: ML_BSSN::ML_trace_curv } "ML_BSSN_convertFromADMBase" } @@ -302,6 +317,9 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) schedule ML_BSSN_InitGamma AT initial BEFORE ML_BSSN_convertFromADMBaseGamma { LANG: C + WRITES: ML_BSSN::ML_dtlapse + WRITES: ML_BSSN::ML_dtshift + WRITES: ML_BSSN::ML_Gamma } "ML_BSSN_InitGamma" } @@ -314,17 +332,62 @@ if (CCTK_EQUALS(my_initial_data, "ADMBase")) SYNC: ML_dtlapse SYNC: ML_dtshift SYNC: ML_Gamma + READS: ADMBase::dtlapse + READS: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + WRITES: ML_BSSN::ML_dtlapse + WRITES: ML_BSSN::ML_dtshift + WRITES: ML_BSSN::ML_Gamma } "ML_BSSN_convertFromADMBaseGamma" } schedule ML_BSSN_RHS1 IN ML_BSSN_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_RHS1" schedule ML_BSSN_RHS2 IN ML_BSSN_evolCalcGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN::ML_curvrhs } "ML_BSSN_RHS2" @@ -333,17 +396,80 @@ if (CCTK_EQUALS(apply_dissipation, "always")) schedule ML_BSSN_Dissipation IN ML_BSSN_evolCalcGroup AFTER (ML_BSSN_RHS1 ML_BSSN_RHS2) { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_curvrhs + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtlapserhs + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_dtshiftrhs + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_Gammarhs + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_lapserhs + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_log_confacrhs + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_metricrhs + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_shiftrhs + READS: ML_BSSN::ML_trace_curv + READS: ML_BSSN::ML_trace_curvrhs + WRITES: ML_BSSN::ML_curvrhs + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_Dissipation" } schedule ML_BSSN_Advect IN ML_BSSN_evolCalcGroup AFTER (ML_BSSN_RHS1 ML_BSSN_RHS2) { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_curvrhs + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtlapserhs + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_dtshiftrhs + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_Gammarhs + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_lapserhs + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_log_confacrhs + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_metricrhs + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_shiftrhs + READS: ML_BSSN::ML_trace_curv + READS: ML_BSSN::ML_trace_curvrhs + WRITES: ML_BSSN::ML_curvrhs + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_Advect" schedule ML_BSSN_InitRHS AT analysis BEFORE ML_BSSN_evolCalcGroup { LANG: C + WRITES: ML_BSSN::ML_curvrhs + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_InitRHS" @@ -352,12 +478,26 @@ if (CCTK_EQUALS(my_rhs_boundary_condition, "static")) schedule ML_BSSN_RHSStaticBoundary IN MoL_CalcRHS { LANG: C + WRITES: ML_BSSN::ML_curvrhs + WRITES: ML_BSSN::ML_dtlapserhs + WRITES: ML_BSSN::ML_dtshiftrhs + WRITES: ML_BSSN::ML_Gammarhs + WRITES: ML_BSSN::ML_lapserhs + WRITES: ML_BSSN::ML_log_confacrhs + WRITES: ML_BSSN::ML_metricrhs + WRITES: ML_BSSN::ML_shiftrhs + WRITES: ML_BSSN::ML_trace_curvrhs } "ML_BSSN_RHSStaticBoundary" } schedule ML_BSSN_enforce IN MoL_PostStepModify { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_metric + WRITES: ML_BSSN::ML_curv + WRITES: ML_BSSN::ML_lapse } "ML_BSSN_enforce" @@ -366,12 +506,32 @@ if (CCTK_EQUALS(my_boundary_condition, "Minkowski")) schedule ML_BSSN_boundary IN MoL_PostStep { LANG: C + WRITES: ML_BSSN::ML_curv + WRITES: ML_BSSN::ML_dtlapse + WRITES: ML_BSSN::ML_dtshift + WRITES: ML_BSSN::ML_Gamma + WRITES: ML_BSSN::ML_lapse + WRITES: ML_BSSN::ML_log_confac + WRITES: ML_BSSN::ML_metric + WRITES: ML_BSSN::ML_shift + WRITES: ML_BSSN::ML_trace_curv } "ML_BSSN_boundary" } schedule ML_BSSN_convertToADMBase IN ML_BSSN_convertToADMBaseGroup { LANG: C + READS: ADMBase::metric + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + WRITES: ADMBase::curv + WRITES: ADMBase::lapse + WRITES: ADMBase::metric + WRITES: ADMBase::shift } "ML_BSSN_convertToADMBase" @@ -382,6 +542,18 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) LANG: C SYNC: ADMBase::dtlapse SYNC: ADMBase::dtshift + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_convertToADMBaseDtLapseShift" } @@ -391,6 +563,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "correct")) schedule ML_BSSN_convertToADMBaseDtLapseShiftBoundary IN ML_BSSN_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_convertToADMBaseDtLapseShiftBoundary" } @@ -400,6 +583,17 @@ if (CCTK_EQUALS(dt_lapse_shift_method, "noLapseShiftAdvection")) schedule ML_BSSN_convertToADMBaseFakeDtLapseShift IN ML_BSSN_convertToADMBaseGroup { LANG: C + READS: grid::coordinates + READS: Grid::coordinates + READS: ML_BSSN::ML_dtlapse + READS: ML_BSSN::ML_dtshift + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + WRITES: ADMBase::dtlapse + WRITES: ADMBase::dtshift } "ML_BSSN_convertToADMBaseFakeDtLapseShift" } @@ -411,6 +605,17 @@ schedule group ML_BSSN_constraints1_group in MoL_PseudoEvolution after MoL_PostS schedule ML_BSSN_constraints1 in ML_BSSN_constraints1_group { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN::ML_Ham } "ML_BSSN_constraints1" schedule ML_BSSN_constraints1_SelectBCs in ML_BSSN_constraints1_bc_group @@ -443,6 +648,20 @@ schedule group ML_BSSN_constraints2_group in MoL_PseudoEvolution after MoL_PostS schedule ML_BSSN_constraints2 in ML_BSSN_constraints2_group { LANG: C + READS: ML_BSSN::ML_curv + READS: ML_BSSN::ML_Gamma + READS: ML_BSSN::ML_lapse + READS: ML_BSSN::ML_log_confac + READS: ML_BSSN::ML_metric + READS: ML_BSSN::ML_shift + READS: ML_BSSN::ML_trace_curv + READS: TmunuBase::stress_energy_scalar + READS: TmunuBase::stress_energy_tensor + READS: TmunuBase::stress_energy_vector + WRITES: ML_BSSN::ML_cons_detg + WRITES: ML_BSSN::ML_cons_Gamma + WRITES: ML_BSSN::ML_cons_traceA + WRITES: ML_BSSN::ML_mom } "ML_BSSN_constraints2" schedule ML_BSSN_constraints2_SelectBCs in ML_BSSN_constraints2_bc_group @@ -491,6 +710,12 @@ schedule ML_BSSN_CheckBoundaries at BASEGRID OPTIONS: meta } "check boundaries treatment" +schedule ML_BSSN_RegisterVars in MoL_Register +{ + LANG: C + OPTIONS: meta +} "Register Variables for MoL" + schedule group ApplyBCs as ML_BSSN_ApplyBCs in MoL_PostStep after ML_BSSN_SelectBoundConds { # no language specified diff --git a/ML_BSSN/src/ML_BSSN_Advect.cc b/ML_BSSN/src/ML_BSSN_Advect.cc index 9dffd24..2543f21 100644 --- a/ML_BSSN/src/ML_BSSN_Advect.cc +++ b/ML_BSSN/src/ML_BSSN_Advect.cc @@ -65,8 +65,6 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_Advect, + LC_LOOP3VEC(ML_BSSN_Advect, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -2061,132 +2059,35 @@ static void ML_BSSN_Advect_Body(cGH const * restrict const cctkGH, int const dir B3rhsL = kmadd(kmsub(kmadd(beta1L,JacPDupwindNthAnti1B3,kmadd(beta2L,JacPDupwindNthAnti2B3,kmadd(beta3L,JacPDupwindNthAnti3B3,kmadd(JacPDupwindNthSymm1B3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2B3,kfabs(beta2L),kmul(JacPDupwindNthSymm3B3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),kmul(kmadd(beta1L,JacPDupwindNthAnti1Xt3,kmadd(beta2L,JacPDupwindNthAnti2Xt3,kmadd(beta3L,JacPDupwindNthAnti3Xt3,kmadd(JacPDupwindNthSymm1Xt3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2Xt3,kfabs(beta2L),kmul(JacPDupwindNthSymm3Xt3,kfabs(beta3L))))))),kadd(ToReal(-1),ToReal(ShiftAdvectionCoeff)))),ToReal(ShiftBCoeff),B3rhsL); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_Advect); + LC_ENDLOOP3VEC(ML_BSSN_Advect); } extern "C" void ML_BSSN_Advect(CCTK_ARGUMENTS) @@ -2205,7 +2106,25 @@ extern "C" void ML_BSSN_Advect(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_curvrhs","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshift","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gamma","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapse","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confac","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metric","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shift","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curv","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curv", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_Advect", 18, groups); switch(fdOrder) @@ -2227,7 +2146,7 @@ extern "C" void ML_BSSN_Advect(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_Advect_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_Advect_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_Dissipation.cc b/ML_BSSN/src/ML_BSSN_Dissipation.cc index ce66fd0..98378a0 100644 --- a/ML_BSSN/src/ML_BSSN_Dissipation.cc +++ b/ML_BSSN/src/ML_BSSN_Dissipation.cc @@ -65,8 +65,6 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_Dissipation, + LC_LOOP3VEC(ML_BSSN_Dissipation, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1236,132 +1234,35 @@ static void ML_BSSN_Dissipation_Body(cGH const * restrict const cctkGH, int cons B3rhsL = kadd(B3rhsL,kmadd(epsdiss1,JacPDdissipationNth1B3,kmadd(epsdiss2,JacPDdissipationNth2B3,kmul(epsdiss3,JacPDdissipationNth3B3)))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_Dissipation); + LC_ENDLOOP3VEC(ML_BSSN_Dissipation); } extern "C" void ML_BSSN_Dissipation(CCTK_ARGUMENTS) @@ -1380,7 +1281,25 @@ extern "C" void ML_BSSN_Dissipation(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_curvrhs","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshift","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gamma","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapse","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confac","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metric","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shift","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curv","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curv", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_Dissipation", 18, groups); switch(fdOrder) @@ -1402,7 +1321,7 @@ extern "C" void ML_BSSN_Dissipation(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_Dissipation_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_Dissipation_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_InitGamma.cc b/ML_BSSN/src/ML_BSSN_InitGamma.cc index 480696a..647de3b 100644 --- a/ML_BSSN/src/ML_BSSN_InitGamma.cc +++ b/ML_BSSN/src/ML_BSSN_InitGamma.cc @@ -29,8 +29,6 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_InitGamma, + LC_LOOP3VEC(ML_BSSN_InitGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -238,60 +236,17 @@ static void ML_BSSN_InitGamma_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_InitGamma); + LC_ENDLOOP3VEC(ML_BSSN_InitGamma); } extern "C" void ML_BSSN_InitGamma(CCTK_ARGUMENTS) @@ -310,7 +265,10 @@ extern "C" void ML_BSSN_InitGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma"}; + const char *const groups[] = { + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_InitGamma", 3, groups); switch(fdOrder) @@ -328,7 +286,7 @@ extern "C" void ML_BSSN_InitGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_InitGamma_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_InitGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_InitRHS.cc b/ML_BSSN/src/ML_BSSN_InitRHS.cc index 3c556f5..0f36ec8 100644 --- a/ML_BSSN/src/ML_BSSN_InitRHS.cc +++ b/ML_BSSN/src/ML_BSSN_InitRHS.cc @@ -29,8 +29,6 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_InitRHS, + LC_LOOP3VEC(ML_BSSN_InitRHS, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_InitRHS_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_InitRHS); + LC_ENDLOOP3VEC(ML_BSSN_InitRHS); } extern "C" void ML_BSSN_InitRHS(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_InitRHS(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curvrhs","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_InitRHS", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_InitRHS(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_InitRHS_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_InitRHS_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_Minkowski.cc b/ML_BSSN/src/ML_BSSN_Minkowski.cc index aaa94dc..f1033d0 100644 --- a/ML_BSSN/src/ML_BSSN_Minkowski.cc +++ b/ML_BSSN/src/ML_BSSN_Minkowski.cc @@ -29,8 +29,6 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_Minkowski, + LC_LOOP3VEC(ML_BSSN_Minkowski, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -274,132 +272,35 @@ static void ML_BSSN_Minkowski_Body(cGH const * restrict const cctkGH, int const CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_Minkowski); + LC_ENDLOOP3VEC(ML_BSSN_Minkowski); } extern "C" void ML_BSSN_Minkowski(CCTK_ARGUMENTS) @@ -418,7 +319,16 @@ extern "C" void ML_BSSN_Minkowski(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_Minkowski", 9, groups); switch(fdOrder) @@ -436,7 +346,7 @@ extern "C" void ML_BSSN_Minkowski(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_Minkowski_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_Minkowski_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_RHS1.cc b/ML_BSSN/src/ML_BSSN_RHS1.cc index 35f76e5..3775bb7 100644 --- a/ML_BSSN/src/ML_BSSN_RHS1.cc +++ b/ML_BSSN/src/ML_BSSN_RHS1.cc @@ -62,8 +62,6 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -100,9 +98,9 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -121,14 +119,14 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -142,9 +140,9 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -225,7 +223,7 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_RHS1, + LC_LOOP3VEC(ML_BSSN_RHS1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1311,13 +1309,13 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, kmul(ToReal(-0.666666666666666666666666666667),kmadd(kmadd(gt13L,JacPDstandardNth3beta1,kmul(gt23L,JacPDstandardNth3beta2)),ToReal(-3),kmadd(gt33L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmul(alphaL,kmul(At33L,ToReal(3)))))); CCTK_REAL_VEC dotXt1 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),ToReal(-24)),kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmul(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu11,JacPDstandardNth1alpha,kmadd(Atu12,JacPDstandardNth2alpha,kmul(Atu13,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth2beta1,Xtn2,kmul(JacPDstandardNth3beta1,Xtn3)),ToReal(-3),kmadd(Xtn1,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth1beta1),kmadd(kmadd(gtu12,JacPDstandardNth12beta1,kmadd(gtu13,JacPDstandardNth13beta1,kmadd(gtu22,JacPDstandardNth22beta1,kmadd(gtu23,kadd(JacPDstandardNth23beta1,JacPDstandardNth32beta1),kmul(gtu33,JacPDstandardNth33beta1))))),ToReal(3),kmadd(gtu11,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth11beta1,ToReal(4),JacPDstandardNth13beta3)),kmadd(gtu12,kadd(JacPDstandardNth22beta2,kmadd(JacPDstandardNth21beta1,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu13,kadd(JacPDstandardNth32beta2,kmadd(JacPDstandardNth31beta1,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth2beta2,Xtn1,kmul(alphaL,kmadd(kmadd(gtu11,JacPDstandardNth1trK,kmadd(gtu12,JacPDstandardNth2trK,kmul(gtu13,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu23,Gt123,kmul(Atu12,kmadd(cdphi2,ToReal(3),Gt112))),ToReal(6),kmadd(ToReal(3),kmadd(Atu22,Gt122,kmadd(Atu33,Gt133,kmul(Atu11,kmadd(cdphi1,ToReal(6),Gt111)))),kmadd(Atu13,kmadd(Gt113,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu11,S1,kmadd(gtu12,S2,kmul(gtu13,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt2 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),ToReal(-24)),kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmul(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu12,JacPDstandardNth1alpha,kmadd(Atu22,JacPDstandardNth2alpha,kmul(Atu23,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta2,Xtn1,kmul(JacPDstandardNth3beta2,Xtn3)),ToReal(-3),kmadd(Xtn2,kmsub(JacPDstandardNth3beta3,ToReal(2),JacPDstandardNth2beta2),kmadd(kmadd(gtu11,JacPDstandardNth11beta2,kmadd(gtu23,JacPDstandardNth23beta2,kmadd(gtu13,kadd(JacPDstandardNth13beta2,JacPDstandardNth31beta2),kmul(gtu33,JacPDstandardNth33beta2)))),ToReal(3),kmadd(gtu12,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth13beta3,kmadd(JacPDstandardNth21beta2,ToReal(3),kmul(JacPDstandardNth12beta2,ToReal(4))))),kmadd(gtu22,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth22beta2,ToReal(4),JacPDstandardNth23beta3)),kmadd(gtu23,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth32beta2,ToReal(4),JacPDstandardNth33beta3)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn2,kmul(alphaL,kmadd(kmadd(gtu12,JacPDstandardNth1trK,kmadd(gtu22,JacPDstandardNth2trK,kmul(gtu23,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu13,Gt213,kmul(Atu12,kmadd(cdphi1,ToReal(3),Gt212))),ToReal(6),kmadd(ToReal(3),kmadd(Atu11,Gt211,kmadd(Atu33,Gt233,kmul(Atu22,kmadd(cdphi2,ToReal(6),Gt222)))),kmadd(Atu23,kmadd(Gt223,ToReal(6),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu12,S1,kmadd(gtu22,S2,kmul(gtu23,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC dotXt3 = - kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(Pi,kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),ToReal(-24)),kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmul(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))))))))))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atu13,JacPDstandardNth1alpha,kmadd(Atu23,JacPDstandardNth2alpha,kmul(Atu33,JacPDstandardNth3alpha))),ToReal(-6),kmadd(kmadd(JacPDstandardNth1beta3,Xtn1,kmul(JacPDstandardNth2beta3,Xtn2)),ToReal(-3),kmadd(Xtn3,kmsub(JacPDstandardNth2beta2,ToReal(2),JacPDstandardNth3beta3),kmadd(kmadd(gtu11,JacPDstandardNth11beta3,kmadd(gtu12,kadd(JacPDstandardNth12beta3,JacPDstandardNth21beta3),kmadd(gtu22,JacPDstandardNth22beta3,kmul(gtu23,JacPDstandardNth32beta3)))),ToReal(3),kmadd(gtu13,kadd(JacPDstandardNth11beta1,kadd(JacPDstandardNth12beta2,kmadd(JacPDstandardNth31beta3,ToReal(3),kmul(JacPDstandardNth13beta3,ToReal(4))))),kmadd(gtu23,kadd(JacPDstandardNth21beta1,kmadd(JacPDstandardNth23beta3,ToReal(4),JacPDstandardNth22beta2)),kmadd(gtu33,kadd(JacPDstandardNth31beta1,kmadd(JacPDstandardNth33beta3,ToReal(4),JacPDstandardNth32beta2)),kmul(ToReal(2),kmadd(JacPDstandardNth1beta1,Xtn3,kmul(alphaL,kmadd(kmadd(gtu13,JacPDstandardNth1trK,kmadd(gtu23,JacPDstandardNth2trK,kmul(gtu33,JacPDstandardNth3trK))),ToReal(-2),kmadd(kmadd(Atu11,Gt311,kmul(Atu22,Gt322)),ToReal(3),kmadd(kmadd(Atu12,Gt312,kmadd(Atu13,kmadd(cdphi1,ToReal(3),Gt313),kmul(Atu23,kmadd(cdphi2,ToReal(3),Gt323)))),ToReal(6),kmadd(Atu33,kmadd(Gt333,ToReal(3),kmul(cdphi3,ToReal(18))),kmul(kmadd(gtu13,S1,kmadd(gtu23,S2,kmul(gtu33,S3))),kmul(ToReal(-24),ToReal(Pi)))))))))))))))))); CCTK_REAL_VEC Xt1rhsL = dotXt1; @@ -1326,7 +1324,7 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC Xt3rhsL = dotXt3; CCTK_REAL_VEC dottrK = - kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(Pi,kmul(kadd(rho,trS),ToReal(4)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); + kmsub(alphaL,kadd(SQR(Atm11),kadd(SQR(Atm22),kadd(SQR(Atm33),kmadd(SQR(trKL),ToReal(0.333333333333333333333333333333),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(2),kmul(kadd(rho,trS),kmul(ToReal(4),ToReal(Pi)))))))),kmul(em4phi,knmsub(JacPDstandardNth1alpha,Xtn1,knmsub(JacPDstandardNth2alpha,Xtn2,knmsub(JacPDstandardNth3alpha,Xtn3,kmadd(gtu11,kmadd(cdphi1,kmul(JacPDstandardNth1alpha,ToReal(2)),JacPDstandardNth11alpha),kmadd(gtu12,kadd(JacPDstandardNth12alpha,kadd(JacPDstandardNth21alpha,kmadd(cdphi2,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth2alpha,ToReal(2)))))),kmadd(gtu22,kmadd(cdphi2,kmul(JacPDstandardNth2alpha,ToReal(2)),JacPDstandardNth22alpha),kmadd(gtu13,kadd(JacPDstandardNth13alpha,kadd(JacPDstandardNth31alpha,kmadd(cdphi3,kmul(JacPDstandardNth1alpha,ToReal(2)),kmul(cdphi1,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmadd(gtu23,kadd(JacPDstandardNth23alpha,kadd(JacPDstandardNth32alpha,kmadd(cdphi3,kmul(JacPDstandardNth2alpha,ToReal(2)),kmul(cdphi2,kmul(JacPDstandardNth3alpha,ToReal(2)))))),kmul(gtu33,kmadd(cdphi3,kmul(JacPDstandardNth3alpha,ToReal(2)),JacPDstandardNth33alpha)))))))))))); CCTK_REAL_VEC trKrhsL = dottrK; @@ -1378,108 +1376,29 @@ static void ML_BSSN_RHS1_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC B3rhsL = kmul(knmsub(B3L,kmul(eta,ToReal(BetaDriver)),dotXt3),ToReal(ShiftBCoeff)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_RHS1); + LC_ENDLOOP3VEC(ML_BSSN_RHS1); } extern "C" void ML_BSSN_RHS1(CCTK_ARGUMENTS) @@ -1498,7 +1417,26 @@ extern "C" void ML_BSSN_RHS1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"grid::coordinates","Grid::coordinates","ML_BSSN::ML_curv","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshift","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gamma","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapse","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confac","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metric","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shift","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curv","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curv", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_RHS1", 19, groups); switch(fdOrder) @@ -1520,7 +1458,7 @@ extern "C" void ML_BSSN_RHS1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_RHS1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_RHS1_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_RHS2.cc b/ML_BSSN/src/ML_BSSN_RHS2.cc index b918080..fa76e9b 100644 --- a/ML_BSSN/src/ML_BSSN_RHS2.cc +++ b/ML_BSSN/src/ML_BSSN_RHS2.cc @@ -41,8 +41,6 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_RHS2, + LC_LOOP3VEC(ML_BSSN_RHS2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1787,73 +1785,33 @@ static void ML_BSSN_RHS2_Body(cGH const * restrict const cctkGH, int const dir, kmadd(Ats11,gu11,kmadd(Ats22,gu22,kmadd(Ats33,gu33,kmul(kmadd(Ats12,gu12,kmadd(Ats13,gu13,kmul(Ats23,gu23))),ToReal(2))))); CCTK_REAL_VEC At11rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats11,ToReal(3),kmul(g11,trAts)),kmadd(At11L,kmadd(kadd(JacPDstandardNth2beta2,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth1beta1,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth1beta2,kmul(At13L,JacPDstandardNth1beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm21,kmul(At13L,Atm31)),ToReal(6),kmadd(At11L,kmadd(trKL,ToReal(-3),kmul(Atm11,ToReal(6))),kmul(em4phi,kmul(kmadd(g11,kmul(trS,ToReal(-8)),kmul(eTxxL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At12rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxyL,kmul(Pi,ToReal(-8)),kmul(g12,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At12L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth3beta3,ToReal(-2),JacPDstandardNth2beta2)),kmsub(kmadd(Ats12,em4phi,kmadd(At22L,JacPDstandardNth1beta2,kmadd(At23L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth2beta1,kmul(At13L,JacPDstandardNth2beta3))))),ToReal(3),kmul(em4phi,kmul(g12,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm12,kmul(At13L,Atm32)),ToReal(-2),kmadd(At12L,kmadd(Atm22,ToReal(-2),trKL),kmul(em4phi,kmadd(g12,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxyL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At13rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTxzL,kmul(Pi,ToReal(-8)),kmul(g13,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At13L,kadd(JacPDstandardNth1beta1,kmadd(JacPDstandardNth2beta2,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats13,em4phi,kmadd(At23L,JacPDstandardNth1beta2,kmadd(At33L,JacPDstandardNth1beta3,kmadd(At11L,JacPDstandardNth3beta1,kmul(At12L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g13,trAts)))),kmul(alphaL,kmadd(kmadd(At11L,Atm13,kmul(At12L,Atm23)),ToReal(-2),kmadd(At13L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g13,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTxzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At22rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24)))))))))))); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats22,ToReal(3),kmul(g22,trAts)),kmadd(At22L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth3beta3),ToReal(-2),kmul(JacPDstandardNth2beta2,ToReal(4))),kmsub(kmadd(At12L,JacPDstandardNth2beta1,kmul(At23L,JacPDstandardNth2beta3)),ToReal(6),kmul(alphaL,kmadd(kmadd(At12L,Atm12,kmul(At23L,Atm32)),ToReal(6),kmadd(At22L,kmadd(trKL,ToReal(-3),kmul(Atm22,ToReal(6))),kmul(em4phi,kmul(kmadd(g22,kmul(trS,ToReal(-8)),kmul(eTyyL,ToReal(24))),ToReal(Pi)))))))))); CCTK_REAL_VEC At23rhsL = - kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(eTyzL,kmul(Pi,ToReal(-8)),kmul(g23,kmul(trS,ToReal(8.37758040957278196923371568875))))))))); + kmadd(ToReal(0.333333333333333333333333333333),kmadd(At23L,kadd(JacPDstandardNth2beta2,kmadd(JacPDstandardNth1beta1,ToReal(-2),JacPDstandardNth3beta3)),kmsub(kmadd(Ats23,em4phi,kmadd(At13L,JacPDstandardNth2beta1,kmadd(At33L,JacPDstandardNth2beta3,kmadd(At12L,JacPDstandardNth3beta1,kmul(At22L,JacPDstandardNth3beta2))))),ToReal(3),kmul(em4phi,kmul(g23,trAts)))),kmul(alphaL,kmadd(kmadd(At12L,Atm13,kmul(At22L,Atm23)),ToReal(-2),kmadd(At23L,kmadd(Atm33,ToReal(-2),trKL),kmul(em4phi,kmadd(g23,kmul(trS,ToReal(8.37758040957278196923371568875)),kmul(eTyzL,kmul(ToReal(-8),ToReal(Pi))))))))); CCTK_REAL_VEC At33rhsL = - kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(Pi,kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24)))))))))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - break; - } - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); + kmul(ToReal(0.333333333333333333333333333333),kmadd(em4phi,kmsub(Ats33,ToReal(3),kmul(g33,trAts)),kmadd(At33L,kmadd(kadd(JacPDstandardNth1beta1,JacPDstandardNth2beta2),ToReal(-2),kmul(JacPDstandardNth3beta3,ToReal(4))),kmsub(kmadd(At13L,JacPDstandardNth3beta1,kmul(At23L,JacPDstandardNth3beta2)),ToReal(6),kmul(alphaL,kmadd(kmadd(At13L,Atm13,kmul(At23L,Atm23)),ToReal(6),kmadd(At33L,kmadd(trKL,ToReal(-3),kmul(Atm33,ToReal(6))),kmul(em4phi,kmul(kmadd(g33,kmul(trS,ToReal(-8)),kmul(eTzzL,ToReal(24))),ToReal(Pi)))))))))); + + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_RHS2); + LC_ENDLOOP3VEC(ML_BSSN_RHS2); } extern "C" void ML_BSSN_RHS2(CCTK_ARGUMENTS) @@ -1872,7 +1830,15 @@ extern "C" void ML_BSSN_RHS2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_curvrhs","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_RHS2", 8, groups); switch(fdOrder) @@ -1894,7 +1860,7 @@ extern "C" void ML_BSSN_RHS2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_RHS2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_RHS2_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc b/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc index ed145c6..ba11ca7 100644 --- a/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc +++ b/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_RHSStaticBoundary, + LC_LOOP3VEC(ML_BSSN_RHSStaticBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in CCTK_REAL_VEC B3rhsL = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); - vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); - vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); - vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); - vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); - vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); - vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); - vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); - vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); - vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); - vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); - vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); - vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); - vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); - vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); - vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); - vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); - vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); - vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); - vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); - vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); - vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); - vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); - vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); - vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); - break; - } - vec_store_nta(alpharhs[index],alpharhsL); - vec_store_nta(Arhs[index],ArhsL); - vec_store_nta(At11rhs[index],At11rhsL); - vec_store_nta(At12rhs[index],At12rhsL); - vec_store_nta(At13rhs[index],At13rhsL); - vec_store_nta(At22rhs[index],At22rhsL); - vec_store_nta(At23rhs[index],At23rhsL); - vec_store_nta(At33rhs[index],At33rhsL); - vec_store_nta(B1rhs[index],B1rhsL); - vec_store_nta(B2rhs[index],B2rhsL); - vec_store_nta(B3rhs[index],B3rhsL); - vec_store_nta(beta1rhs[index],beta1rhsL); - vec_store_nta(beta2rhs[index],beta2rhsL); - vec_store_nta(beta3rhs[index],beta3rhsL); - vec_store_nta(gt11rhs[index],gt11rhsL); - vec_store_nta(gt12rhs[index],gt12rhsL); - vec_store_nta(gt13rhs[index],gt13rhsL); - vec_store_nta(gt22rhs[index],gt22rhsL); - vec_store_nta(gt23rhs[index],gt23rhsL); - vec_store_nta(gt33rhs[index],gt33rhsL); - vec_store_nta(phirhs[index],phirhsL); - vec_store_nta(trKrhs[index],trKrhsL); - vec_store_nta(Xt1rhs[index],Xt1rhsL); - vec_store_nta(Xt2rhs[index],Xt2rhsL); - vec_store_nta(Xt3rhs[index],Xt3rhsL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpharhs[index],alpharhsL); + vec_store_nta_partial(Arhs[index],ArhsL); + vec_store_nta_partial(At11rhs[index],At11rhsL); + vec_store_nta_partial(At12rhs[index],At12rhsL); + vec_store_nta_partial(At13rhs[index],At13rhsL); + vec_store_nta_partial(At22rhs[index],At22rhsL); + vec_store_nta_partial(At23rhs[index],At23rhsL); + vec_store_nta_partial(At33rhs[index],At33rhsL); + vec_store_nta_partial(B1rhs[index],B1rhsL); + vec_store_nta_partial(B2rhs[index],B2rhsL); + vec_store_nta_partial(B3rhs[index],B3rhsL); + vec_store_nta_partial(beta1rhs[index],beta1rhsL); + vec_store_nta_partial(beta2rhs[index],beta2rhsL); + vec_store_nta_partial(beta3rhs[index],beta3rhsL); + vec_store_nta_partial(gt11rhs[index],gt11rhsL); + vec_store_nta_partial(gt12rhs[index],gt12rhsL); + vec_store_nta_partial(gt13rhs[index],gt13rhsL); + vec_store_nta_partial(gt22rhs[index],gt22rhsL); + vec_store_nta_partial(gt23rhs[index],gt23rhsL); + vec_store_nta_partial(gt33rhs[index],gt33rhsL); + vec_store_nta_partial(phirhs[index],phirhsL); + vec_store_nta_partial(trKrhs[index],trKrhsL); + vec_store_nta_partial(Xt1rhs[index],Xt1rhsL); + vec_store_nta_partial(Xt2rhs[index],Xt2rhsL); + vec_store_nta_partial(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3VEC (ML_BSSN_RHSStaticBoundary); + LC_ENDLOOP3VEC(ML_BSSN_RHSStaticBoundary); } extern "C" void ML_BSSN_RHSStaticBoundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_RHSStaticBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curvrhs","ML_BSSN::ML_dtlapserhs","ML_BSSN::ML_dtshiftrhs","ML_BSSN::ML_Gammarhs","ML_BSSN::ML_lapserhs","ML_BSSN::ML_log_confacrhs","ML_BSSN::ML_metricrhs","ML_BSSN::ML_shiftrhs","ML_BSSN::ML_trace_curvrhs"}; + const char *const groups[] = { + "ML_BSSN::ML_curvrhs", + "ML_BSSN::ML_dtlapserhs", + "ML_BSSN::ML_dtshiftrhs", + "ML_BSSN::ML_Gammarhs", + "ML_BSSN::ML_lapserhs", + "ML_BSSN::ML_log_confacrhs", + "ML_BSSN::ML_metricrhs", + "ML_BSSN::ML_shiftrhs", + "ML_BSSN::ML_trace_curvrhs"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_RHSStaticBoundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_RHSStaticBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundary(cctkGH, &ML_BSSN_RHSStaticBoundary_Body); + GenericFD_LoopOverBoundary(cctkGH, ML_BSSN_RHSStaticBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_boundary.cc b/ML_BSSN/src/ML_BSSN_boundary.cc index 950546b..140da41 100644 --- a/ML_BSSN/src/ML_BSSN_boundary.cc +++ b/ML_BSSN/src/ML_BSSN_boundary.cc @@ -65,8 +65,6 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -103,9 +101,9 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -124,14 +122,14 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -145,9 +143,9 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -228,7 +226,7 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_boundary, + LC_LOOP3VEC(ML_BSSN_boundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -310,132 +308,35 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d CCTK_REAL_VEC B3L = ToReal(0); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_boundary); + LC_ENDLOOP3VEC(ML_BSSN_boundary); } extern "C" void ML_BSSN_boundary(CCTK_ARGUMENTS) @@ -454,7 +355,16 @@ extern "C" void ML_BSSN_boundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_boundary", 9, groups); switch(fdOrder) @@ -472,7 +382,7 @@ extern "C" void ML_BSSN_boundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_boundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_boundary_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_constraints1.cc b/ML_BSSN/src/ML_BSSN_constraints1.cc index b44a951..8f89811 100644 --- a/ML_BSSN/src/ML_BSSN_constraints1.cc +++ b/ML_BSSN/src/ML_BSSN_constraints1.cc @@ -41,8 +41,6 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -79,9 +77,9 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -100,14 +98,14 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -121,9 +119,9 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -204,7 +202,7 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_constraints1, + LC_LOOP3VEC(ML_BSSN_constraints1, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -1561,38 +1559,13 @@ static void ML_BSSN_constraints1_Body(cGH const * restrict const cctkGH, int con kmul(INV(SQR(alphaL)),kadd(eTttL,kmadd(eTxxL,SQR(beta1L),kmadd(eTyyL,SQR(beta2L),kmadd(eTzzL,SQR(beta3L),kmadd(kmadd(beta2L,eTtyL,kmul(beta3L,eTtzL)),ToReal(-2),kmul(kmadd(beta2L,kmul(beta3L,eTyzL),kmul(beta1L,kmadd(beta2L,eTxyL,kmsub(beta3L,eTxzL,eTtxL)))),ToReal(2)))))))); CCTK_REAL_VEC HL = - kadd(trR,kmadd(Pi,kmul(rho,ToReal(-16)),kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(kmsub(SQR(trKL),ToReal(0.666666666666666666666666666667),SQR(Atm33)),SQR(Atm22)),SQR(Atm11))))); - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(H[index],HL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ + kadd(trR,kmadd(kmadd(Atm12,Atm21,kmadd(Atm13,Atm31,kmul(Atm23,Atm32))),ToReal(-2),ksub(ksub(ksub(kmadd(SQR(trKL),ToReal(0.666666666666666666666666666667),kmul(rho,kmul(ToReal(-16),ToReal(Pi)))),SQR(Atm33)),SQR(Atm22)),SQR(Atm11)))); - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(H[index],HL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(H[index],HL,elt_count); - break; - } - vec_store_nta(H[index],HL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(H[index],HL); } - LC_ENDLOOP3VEC (ML_BSSN_constraints1); + LC_ENDLOOP3VEC(ML_BSSN_constraints1); } extern "C" void ML_BSSN_constraints1(CCTK_ARGUMENTS) @@ -1611,7 +1584,15 @@ extern "C" void ML_BSSN_constraints1(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_Gamma","ML_BSSN::ML_Ham","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_Ham", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_constraints1", 8, groups); switch(fdOrder) @@ -1633,7 +1614,7 @@ extern "C" void ML_BSSN_constraints1(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_constraints1_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_constraints1_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_constraints2.cc b/ML_BSSN/src/ML_BSSN_constraints2.cc index 09be780..ecc059b 100644 --- a/ML_BSSN/src/ML_BSSN_constraints2.cc +++ b/ML_BSSN/src/ML_BSSN_constraints2.cc @@ -50,8 +50,6 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -88,9 +86,9 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -109,14 +107,14 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -130,9 +128,9 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -213,7 +211,7 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_constraints2, + LC_LOOP3VEC(ML_BSSN_constraints2, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -912,13 +910,13 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con kmul(INV(alphaL),kmadd(beta1L,eTxzL,kmadd(beta2L,eTyzL,kmsub(beta3L,eTzzL,eTtzL)))); CCTK_REAL_VEC M1L = - kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(Pi,kmul(S1,ToReal(-8)),kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmul(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At11,kmadd(gtu12,JacPDstandardNth2At11,kmadd(gtu22,JacPDstandardNth2At12,kmadd(gtu23,JacPDstandardNth2At13,kmadd(gtu13,JacPDstandardNth3At11,kmadd(gtu23,JacPDstandardNth3At12,kmadd(gtu33,JacPDstandardNth3At13,kmadd(gtu12,kadd(JacPDstandardNth1At12,kmsub(At13L,kmul(Gt312,ToReal(-3)),kmul(At22L,Gt211))),kmadd(gtu13,kadd(JacPDstandardNth1At13,kmsub(At13L,kmul(Gt313,ToReal(-3)),kmul(At23L,Gt211))),knmsub(gtu22,kmadd(At23L,Gt312,kmul(At22L,Gt212)),kmadd(Gt311,kmsub(At13L,kmul(gtu11,ToReal(-2)),kmadd(At33L,gtu13,kmul(At23L,gtu12))),kmadd(At13L,kmsub(Gt323,kmul(gtu23,ToReal(-2)),kmul(Gt113,gtu33)),kmadd(JacPDstandardNth1trK,ToReal(-0.666666666666666666666666666667),knmsub(At12L,kmadd(Gt111,gtu12,kmadd(Gt112,gtu22,kmadd(Gt222,gtu22,kmadd(Gt113,gtu23,kmadd(Gt233,gtu33,kmadd(cdphi1,kmul(gtu12,ToReal(-6)),kmadd(cdphi2,kmul(gtu22,ToReal(-6)),kmadd(cdphi3,kmul(gtu23,ToReal(-6)),kmadd(Gt211,kmul(gtu11,ToReal(2)),kmadd(Gt223,kmul(gtu23,ToReal(2)),kmadd(Gt212,kmul(gtu12,ToReal(3)),kmul(Gt213,kmul(gtu13,ToReal(3)))))))))))))),kmadd(gtu23,knmsub(At22L,Gt213,knmsub(At33L,Gt312,kmsub(At13L,kmul(cdphi2,ToReal(6)),kmul(At23L,kadd(Gt313,Gt212))))),kmadd(gtu33,knmsub(At23L,Gt213,kmsub(At13L,kmul(cdphi3,ToReal(6)),kmul(At33L,Gt313))),kmadd(At11L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt111,kmul(gtu11,ToReal(-2)),kmadd(Gt123,kmul(gtu23,ToReal(-2)),knmsub(Gt122,gtu22,knmsub(Gt133,gtu33,kmadd(cdphi1,kmul(gtu11,ToReal(6)),kmadd(cdphi2,kmul(gtu12,ToReal(6)),kmul(cdphi3,kmul(gtu13,ToReal(6))))))))))),kmadd(At13L,knmsub(Gt322,gtu22,knmsub(Gt112,gtu23,kmsub(gtu13,kmsub(cdphi1,ToReal(6),Gt111),kmul(Gt333,gtu33)))),kmul(S1,kmul(ToReal(-8),ToReal(Pi))))))))))))))))))))); CCTK_REAL_VEC M2L = - kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(Pi,kmul(S2,ToReal(-8)),kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmul(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At12,kmadd(gtu12,JacPDstandardNth1At22,kmadd(gtu13,JacPDstandardNth1At23,kmadd(gtu12,JacPDstandardNth2At12,kmadd(gtu22,JacPDstandardNth2At22,kmadd(gtu23,JacPDstandardNth2At23,kmadd(gtu13,JacPDstandardNth3At12,kmadd(gtu23,JacPDstandardNth3At22,kmadd(gtu33,JacPDstandardNth3At23,kmadd(At22L,kmul(Gt222,kmul(gtu22,ToReal(-2))),knmsub(At11L,kmadd(Gt112,gtu11,kmadd(Gt122,gtu12,kmul(Gt123,gtu13))),knmsub(gtu11,kmadd(At22L,Gt211,kmadd(At13L,Gt312,kmul(At23L,Gt311))),kmadd(Gt212,kmsub(At22L,kmul(gtu12,ToReal(-3)),kmul(At23L,gtu13)),kmadd(Gt312,kmsub(At23L,kmul(gtu12,ToReal(-3)),kmul(At33L,gtu13)),kmadd(Gt322,kmsub(At23L,kmul(gtu22,ToReal(-2)),kmul(At33L,gtu23)),kmadd(Gt223,kmsub(At22L,kmul(gtu23,ToReal(-3)),kmul(At23L,gtu33)),kmadd(Gt323,kmsub(At23L,kmul(gtu23,ToReal(-3)),kmul(At33L,gtu33)),kmadd(At22L,kmsub(Gt213,kmul(gtu13,ToReal(-2)),kmul(Gt233,gtu33)),kmadd(At23L,kmsub(Gt313,kmul(gtu13,ToReal(-2)),kmadd(Gt333,gtu33,kmul(Gt222,gtu23))),kmadd(JacPDstandardNth2trK,ToReal(-0.666666666666666666666666666667),kmadd(At22L,kmul(cdphi2,kmul(gtu22,ToReal(6))),kmadd(At22L,kmul(cdphi3,kmul(gtu23,ToReal(6))),kmadd(gtu12,kmsub(At22L,kmul(cdphi1,ToReal(6)),kmul(At13L,Gt322)),kmadd(gtu13,kmsub(At23L,kmul(cdphi1,ToReal(6)),kmul(At13L,kadd(Gt323,Gt112))),kmadd(gtu23,kmsub(At23L,kmul(cdphi2,ToReal(6)),kmul(At13L,Gt122)),kmadd(gtu33,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At13L,Gt123)),kmadd(At12L,kmadd(Gt112,kmul(gtu12,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt113,kmul(gtu13,ToReal(-2)),kmadd(Gt122,kmul(gtu22,ToReal(-2)),knmsub(Gt133,gtu33,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt212),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt222),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt223))))))))),kmul(S2,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC M3L = - kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(Pi,kmul(S3,ToReal(-8)),kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmul(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))))))))))))))))))))))))))))))); + kmadd(gtu11,JacPDstandardNth1At13,kmadd(gtu12,JacPDstandardNth1At23,kmadd(gtu13,JacPDstandardNth1At33,kmadd(gtu12,JacPDstandardNth2At13,kmadd(gtu22,JacPDstandardNth2At23,kmadd(gtu23,JacPDstandardNth2At33,kmadd(gtu13,JacPDstandardNth3At13,kmadd(gtu23,JacPDstandardNth3At23,kmadd(gtu33,JacPDstandardNth3At33,kmadd(At33L,kmul(Gt313,kmul(gtu13,ToReal(-3))),kmadd(At23L,kmul(Gt223,kmul(gtu23,ToReal(-3))),kmadd(At33L,kmul(Gt323,kmul(gtu23,ToReal(-3))),kmadd(At23L,kmul(Gt233,kmul(gtu33,ToReal(-2))),kmadd(At33L,kmul(Gt333,kmul(gtu33,ToReal(-2))),knmsub(At11L,kmadd(Gt113,gtu11,kmadd(Gt123,gtu12,kmul(Gt133,gtu13))),kmadd(gtu12,kmsub(At23L,kmul(Gt212,ToReal(-2)),kmul(At12L,Gt223)),knmsub(gtu11,kmadd(At23L,Gt211,kmadd(At33L,Gt311,kmul(At12L,Gt213))),kmadd(At33L,kmsub(Gt312,kmul(gtu12,ToReal(-2)),kmul(Gt322,gtu22)),kmadd(At23L,kmsub(Gt213,kmul(gtu13,ToReal(-3)),kmadd(Gt333,gtu23,kmul(Gt222,gtu22))),kmadd(JacPDstandardNth3trK,ToReal(-0.666666666666666666666666666667),kmadd(At33L,kmul(cdphi3,kmul(gtu33,ToReal(6))),kmadd(gtu13,kmsub(At33L,kmul(cdphi1,ToReal(6)),kmul(At12L,Gt233)),kmadd(gtu23,kmsub(At33L,kmul(cdphi2,ToReal(6)),kmul(At12L,Gt133)),kmadd(gtu23,kmsub(At23L,kmul(cdphi3,ToReal(6)),kmul(At22L,Gt233)),kmadd(gtu12,knmsub(At12L,Gt113,kmsub(At23L,kmsub(cdphi1,ToReal(6),Gt313),kmul(At22L,Gt213))),kmadd(gtu22,knmsub(At12L,Gt123,kmsub(At23L,kmsub(cdphi2,ToReal(6),Gt323),kmul(At22L,Gt223))),kmadd(At13L,kmadd(Gt113,kmul(gtu13,ToReal(-3)),kmadd(Gt123,kmul(gtu23,ToReal(-3)),kmadd(Gt112,kmul(gtu12,ToReal(-2)),kmadd(Gt133,kmul(gtu33,ToReal(-2)),knmsub(Gt122,gtu22,kmadd(gtu11,ksub(kmsub(cdphi1,ToReal(6),Gt313),Gt111),kmadd(gtu12,kmsub(cdphi2,ToReal(6),Gt323),kmul(gtu13,kmsub(cdphi3,ToReal(6),Gt333))))))))),kmul(S3,kmul(ToReal(-8),ToReal(Pi)))))))))))))))))))))))))))))); CCTK_REAL_VEC cSL = klog(detgt); @@ -934,64 +932,18 @@ static void ML_BSSN_constraints2_Body(cGH const * restrict const cctkGH, int con CCTK_REAL_VEC cAL = kmadd(At11L,gtu11,kmadd(At22L,gtu22,kmadd(At33L,gtu33,kmul(kmadd(At12L,gtu12,kmadd(At13L,gtu13,kmul(At23L,gtu23))),ToReal(2))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(cA[index],cAL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cS[index],cSL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt1[index],cXt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt2[index],cXt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(cXt3[index],cXt3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M1[index],M1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M2[index],M2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(M3[index],M3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(cA[index],cAL,elt_count); - vec_store_nta_partial_hi(cS[index],cSL,elt_count); - vec_store_nta_partial_hi(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_hi(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_hi(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_hi(M1[index],M1L,elt_count); - vec_store_nta_partial_hi(M2[index],M2L,elt_count); - vec_store_nta_partial_hi(M3[index],M3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(cA[index],cAL,elt_count); - vec_store_nta_partial_lo(cS[index],cSL,elt_count); - vec_store_nta_partial_lo(cXt1[index],cXt1L,elt_count); - vec_store_nta_partial_lo(cXt2[index],cXt2L,elt_count); - vec_store_nta_partial_lo(cXt3[index],cXt3L,elt_count); - vec_store_nta_partial_lo(M1[index],M1L,elt_count); - vec_store_nta_partial_lo(M2[index],M2L,elt_count); - vec_store_nta_partial_lo(M3[index],M3L,elt_count); - break; - } - vec_store_nta(cA[index],cAL); - vec_store_nta(cS[index],cSL); - vec_store_nta(cXt1[index],cXt1L); - vec_store_nta(cXt2[index],cXt2L); - vec_store_nta(cXt3[index],cXt3L); - vec_store_nta(M1[index],M1L); - vec_store_nta(M2[index],M2L); - vec_store_nta(M3[index],M3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(cA[index],cAL); + vec_store_nta_partial(cS[index],cSL); + vec_store_nta_partial(cXt1[index],cXt1L); + vec_store_nta_partial(cXt2[index],cXt2L); + vec_store_nta_partial(cXt3[index],cXt3L); + vec_store_nta_partial(M1[index],M1L); + vec_store_nta_partial(M2[index],M2L); + vec_store_nta_partial(M3[index],M3L); } - LC_ENDLOOP3VEC (ML_BSSN_constraints2); + LC_ENDLOOP3VEC(ML_BSSN_constraints2); } extern "C" void ML_BSSN_constraints2(CCTK_ARGUMENTS) @@ -1010,7 +962,18 @@ extern "C" void ML_BSSN_constraints2(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_cons_detg","ML_BSSN::ML_cons_Gamma","ML_BSSN::ML_cons_traceA","ML_BSSN::ML_curv","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_mom","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ML_BSSN::ML_cons_detg", + "ML_BSSN::ML_cons_Gamma", + "ML_BSSN::ML_cons_traceA", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_mom", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_constraints2", 11, groups); switch(fdOrder) @@ -1032,7 +995,7 @@ extern "C" void ML_BSSN_constraints2(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_constraints2_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_constraints2_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc b/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc index 985d026..96db9f9 100644 --- a/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc +++ b/ML_BSSN/src/ML_BSSN_convertFromADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertFromADMBase, + LC_LOOP3VEC(ML_BSSN_convertFromADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -325,104 +323,28 @@ static void ML_BSSN_convertFromADMBase_Body(cGH const * restrict const cctkGH, i CCTK_REAL_VEC beta3L = betazL; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); - vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); - vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); - vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); - vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); - vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); - vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); - vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); - vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); - vec_store_nta_partial_hi(phi[index],phiL,elt_count); - vec_store_nta_partial_hi(trK[index],trKL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); - vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); - vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); - vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); - vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); - vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); - vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); - vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); - vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); - vec_store_nta_partial_lo(phi[index],phiL,elt_count); - vec_store_nta_partial_lo(trK[index],trKL,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); - vec_store_nta(beta1[index],beta1L); - vec_store_nta(beta2[index],beta2L); - vec_store_nta(beta3[index],beta3L); - vec_store_nta(gt11[index],gt11L); - vec_store_nta(gt12[index],gt12L); - vec_store_nta(gt13[index],gt13L); - vec_store_nta(gt22[index],gt22L); - vec_store_nta(gt23[index],gt23L); - vec_store_nta(gt33[index],gt33L); - vec_store_nta(phi[index],phiL); - vec_store_nta(trK[index],trKL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); + vec_store_nta_partial(beta1[index],beta1L); + vec_store_nta_partial(beta2[index],beta2L); + vec_store_nta_partial(beta3[index],beta3L); + vec_store_nta_partial(gt11[index],gt11L); + vec_store_nta_partial(gt12[index],gt12L); + vec_store_nta_partial(gt13[index],gt13L); + vec_store_nta_partial(gt22[index],gt22L); + vec_store_nta_partial(gt23[index],gt23L); + vec_store_nta_partial(gt33[index],gt33L); + vec_store_nta_partial(phi[index],phiL); + vec_store_nta_partial(trK[index],trKL); } - LC_ENDLOOP3VEC (ML_BSSN_convertFromADMBase); + LC_ENDLOOP3VEC(ML_BSSN_convertFromADMBase); } extern "C" void ML_BSSN_convertFromADMBase(CCTK_ARGUMENTS) @@ -441,7 +363,17 @@ extern "C" void ML_BSSN_convertFromADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN::ML_curv","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertFromADMBase", 10, groups); switch(fdOrder) @@ -459,7 +391,7 @@ extern "C" void ML_BSSN_convertFromADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_convertFromADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_convertFromADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc b/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc index b32e95b..37a355b 100644 --- a/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc +++ b/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc @@ -47,8 +47,6 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -85,9 +83,9 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -106,14 +104,14 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -127,9 +125,9 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -210,7 +208,7 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertFromADMBaseGamma, + LC_LOOP3VEC(ML_BSSN_convertFromADMBaseGamma, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -841,13 +839,13 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk if (ShiftBCoeff*ShiftGammaCoeff != 0) { B1L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta1,kmadd(beta2L,JacPDupwindNthAnti2beta1,kmadd(beta3L,JacPDupwindNthAnti3beta1,kmadd(JacPDupwindNthSymm1beta1,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta1,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL)); B2L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta2,kmadd(beta2L,JacPDupwindNthAnti2beta2,kmadd(beta3L,JacPDupwindNthAnti3beta2,kmadd(JacPDupwindNthSymm1beta2,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta2,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL)); B3L = - kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL))); + kmul(INV(kmul(theta,ToReal(ShiftGammaCoeff))),knmsub(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL)); } else { @@ -858,60 +856,17 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk B3L = ToReal(0); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(A[index],AL,elt_count); - vec_store_nta_partial_hi(B1[index],B1L,elt_count); - vec_store_nta_partial_hi(B2[index],B2L,elt_count); - vec_store_nta_partial_hi(B3[index],B3L,elt_count); - vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(A[index],AL,elt_count); - vec_store_nta_partial_lo(B1[index],B1L,elt_count); - vec_store_nta_partial_lo(B2[index],B2L,elt_count); - vec_store_nta_partial_lo(B3[index],B3L,elt_count); - vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); - vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); - vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); - break; - } - vec_store_nta(A[index],AL); - vec_store_nta(B1[index],B1L); - vec_store_nta(B2[index],B2L); - vec_store_nta(B3[index],B3L); - vec_store_nta(Xt1[index],Xt1L); - vec_store_nta(Xt2[index],Xt2L); - vec_store_nta(Xt3[index],Xt3L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(A[index],AL); + vec_store_nta_partial(B1[index],B1L); + vec_store_nta_partial(B2[index],B2L); + vec_store_nta_partial(B3[index],B3L); + vec_store_nta_partial(Xt1[index],Xt1L); + vec_store_nta_partial(Xt2[index],Xt2L); + vec_store_nta_partial(Xt3[index],Xt3L); } - LC_ENDLOOP3VEC (ML_BSSN_convertFromADMBaseGamma); + LC_ENDLOOP3VEC(ML_BSSN_convertFromADMBaseGamma); } extern "C" void ML_BSSN_convertFromADMBaseGamma(CCTK_ARGUMENTS) @@ -930,7 +885,17 @@ extern "C" void ML_BSSN_convertFromADMBaseGamma(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_metric","ML_BSSN::ML_shift"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertFromADMBaseGamma", 10, groups); switch(fdOrder) @@ -952,7 +917,7 @@ extern "C" void ML_BSSN_convertFromADMBaseGamma(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_convertFromADMBaseGamma_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_convertFromADMBaseGamma_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBase.cc b/ML_BSSN/src/ML_BSSN_convertToADMBase.cc index 6afda7b..de1d9df 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBase.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBase.cc @@ -29,8 +29,6 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertToADMBase, + LC_LOOP3VEC(ML_BSSN_convertToADMBase, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -289,96 +287,26 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int CCTK_REAL_VEC betazL = beta3L; - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alp[index],alpL,elt_count); - vec_store_nta_partial_hi(betax[index],betaxL,elt_count); - vec_store_nta_partial_hi(betay[index],betayL,elt_count); - vec_store_nta_partial_hi(betaz[index],betazL,elt_count); - vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); - vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); - vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); - vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); - vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); - vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); - vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); - vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); - vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); - vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); - vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); - vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alp[index],alpL,elt_count); - vec_store_nta_partial_lo(betax[index],betaxL,elt_count); - vec_store_nta_partial_lo(betay[index],betayL,elt_count); - vec_store_nta_partial_lo(betaz[index],betazL,elt_count); - vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); - vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); - vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); - vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); - vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); - vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); - vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); - vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); - vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); - vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); - vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); - vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); - break; - } - vec_store_nta(alp[index],alpL); - vec_store_nta(betax[index],betaxL); - vec_store_nta(betay[index],betayL); - vec_store_nta(betaz[index],betazL); - vec_store_nta(gxx[index],gxxL); - vec_store_nta(gxy[index],gxyL); - vec_store_nta(gxz[index],gxzL); - vec_store_nta(gyy[index],gyyL); - vec_store_nta(gyz[index],gyzL); - vec_store_nta(gzz[index],gzzL); - vec_store_nta(kxx[index],kxxL); - vec_store_nta(kxy[index],kxyL); - vec_store_nta(kxz[index],kxzL); - vec_store_nta(kyy[index],kyyL); - vec_store_nta(kyz[index],kyzL); - vec_store_nta(kzz[index],kzzL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alp[index],alpL); + vec_store_nta_partial(betax[index],betaxL); + vec_store_nta_partial(betay[index],betayL); + vec_store_nta_partial(betaz[index],betazL); + vec_store_nta_partial(gxx[index],gxxL); + vec_store_nta_partial(gxy[index],gxyL); + vec_store_nta_partial(gxz[index],gxzL); + vec_store_nta_partial(gyy[index],gyyL); + vec_store_nta_partial(gyz[index],gyzL); + vec_store_nta_partial(gzz[index],gzzL); + vec_store_nta_partial(kxx[index],kxxL); + vec_store_nta_partial(kxy[index],kxyL); + vec_store_nta_partial(kxz[index],kxzL); + vec_store_nta_partial(kyy[index],kyyL); + vec_store_nta_partial(kyz[index],kyzL); + vec_store_nta_partial(kzz[index],kzzL); } - LC_ENDLOOP3VEC (ML_BSSN_convertToADMBase); + LC_ENDLOOP3VEC(ML_BSSN_convertToADMBase); } extern "C" void ML_BSSN_convertToADMBase(CCTK_ARGUMENTS) @@ -397,7 +325,17 @@ extern "C" void ML_BSSN_convertToADMBase(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::curv","ADMBase::lapse","ADMBase::metric","ADMBase::shift","ML_BSSN::ML_curv","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::curv", + "ADMBase::lapse", + "ADMBase::metric", + "ADMBase::shift", + "ML_BSSN::ML_curv", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertToADMBase", 10, groups); switch(fdOrder) @@ -415,7 +353,7 @@ extern "C" void ML_BSSN_convertToADMBase(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_convertToADMBase_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_convertToADMBase_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc index 9c0a861..8af669c 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShift.cc @@ -44,8 +44,6 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShift, + LC_LOOP3VEC(ML_BSSN_convertToADMBaseDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -851,48 +849,14 @@ static void ML_BSSN_convertToADMBaseDtLapseShift_Body(cGH const * restrict const CCTK_REAL_VEC dtbetazL = kmadd(kmadd(beta1L,JacPDupwindNthAnti1beta3,kmadd(beta2L,JacPDupwindNthAnti2beta3,kmadd(beta3L,JacPDupwindNthAnti3beta3,kmadd(JacPDupwindNthSymm1beta3,kfabs(beta1L),kmadd(JacPDupwindNthSymm2beta3,kfabs(beta2L),kmul(JacPDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),IfThen(harmonicShift,kmul(alphaL,kmul(phiL,kmul(ToReal(0.5),kmadd(phiL,kmul(kmadd(gtu13,JacPDstandardNth1alpha,kmadd(gtu23,JacPDstandardNth2alpha,kmul(gtu33,JacPDstandardNth3alpha))),ToReal(-2)),kmul(alphaL,kmadd(phiL,kmul(kmadd(JacPDstandardNth3gt11,SQR(gtu13),kmul(JacPDstandardNth3gt22,SQR(gtu23))),ToReal(2)),kmadd(gtu23,kmadd(JacPDstandardNth2phi,ToReal(2),kmul(phiL,kmadd(gtu22,JacPDstandardNth2gt22,kmadd(gtu33,JacPDstandardNth2gt33,kmsub(kmadd(gtu11,JacPDstandardNth1gt12,kmadd(gtu12,JacPDstandardNth1gt22,kmul(gtu33,JacPDstandardNth3gt23))),ToReal(2),kmul(gtu11,JacPDstandardNth2gt11)))))),kmadd(gtu33,kmadd(JacPDstandardNth3phi,ToReal(2),kmul(phiL,kmadd(gtu33,JacPDstandardNth3gt33,knmsub(gtu22,JacPDstandardNth3gt22,kmadd(kmadd(gtu22,JacPDstandardNth2gt23,kmul(gtu12,kadd(JacPDstandardNth1gt23,ksub(JacPDstandardNth2gt13,JacPDstandardNth3gt12)))),ToReal(2),kmul(gtu11,kmsub(JacPDstandardNth1gt13,ToReal(2),JacPDstandardNth3gt11))))))),kmul(gtu13,kmadd(JacPDstandardNth1phi,ToReal(2),kmul(phiL,kmadd(gtu11,JacPDstandardNth1gt11,kmadd(gtu12,kmul(JacPDstandardNth2gt11,ToReal(2)),kmadd(gtu22,kmsub(JacPDstandardNth2gt12,ToReal(2),JacPDstandardNth1gt22),kmadd(gtu33,kmadd(JacPDstandardNth3gt13,ToReal(2),JacPDstandardNth1gt33),kmul(gtu23,kmul(JacPDstandardNth3gt12,ToReal(4)))))))))))))))))),kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))))); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_convertToADMBaseDtLapseShift); } extern "C" void ML_BSSN_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) @@ -911,7 +875,19 @@ extern "C" void ML_BSSN_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_log_confac","ML_BSSN::ML_metric","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_log_confac", + "ML_BSSN::ML_metric", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertToADMBaseDtLapseShift", 12, groups); switch(fdOrder) @@ -933,7 +909,7 @@ extern "C" void ML_BSSN_convertToADMBaseDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverInterior(cctkGH, &ML_BSSN_convertToADMBaseDtLapseShift_Body); + GenericFD_LoopOverInterior(cctkGH, ML_BSSN_convertToADMBaseDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc index 37ab562..67e687d 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc @@ -44,8 +44,6 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -82,9 +80,9 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -103,14 +101,14 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -124,9 +122,9 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -207,7 +205,7 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShiftBoundary, + LC_LOOP3VEC(ML_BSSN_convertToADMBaseDtLapseShiftBoundary, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -285,48 +283,14 @@ static void ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body(cGH const * restri kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseDtLapseShiftBoundary); + LC_ENDLOOP3VEC(ML_BSSN_convertToADMBaseDtLapseShiftBoundary); } extern "C" void ML_BSSN_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) @@ -345,7 +309,17 @@ extern "C" void ML_BSSN_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertToADMBaseDtLapseShiftBoundary", 10, groups); switch(fdOrder) @@ -363,7 +337,7 @@ extern "C" void ML_BSSN_convertToADMBaseDtLapseShiftBoundary(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverBoundaryWithGhosts(cctkGH, &ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body); + GenericFD_LoopOverBoundaryWithGhosts(cctkGH, ML_BSSN_convertToADMBaseDtLapseShiftBoundary_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc index f030741..5944c01 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc @@ -29,8 +29,6 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC(ML_BSSN_convertToADMBaseFakeDtLapseShift, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -270,48 +268,14 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c kmul(theta,kmul(kadd(Xt3L,kmadd(ksub(B3L,Xt3L),ToReal(ShiftBCoeff),kmul(beta3L,kmul(eta,kmul(ToReal(BetaDriver),kadd(ToReal(-1),ToReal(ShiftBCoeff))))))),ToReal(ShiftGammaCoeff))); } - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); - vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); - vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); - vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); - break; - } - vec_store_nta(dtalp[index],dtalpL); - vec_store_nta(dtbetax[index],dtbetaxL); - vec_store_nta(dtbetay[index],dtbetayL); - vec_store_nta(dtbetaz[index],dtbetazL); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(dtalp[index],dtalpL); + vec_store_nta_partial(dtbetax[index],dtbetaxL); + vec_store_nta_partial(dtbetay[index],dtbetayL); + vec_store_nta_partial(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC(ML_BSSN_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) @@ -330,7 +294,17 @@ extern "C" void ML_BSSN_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ADMBase::dtlapse","ADMBase::dtshift","grid::coordinates","Grid::coordinates","ML_BSSN::ML_dtlapse","ML_BSSN::ML_dtshift","ML_BSSN::ML_Gamma","ML_BSSN::ML_lapse","ML_BSSN::ML_shift","ML_BSSN::ML_trace_curv"}; + const char *const groups[] = { + "ADMBase::dtlapse", + "ADMBase::dtshift", + "grid::coordinates", + "Grid::coordinates", + "ML_BSSN::ML_dtlapse", + "ML_BSSN::ML_dtshift", + "ML_BSSN::ML_Gamma", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_shift", + "ML_BSSN::ML_trace_curv"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_convertToADMBaseFakeDtLapseShift", 10, groups); switch(fdOrder) @@ -348,7 +322,7 @@ extern "C" void ML_BSSN_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_convertToADMBaseFakeDtLapseShift_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_convertToADMBaseFakeDtLapseShift_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/ML_BSSN_enforce.cc b/ML_BSSN/src/ML_BSSN_enforce.cc index ef5cd15..50ef72a 100644 --- a/ML_BSSN/src/ML_BSSN_enforce.cc +++ b/ML_BSSN/src/ML_BSSN_enforce.cc @@ -29,8 +29,6 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di DECLARE_CCTK_PARAMETERS; - /* Declare finite differencing variables */ - /* Include user-supplied include files */ /* Initialise finite differencing variables */ @@ -67,9 +65,9 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); - CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); - CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.00694444444444444444444444444444)); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(kmul(dy,dz)),ToReal(0.00694444444444444444444444444444)); CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); @@ -88,14 +86,14 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o2dx = kmul(INV(dx),ToReal(0.5)); CCTK_REAL_VEC const p1o2dy = kmul(INV(dy),ToReal(0.5)); CCTK_REAL_VEC const p1o2dz = kmul(INV(dz),ToReal(0.5)); - CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); - CCTK_REAL_VEC const p1o3600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.000277777777777777777777777777778))); + CCTK_REAL_VEC const p1o3600dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.000277777777777777777777777777778)); + CCTK_REAL_VEC const p1o3600dydz = kmul(INV(kmul(dy,dz)),ToReal(0.000277777777777777777777777777778)); CCTK_REAL_VEC const p1o4dx = kmul(INV(dx),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.25))); - CCTK_REAL_VEC const p1o4dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dxdy = kmul(INV(kmul(dx,dy)),ToReal(0.25)); + CCTK_REAL_VEC const p1o4dxdz = kmul(INV(kmul(dx,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dy = kmul(INV(dy),ToReal(0.25)); - CCTK_REAL_VEC const p1o4dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.25))); + CCTK_REAL_VEC const p1o4dydz = kmul(INV(kmul(dy,dz)),ToReal(0.25)); CCTK_REAL_VEC const p1o4dz = kmul(INV(dz),ToReal(0.25)); CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); @@ -109,9 +107,9 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); - CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); - CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(kmul(dx,dy)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(kmul(dx,dz)),ToReal(1.41723356009070294784580498866e-6)); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(kmul(dy,dz)),ToReal(1.41723356009070294784580498866e-6)); CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); @@ -192,7 +190,7 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3VEC (ML_BSSN_enforce, + LC_LOOP3VEC(ML_BSSN_enforce, i,j,k, imin[0],imin[1],imin[2], imax[0],imax[1],imax[2], cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], CCTK_REAL_VEC_SIZE) @@ -280,60 +278,17 @@ static void ML_BSSN_enforce_Body(cGH const * restrict const cctkGH, int const di alphaL = kfmax(alphaL,ToReal(MinimumLapse)); - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count_lo = lc_imin-i; - ptrdiff_t const elt_count_hi = lc_imax-i; - vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); - vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); - break; - } - - /* If necessary, store only partial vectors after the first iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) - { - ptrdiff_t const elt_count = lc_imin-i; - vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); - vec_store_nta_partial_hi(At11[index],At11L,elt_count); - vec_store_nta_partial_hi(At12[index],At12L,elt_count); - vec_store_nta_partial_hi(At13[index],At13L,elt_count); - vec_store_nta_partial_hi(At22[index],At22L,elt_count); - vec_store_nta_partial_hi(At23[index],At23L,elt_count); - vec_store_nta_partial_hi(At33[index],At33L,elt_count); - continue; - } - - /* If necessary, store only partial vectors after the last iteration */ - - if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) - { - ptrdiff_t const elt_count = lc_imax-i; - vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); - vec_store_nta_partial_lo(At11[index],At11L,elt_count); - vec_store_nta_partial_lo(At12[index],At12L,elt_count); - vec_store_nta_partial_lo(At13[index],At13L,elt_count); - vec_store_nta_partial_lo(At22[index],At22L,elt_count); - vec_store_nta_partial_lo(At23[index],At23L,elt_count); - vec_store_nta_partial_lo(At33[index],At33L,elt_count); - break; - } - vec_store_nta(alpha[index],alphaL); - vec_store_nta(At11[index],At11L); - vec_store_nta(At12[index],At12L); - vec_store_nta(At13[index],At13L); - vec_store_nta(At22[index],At22L); - vec_store_nta(At23[index],At23L); - vec_store_nta(At33[index],At33L); + /* Copy local copies back to grid functions */ + vec_store_partial_prepare(i,lc_imin,lc_imax); + vec_store_nta_partial(alpha[index],alphaL); + vec_store_nta_partial(At11[index],At11L); + vec_store_nta_partial(At12[index],At12L); + vec_store_nta_partial(At13[index],At13L); + vec_store_nta_partial(At22[index],At22L); + vec_store_nta_partial(At23[index],At23L); + vec_store_nta_partial(At33[index],At33L); } - LC_ENDLOOP3VEC (ML_BSSN_enforce); + LC_ENDLOOP3VEC(ML_BSSN_enforce); } extern "C" void ML_BSSN_enforce(CCTK_ARGUMENTS) @@ -352,7 +307,10 @@ extern "C" void ML_BSSN_enforce(CCTK_ARGUMENTS) return; } - const char *groups[] = {"ML_BSSN::ML_curv","ML_BSSN::ML_lapse","ML_BSSN::ML_metric"}; + const char *const groups[] = { + "ML_BSSN::ML_curv", + "ML_BSSN::ML_lapse", + "ML_BSSN::ML_metric"}; GenericFD_AssertGroupStorage(cctkGH, "ML_BSSN_enforce", 3, groups); switch(fdOrder) @@ -370,7 +328,7 @@ extern "C" void ML_BSSN_enforce(CCTK_ARGUMENTS) break; } - GenericFD_LoopOverEverything(cctkGH, &ML_BSSN_enforce_Body); + GenericFD_LoopOverEverything(cctkGH, ML_BSSN_enforce_Body); if (verbose > 1) { diff --git a/ML_BSSN/src/make.code.defn b/ML_BSSN/src/make.code.defn index 5fdc3cf..39c368f 100644 --- a/ML_BSSN/src/make.code.defn +++ b/ML_BSSN/src/make.code.defn @@ -1,3 +1,3 @@ # File produced by Kranc -SRCS = Startup.cc RegisterMoL.cc RegisterSymmetries.cc ML_BSSN_Minkowski.cc ML_BSSN_convertFromADMBase.cc ML_BSSN_InitGamma.cc ML_BSSN_convertFromADMBaseGamma.cc ML_BSSN_RHS1.cc ML_BSSN_RHS2.cc ML_BSSN_Dissipation.cc ML_BSSN_Advect.cc ML_BSSN_InitRHS.cc ML_BSSN_RHSStaticBoundary.cc ML_BSSN_enforce.cc ML_BSSN_boundary.cc ML_BSSN_convertToADMBase.cc ML_BSSN_convertToADMBaseDtLapseShift.cc ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_constraints1.cc ML_BSSN_constraints2.cc Boundaries.cc +SRCS = Startup.cc RegisterSymmetries.cc RegisterMoL.cc ML_BSSN_Minkowski.cc ML_BSSN_convertFromADMBase.cc ML_BSSN_InitGamma.cc ML_BSSN_convertFromADMBaseGamma.cc ML_BSSN_RHS1.cc ML_BSSN_RHS2.cc ML_BSSN_Dissipation.cc ML_BSSN_Advect.cc ML_BSSN_InitRHS.cc ML_BSSN_RHSStaticBoundary.cc ML_BSSN_enforce.cc ML_BSSN_boundary.cc ML_BSSN_convertToADMBase.cc ML_BSSN_convertToADMBaseDtLapseShift.cc ML_BSSN_convertToADMBaseDtLapseShiftBoundary.cc ML_BSSN_convertToADMBaseFakeDtLapseShift.cc ML_BSSN_constraints1.cc ML_BSSN_constraints2.cc Boundaries.cc |