From e0ddb73239c73c6de42a01204194173ce65ebff4 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 16 Jan 2013 14:46:17 -0500 Subject: LoopControl: Rewrite Rewrite code in C++. Remove dependency on GSL. Modify algorithm; now traverses arrays bottom-up (by splitting the looping region into equal-sized blocks) instead of top-down (splitting the region into a certain number of blocks) Make multi-threading dynamic Support SMT (hyper-threading), i.e. threads which share the same cache --- Carpet/LoopControl/README | 82 +- Carpet/LoopControl/configuration.ccl | 8 +- Carpet/LoopControl/interface.ccl | 13 + .../par/Bench_McLachlan_Carpet_1lev.hill.par | 134 -- .../par/Bench_McLachlan_Carpet_1lev.legacy.par | 136 -- .../par/Bench_McLachlan_Carpet_1lev.par | 132 -- .../par/Bench_McLachlan_Carpet_1lev.standard.par | 132 -- .../LoopControl/par/WaveToy_Carpet_1lev.hill.par | 69 - .../LoopControl/par/WaveToy_Carpet_1lev.legacy.par | 69 - .../par/WaveToy_Carpet_1lev.standard.par | 67 - Carpet/LoopControl/par/WaveToy_LoopControl.par | 6 - Carpet/LoopControl/par/WaveToy_PUGH.standard.par | 63 - .../par/bench-minkowski-carpet-1lev-legacy.par | 126 ++ .../par/bench-minkowski-carpet-1lev-test.par | 126 ++ .../par/bench-minkowski-carpet-1lev-tiled.par | 126 ++ Carpet/LoopControl/param.ccl | 195 +-- Carpet/LoopControl/schedule.ccl | 35 +- Carpet/LoopControl/src/lc_auto.c | 140 -- Carpet/LoopControl/src/lc_auto.h | 31 - Carpet/LoopControl/src/lc_get_type_sizes.F90 | 21 - Carpet/LoopControl/src/lc_hill.c | 345 ----- Carpet/LoopControl/src/lc_hill.h | 38 - Carpet/LoopControl/src/lc_siman.c | 198 --- Carpet/LoopControl/src/lc_siman.h | 60 - Carpet/LoopControl/src/loopcontrol.F90 | 70 +- Carpet/LoopControl/src/loopcontrol.c | 1431 -------------------- Carpet/LoopControl/src/loopcontrol.cc | 827 +++++++++++ Carpet/LoopControl/src/loopcontrol.h | 611 ++++----- Carpet/LoopControl/src/loopcontrol_fortran.h | 207 +-- Carpet/LoopControl/src/loopcontrol_types.F90 | 84 +- Carpet/LoopControl/src/make.code.defn | 2 +- Carpet/LoopControl/src/type_sizes.F90 | 21 + Carpet/LoopControl/src/wavetoy-loopcontrol.c | 274 ---- .../test/bench-minkowski-carpet-1lev-test.par | 124 ++ .../bench-minkowski-carpet-1lev-test/alp.d.asc | 168 +++ .../bench-minkowski-carpet-1lev-test/alp.x.asc | 98 ++ .../bench-minkowski-carpet-1lev-test/alp.y.asc | 98 ++ .../bench-minkowski-carpet-1lev-test/alp.z.asc | 110 ++ .../bench-minkowski-carpet-1lev-test.par | 124 ++ .../carpetlib-memory-statistics | 5 + 40 files changed, 2485 insertions(+), 4121 deletions(-) delete mode 100644 Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.hill.par delete mode 100644 Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.legacy.par delete mode 100644 Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.par delete mode 100644 Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.standard.par delete mode 100644 Carpet/LoopControl/par/WaveToy_Carpet_1lev.hill.par delete mode 100644 Carpet/LoopControl/par/WaveToy_Carpet_1lev.legacy.par delete mode 100644 Carpet/LoopControl/par/WaveToy_Carpet_1lev.standard.par delete mode 100644 Carpet/LoopControl/par/WaveToy_LoopControl.par delete mode 100644 Carpet/LoopControl/par/WaveToy_PUGH.standard.par create mode 100644 Carpet/LoopControl/par/bench-minkowski-carpet-1lev-legacy.par create mode 100644 Carpet/LoopControl/par/bench-minkowski-carpet-1lev-test.par create mode 100644 Carpet/LoopControl/par/bench-minkowski-carpet-1lev-tiled.par delete mode 100644 Carpet/LoopControl/src/lc_auto.c delete mode 100644 Carpet/LoopControl/src/lc_auto.h delete mode 100644 Carpet/LoopControl/src/lc_get_type_sizes.F90 delete mode 100644 Carpet/LoopControl/src/lc_hill.c delete mode 100644 Carpet/LoopControl/src/lc_hill.h delete mode 100644 Carpet/LoopControl/src/lc_siman.c delete mode 100644 Carpet/LoopControl/src/lc_siman.h delete mode 100644 Carpet/LoopControl/src/loopcontrol.c create mode 100644 Carpet/LoopControl/src/loopcontrol.cc create mode 100644 Carpet/LoopControl/src/type_sizes.F90 delete mode 100644 Carpet/LoopControl/src/wavetoy-loopcontrol.c create mode 100644 Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test.par create mode 100644 Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.d.asc create mode 100644 Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.x.asc create mode 100644 Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.y.asc create mode 100644 Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.z.asc create mode 100644 Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/bench-minkowski-carpet-1lev-test.par create mode 100644 Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/carpetlib-memory-statistics diff --git a/Carpet/LoopControl/README b/Carpet/LoopControl/README index f3c5055b2..b3537fba4 100644 --- a/Carpet/LoopControl/README +++ b/Carpet/LoopControl/README @@ -1,86 +1,10 @@ Cactus Code Thorn LoopControl -Author(s) : Erik Schnetter -Maintainer(s): Erik Schnetter +Author(s) : Erik Schnetter +Maintainer(s): Erik Schnetter Licence : GPLv2+ -------------------------------------------------------------------------- 1. Purpose Iterate over multi-dimensional arrays in an efficient manner, using -OpenMP (if available) and cach-aware loop tiling. - - - -Comments and Ideas: - -Simulated annealing: Does not seem to work reliably. Needs many -iterations, gets stuck in local minima. Does not handle topology -changes well. Doesn't take past performance into account. - - - -Rewrite in C++? This would simplify the data types. - - - -Try loop skewing (see John Shalf's paper). - - - -Assumption: Each iteration is cheap, so that gradual tuning works. - -Design choice: No explicit tuning, works as black box. - -Assumpton: There are so many configurations (which change at run time) -and so many parameter choices (which would need to be tested for each -configuration) that it is not feasible to test all possibilities. - - - - -Idea for a better algorithm: - -Combine several strategies: - -1.a. Robust default setting: - - Topology: split in z direction - - Tiling: [N,1,] (test this more) - -2.a. Local 1D tiling optimisation (based on line searches): - - Globally define some maximum step size - - Choose a direction (i, j, or k) - - Determine a search interval in that direction - - Use exponential search to find interval boundaries - - Use arithmetic search to find minimum - -2.b. Local full tiling optimisation: - - Perform a series of 1D optimisations, until a local minimum in - for all directions has been found - -2.c. Change topology: - - Choose a new, "nearby" tiling - - Start with a good tiling from the previous topology - - Then perform a full tiling optimisation - -2.d. Choose a random new tiling - - Then perform a full tiling optimisation - -2.e. Choose a random new topology - - Then choose a random new tiling - -3.a. Whenever a test is done, repeat it a few times to avoid random - events - - Maybe don't repeat it right away, but wait some time - -3.b. If a value is old, forget it with a certain probability - - Need to remember a "last updated" field with statistics - -4.a. Offer an option to sample a wide range of configurations to give - the user an overview - - Note that a full sampling is too expensive. - - Use a hierarchical strategy for this? - - Use random sampling for this? - -5. Offer the parallelisation mechanism, or the tiling mechanism - separately, without actually looping over the array or over the - tiles. +OpenMP (if available) and cache-aware loop tiling. diff --git a/Carpet/LoopControl/configuration.ccl b/Carpet/LoopControl/configuration.ccl index b1be54634..ec31d6f78 100644 --- a/Carpet/LoopControl/configuration.ccl +++ b/Carpet/LoopControl/configuration.ccl @@ -1,11 +1,13 @@ # Configuration definition for thorn LoopControl -REQUIRES GSL - -PROVIDES LoopControl +OPTIONAL CycleClock { } OPTIONAL Vectors { } + +PROVIDES LoopControl +{ +} diff --git a/Carpet/LoopControl/interface.ccl b/Carpet/LoopControl/interface.ccl index 8769d737b..c05c6f843 100644 --- a/Carpet/LoopControl/interface.ccl +++ b/Carpet/LoopControl/interface.ccl @@ -2,6 +2,19 @@ IMPLEMENTS: LoopControl +INHERITS: CycleClock + INCLUDE HEADER: loopcontrol.h IN loopcontrol.h +USES INCLUDE HEADER: cycleclock.h USES INCLUDE HEADER: vectors.h + + + +CCTK_INT FUNCTION GetNumSMTThreads() +USES FUNCTION GetNumSMTThreads + +CCTK_INT FUNCTION GetCacheInfo1(CCTK_INT ARRAY OUT linesizes, \ + CCTK_INT ARRAY OUT strides, \ + CCTK_INT IN max_num_cache_levels) +USES FUNCTION GetCacheInfo1 diff --git a/Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.hill.par b/Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.hill.par deleted file mode 100644 index 93fb29489..000000000 --- a/Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.hill.par +++ /dev/null @@ -1,134 +0,0 @@ -Cactus::cctk_run_title = "Benchmark of McLachlan using Carpet with one refinement level" - -Cactus::cctk_itlast = @ITERATIONS@ - - - -ActiveThorns = "Fortran" - - - -ActiveThorns = "LoopControl" - -LoopControl::printstats = yes - -LoopControl::legacy_init = no -LoopControl::use_random_restart_hill_climbing = yes - - - -ActiveThorns = "IOUtil" - -IO::out_dir = $parfile - - - -ActiveThorns = "InitBase" - - - -ActiveThorns = "Carpet CarpetLib CarpetReduce" - -Carpet::domain_from_coordbase = yes - -driver::ghost_size = 3 - -Carpet::init_fill_timelevels = yes - -CarpetLib::print_timestats_every = @ITERATIONS@ -CarpetLib::print_memstats_every = @ITERATIONS@ - - - -ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" - -CoordBase::domainsize = "minmax" -CoordBase::xmin = 0.0 -CoordBase::ymin = 0.0 -CoordBase::zmin = 0.0 -CoordBase::xmax = 1.0 -CoordBase::ymax = 1.0 -CoordBase::zmax = 1.0 - -CoordBase::spacing = "numcells" -CoordBase::ncells_x = @NCELLSI@ -CoordBase::ncells_y = @NCELLSJ@ -CoordBase::ncells_z = @NCELLSK@ - -CartGrid3D::type = "coordbase" -CartGrid3D::domain = "octant" -CartGrid3D::avoid_originx = no -CartGrid3D::avoid_originy = no -CartGrid3D::avoid_originz = no - -CoordBase::boundary_size_x_lower = 3 -CoordBase::boundary_size_y_lower = 3 -CoordBase::boundary_size_z_lower = 3 -CoordBase::boundary_shiftout_x_lower = 1 -CoordBase::boundary_shiftout_y_lower = 1 -CoordBase::boundary_shiftout_z_lower = 1 - -CoordBase::boundary_size_x_upper = 3 -CoordBase::boundary_size_y_upper = 3 -CoordBase::boundary_size_z_upper = 3 - - - -ActiveThorns = "MoL NaNChecker Time" - -MoL::ODE_Method = "RK4" -MoL::MoL_Intermediate_Steps = 4 -MoL::MoL_Num_Scratch_Levels = 1 - -Time::dtfac = 0.4 - - - -ActiveThorns = "ADMBase ADMCoupling ADMMacros CoordGauge StaticConformal" - -ADMBase::initial_data = "Cartesian Minkowski" -ADMBase::initial_lapse = "one" -ADMBase::initial_shift = "zero" -ADMBase::initial_dtlapse = "zero" -ADMBase::initial_dtshift = "zero" - - - -ActiveThorns = "GenericFD ML_BSSN ML_BSSN_Helper TmunuBase" - -ADMBase::evolution_method = "ML_BSSN" -ADMBase::lapse_evolution_method = "ML_BSSN" -ADMBase::shift_evolution_method = "ML_BSSN" - -ML_BSSN::my_boundary_condition = "Minkowski" - -ML_BSSN::harmonicN = 1 # 1+log -ML_BSSN::harmonicF = 2.0 # 1+log -ML_BSSN::ShiftGammaCoeff = 0.75 -ML_BSSN::BetaDriver = 0.5 - - - -ActiveThorns = "CarpetIOBasic" - -IOBasic::outInfo_every = @ITERATIONS@ -#IOBasic::outInfo_vars = "ADMBase::alp" - - - -#ActiveThorns = "CarpetIOASCII" -# -#IOASCII::out0D_every = @ITERATIONS@ -#IOASCII::out0D_vars = "Carpet::timing" - - - -ActiveThorns = "TimerReport" - -TimerReport::output_all_timers = yes -TimerReport::all_timers_clock = "cycle" -TimerReport::out_every = @ITERATIONS@ -TimerReport::out_filename = "TimerReport" -TimerReport::output_all_timers_together = yes -TimerReport::output_all_timers_readable = yes -TimerReport::n_top_timers = 20 diff --git a/Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.legacy.par b/Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.legacy.par deleted file mode 100644 index 09be6185d..000000000 --- a/Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.legacy.par +++ /dev/null @@ -1,136 +0,0 @@ -Cactus::cctk_run_title = "Benchmark of McLachlan using Carpet with one refinement level" - -Cactus::cctk_itlast = @ITERATIONS@ - - - -ActiveThorns = "Fortran" - - - -ActiveThorns = "LoopControl" - -LoopControl::printstats = yes - -LoopControl::legacy_init = yes -LoopControl::use_random_restart_hill_climbing = no - - - -ActiveThorns = "IOUtil" - -IO::out_dir = $parfile - - - -ActiveThorns = "InitBase" - - - -ActiveThorns = "Carpet CarpetLib CarpetReduce" - -Carpet::domain_from_coordbase = yes - -driver::ghost_size = 3 - -Carpet::init_fill_timelevels = yes - -CarpetLib::avoid_arraysize_bytes = 0 - -CarpetLib::print_timestats_every = @ITERATIONS@ -CarpetLib::print_memstats_every = @ITERATIONS@ - - - -ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" - -CoordBase::domainsize = "minmax" -CoordBase::xmin = 0.0 -CoordBase::ymin = 0.0 -CoordBase::zmin = 0.0 -CoordBase::xmax = 1.0 -CoordBase::ymax = 1.0 -CoordBase::zmax = 1.0 - -CoordBase::spacing = "numcells" -CoordBase::ncells_x = @NCELLSI@ -CoordBase::ncells_y = @NCELLSJ@ -CoordBase::ncells_z = @NCELLSK@ - -CartGrid3D::type = "coordbase" -CartGrid3D::domain = "octant" -CartGrid3D::avoid_originx = no -CartGrid3D::avoid_originy = no -CartGrid3D::avoid_originz = no - -CoordBase::boundary_size_x_lower = 3 -CoordBase::boundary_size_y_lower = 3 -CoordBase::boundary_size_z_lower = 3 -CoordBase::boundary_shiftout_x_lower = 1 -CoordBase::boundary_shiftout_y_lower = 1 -CoordBase::boundary_shiftout_z_lower = 1 - -CoordBase::boundary_size_x_upper = 3 -CoordBase::boundary_size_y_upper = 3 -CoordBase::boundary_size_z_upper = 3 - - - -ActiveThorns = "MoL NaNChecker Time" - -MoL::ODE_Method = "RK4" -MoL::MoL_Intermediate_Steps = 4 -MoL::MoL_Num_Scratch_Levels = 1 - -Time::dtfac = 0.4 - - - -ActiveThorns = "ADMBase ADMCoupling ADMMacros CoordGauge StaticConformal" - -ADMBase::initial_data = "Cartesian Minkowski" -ADMBase::initial_lapse = "one" -ADMBase::initial_shift = "zero" -ADMBase::initial_dtlapse = "zero" -ADMBase::initial_dtshift = "zero" - - - -ActiveThorns = "GenericFD ML_BSSN ML_BSSN_Helper TmunuBase" - -ADMBase::evolution_method = "ML_BSSN" -ADMBase::lapse_evolution_method = "ML_BSSN" -ADMBase::shift_evolution_method = "ML_BSSN" - -ML_BSSN::my_boundary_condition = "Minkowski" - -ML_BSSN::harmonicN = 1 # 1+log -ML_BSSN::harmonicF = 2.0 # 1+log -ML_BSSN::ShiftGammaCoeff = 0.75 -ML_BSSN::BetaDriver = 0.5 - - - -ActiveThorns = "CarpetIOBasic" - -IOBasic::outInfo_every = @ITERATIONS@ -#IOBasic::outInfo_vars = "ADMBase::alp" - - - -#ActiveThorns = "CarpetIOASCII" -# -#IOASCII::out0D_every = @ITERATIONS@ -#IOASCII::out0D_vars = "Carpet::timing" - - - -ActiveThorns = "TimerReport" - -TimerReport::output_all_timers = yes -TimerReport::all_timers_clock = "cycle" -TimerReport::out_every = @ITERATIONS@ -TimerReport::out_filename = "TimerReport" -TimerReport::output_all_timers_together = yes -TimerReport::output_all_timers_readable = yes -TimerReport::n_top_timers = 20 diff --git a/Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.par b/Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.par deleted file mode 100644 index ab607feca..000000000 --- a/Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.par +++ /dev/null @@ -1,132 +0,0 @@ -Cactus::cctk_run_title = "Benchmark of McLachlan using Carpet with one refinement level" -Cactus::cctk_timer_output = "full" - -Cactus::cctk_itlast = @ITERATIONS@ - - - -ActiveThorns = "Fortran" - - - -ActiveThorns = "LoopControl" - -#LoopControl::printstats = yes - -LoopControl::legacy_init = @LEGACY_INIT@ -LoopControl::use_random_restart_hill_climbing = @HILL_CLIMBING@ - - - -ActiveThorns = "IOUtil" - -#IO::print_timing_info = yes - - - -ActiveThorns = "InitBase" - - - -ActiveThorns = "Carpet CarpetLib CarpetReduce" - -Carpet::domain_from_coordbase = yes - -driver::ghost_size = 3 - -Carpet::init_fill_timelevels = yes - -CarpetLib::combine_recompose = yes - -#CarpetLib::print_timestats_every = 1 -CarpetLib::print_memstats_every = @ITERATIONS@ - - - -ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" - -CoordBase::domainsize = "minmax" -CoordBase::xmin = 0.0 -CoordBase::ymin = 0.0 -CoordBase::zmin = 0.0 -CoordBase::xmax = 1.0 -CoordBase::ymax = 1.0 -CoordBase::zmax = 1.0 - -CoordBase::spacing = "numcells" -CoordBase::ncells_x = @NCELLSI@ -CoordBase::ncells_y = @NCELLSJ@ -CoordBase::ncells_z = @NCELLSK@ - -CartGrid3D::type = "coordbase" -CartGrid3D::domain = "octant" -CartGrid3D::avoid_originx = no -CartGrid3D::avoid_originy = no -CartGrid3D::avoid_originz = no - -CoordBase::boundary_size_x_lower = 3 -CoordBase::boundary_size_y_lower = 3 -CoordBase::boundary_size_z_lower = 3 -CoordBase::boundary_shiftout_x_lower = 1 -CoordBase::boundary_shiftout_y_lower = 1 -CoordBase::boundary_shiftout_z_lower = 1 - -CoordBase::boundary_size_x_upper = 3 -CoordBase::boundary_size_y_upper = 3 -CoordBase::boundary_size_z_upper = 3 - - - -ActiveThorns = "MoL NaNChecker Time" - -MoL::ODE_Method = "RK4" -MoL::MoL_Intermediate_Steps = 4 -MoL::MoL_Num_Scratch_Levels = 1 - -Time::dtfac = 0.4 - - - -ActiveThorns = "ADMBase ADMCoupling ADMMacros CoordGauge StaticConformal" - -ADMBase::initial_data = "Cartesian Minkowski" -ADMBase::initial_lapse = "one" -ADMBase::initial_shift = "zero" -ADMBase::initial_dtlapse = "zero" -ADMBase::initial_dtshift = "zero" - - - -ActiveThorns = "GenericFD ML_BSSN ML_BSSN_Helper TmunuBase" - -ADMBase::evolution_method = "ML_BSSN" -ADMBase::lapse_evolution_method = "ML_BSSN" -ADMBase::shift_evolution_method = "ML_BSSN" - -ML_BSSN::my_boundary_condition = "Minkowski" - -ML_BSSN::harmonicN = 1 # 1+log -ML_BSSN::harmonicF = 2.0 # 1+log -ML_BSSN::ShiftGammaCoeff = 0.75 -ML_BSSN::BetaDriver = 0.5 - - - -ActiveThorns = "CarpetIOBasic" - -IOBasic::outInfo_every = @ITERATIONS@ -#IOBasic::outInfo_vars = "ADMBase::alp" - - - -#ActiveThorns = "CarpetIOASCII" -# -#IOASCII::out0D_every = @ITERATIONS@ -#IOASCII::out0D_vars = "Carpet::timing" - - - -ActiveThorns = "TimerReport" - -TimerReport::output_all_timers = yes -TimerReport::all_timers_clock = "cycle" diff --git a/Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.standard.par b/Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.standard.par deleted file mode 100644 index 690b37c26..000000000 --- a/Carpet/LoopControl/par/Bench_McLachlan_Carpet_1lev.standard.par +++ /dev/null @@ -1,132 +0,0 @@ -Cactus::cctk_run_title = "Benchmark of McLachlan using Carpet with one refinement level" -Cactus::cctk_timer_output = "full" - -Cactus::cctk_itlast = @ITERATIONS@ - - - -ActiveThorns = "Fortran" - - - -ActiveThorns = "LoopControl" - -#LoopControl::printstats = yes - -LoopControl::legacy_init = no -LoopControl::use_random_restart_hill_climbing = no - - - -ActiveThorns = "IOUtil" - -#IO::print_timing_info = yes - - - -ActiveThorns = "InitBase" - - - -ActiveThorns = "Carpet CarpetLib CarpetReduce" - -Carpet::domain_from_coordbase = yes - -driver::ghost_size = 3 - -Carpet::init_fill_timelevels = yes - -CarpetLib::combine_recompose = yes - -#CarpetLib::print_timestats_every = 1 -CarpetLib::print_memstats_every = @ITERATIONS@ - - - -ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" - -CoordBase::domainsize = "minmax" -CoordBase::xmin = 0.0 -CoordBase::ymin = 0.0 -CoordBase::zmin = 0.0 -CoordBase::xmax = 1.0 -CoordBase::ymax = 1.0 -CoordBase::zmax = 1.0 - -CoordBase::spacing = "numcells" -CoordBase::ncells_x = @NCELLSI@ -CoordBase::ncells_y = @NCELLSJ@ -CoordBase::ncells_z = @NCELLSK@ - -CartGrid3D::type = "coordbase" -CartGrid3D::domain = "octant" -CartGrid3D::avoid_originx = no -CartGrid3D::avoid_originy = no -CartGrid3D::avoid_originz = no - -CoordBase::boundary_size_x_lower = 3 -CoordBase::boundary_size_y_lower = 3 -CoordBase::boundary_size_z_lower = 3 -CoordBase::boundary_shiftout_x_lower = 1 -CoordBase::boundary_shiftout_y_lower = 1 -CoordBase::boundary_shiftout_z_lower = 1 - -CoordBase::boundary_size_x_upper = 3 -CoordBase::boundary_size_y_upper = 3 -CoordBase::boundary_size_z_upper = 3 - - - -ActiveThorns = "MoL NaNChecker Time" - -MoL::ODE_Method = "RK4" -MoL::MoL_Intermediate_Steps = 4 -MoL::MoL_Num_Scratch_Levels = 1 - -Time::dtfac = 0.4 - - - -ActiveThorns = "ADMBase ADMCoupling ADMMacros CoordGauge StaticConformal" - -ADMBase::initial_data = "Cartesian Minkowski" -ADMBase::initial_lapse = "one" -ADMBase::initial_shift = "zero" -ADMBase::initial_dtlapse = "zero" -ADMBase::initial_dtshift = "zero" - - - -ActiveThorns = "GenericFD ML_BSSN ML_BSSN_Helper TmunuBase" - -ADMBase::evolution_method = "ML_BSSN" -ADMBase::lapse_evolution_method = "ML_BSSN" -ADMBase::shift_evolution_method = "ML_BSSN" - -ML_BSSN::my_boundary_condition = "Minkowski" - -ML_BSSN::harmonicN = 1 # 1+log -ML_BSSN::harmonicF = 2.0 # 1+log -ML_BSSN::ShiftGammaCoeff = 0.75 -ML_BSSN::BetaDriver = 0.5 - - - -ActiveThorns = "CarpetIOBasic" - -IOBasic::outInfo_every = @ITERATIONS@ -#IOBasic::outInfo_vars = "ADMBase::alp" - - - -#ActiveThorns = "CarpetIOASCII" -# -#IOASCII::out0D_every = @ITERATIONS@ -#IOASCII::out0D_vars = "Carpet::timing" - - - -ActiveThorns = "TimerReport" - -TimerReport::output_all_timers = yes -TimerReport::all_timers_clock = "cycle" diff --git a/Carpet/LoopControl/par/WaveToy_Carpet_1lev.hill.par b/Carpet/LoopControl/par/WaveToy_Carpet_1lev.hill.par deleted file mode 100644 index 1dbeec5b1..000000000 --- a/Carpet/LoopControl/par/WaveToy_Carpet_1lev.hill.par +++ /dev/null @@ -1,69 +0,0 @@ -Cactus::cctk_run_title = "Benchmark of WaveToy using Carpet with one refinement level" -Cactus::cctk_timer_output = "full" - -Cactus::cctk_itlast = 10000 - - - -#ActiveThorns = "MPIClock" - - - -ActiveThorns = "IOUtil" - -#IO::print_timing_info = yes - - - -Activethorns = "LoopControl" - -LoopControl::printstats = yes -#LoopControl::verbose = yes -#LoopControl::debug = yes - -LoopControl::use_random_restart_hill_climbing = yes - - - -ActiveThorns = "InitBase" - -InitBase::initial_data_setup_method = "init_two_levels" - - - -ActiveThorns = "Carpet CarpetLib CarpetReduce" - -Carpet::constant_load_per_processor = yes -driver::global_nsize = 80 -driver::ghost_size = 1 - - - -ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" - -grid::type = "box" - - - -ActiveThorns = "IDScalarWaveC WaveToyC" - -WaveToy::bound = "zero" - - - -ActiveThorns = "Time" - -Time::dtfac = 0.5 - - - -ActiveThorns = "CarpetIOBasic" - -IOBasic::outInfo_every = 1000 -IOBasic::outInfo_reductions = "maximum" -IOBasic::outInfo_vars = " - Carpet::grid_points_per_second - Carpet::grid_point_updates_count - Carpet::time_total - Carpet::time_computing -" diff --git a/Carpet/LoopControl/par/WaveToy_Carpet_1lev.legacy.par b/Carpet/LoopControl/par/WaveToy_Carpet_1lev.legacy.par deleted file mode 100644 index 7a715bfe0..000000000 --- a/Carpet/LoopControl/par/WaveToy_Carpet_1lev.legacy.par +++ /dev/null @@ -1,69 +0,0 @@ -Cactus::cctk_run_title = "Benchmark of WaveToy using Carpet with one refinement level" -Cactus::cctk_timer_output = "full" - -Cactus::cctk_itlast = 10000 - - - -#ActiveThorns = "MPIClock" - - - -ActiveThorns = "IOUtil" - -#IO::print_timing_info = yes - - - -Activethorns = "LoopControl" - -LoopControl::printstats = yes -#LoopControl::verbose = yes -#LoopControl::debug = yes - -LoopControl::legacy_init = yes - - - -ActiveThorns = "InitBase" - -InitBase::initial_data_setup_method = "init_two_levels" - - - -ActiveThorns = "Carpet CarpetLib CarpetReduce" - -Carpet::constant_load_per_processor = yes -driver::global_nsize = 80 -driver::ghost_size = 1 - - - -ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" - -grid::type = "box" - - - -ActiveThorns = "IDScalarWaveC WaveToyC" - -WaveToy::bound = "zero" - - - -ActiveThorns = "Time" - -Time::dtfac = 0.5 - - - -ActiveThorns = "CarpetIOBasic" - -IOBasic::outInfo_every = 1000 -IOBasic::outInfo_reductions = "maximum" -IOBasic::outInfo_vars = " - Carpet::grid_points_per_second - Carpet::grid_point_updates_count - Carpet::time_total - Carpet::time_computing -" diff --git a/Carpet/LoopControl/par/WaveToy_Carpet_1lev.standard.par b/Carpet/LoopControl/par/WaveToy_Carpet_1lev.standard.par deleted file mode 100644 index c776d8ec6..000000000 --- a/Carpet/LoopControl/par/WaveToy_Carpet_1lev.standard.par +++ /dev/null @@ -1,67 +0,0 @@ -Cactus::cctk_run_title = "Benchmark of WaveToy using Carpet with one refinement level" -Cactus::cctk_timer_output = "full" - -Cactus::cctk_itlast = 10000 - - - -#ActiveThorns = "MPIClock" - - - -ActiveThorns = "IOUtil" - -#IO::print_timing_info = yes - - - -Activethorns = "LoopControl" - -LoopControl::printstats = yes -#LoopControl::verbose = yes -#LoopControl::debug = yes - - - -ActiveThorns = "InitBase" - -InitBase::initial_data_setup_method = "init_two_levels" - - - -ActiveThorns = "Carpet CarpetLib CarpetReduce" - -Carpet::constant_load_per_processor = yes -driver::global_nsize = 80 -driver::ghost_size = 1 - - - -ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" - -grid::type = "box" - - - -ActiveThorns = "IDScalarWaveC WaveToyC" - -WaveToy::bound = "zero" - - - -ActiveThorns = "Time" - -Time::dtfac = 0.5 - - - -ActiveThorns = "CarpetIOBasic" - -IOBasic::outInfo_every = 1000 -IOBasic::outInfo_reductions = "maximum" -IOBasic::outInfo_vars = " - Carpet::grid_points_per_second - Carpet::grid_point_updates_count - Carpet::time_total - Carpet::time_computing -" diff --git a/Carpet/LoopControl/par/WaveToy_LoopControl.par b/Carpet/LoopControl/par/WaveToy_LoopControl.par deleted file mode 100644 index 81fe68df6..000000000 --- a/Carpet/LoopControl/par/WaveToy_LoopControl.par +++ /dev/null @@ -1,6 +0,0 @@ -ActiveThorns = "LoopControl GSL" - -LoopControl::run_demo = yes -LoopControl::nx = 300 -LoopControl::ny = 300 -LoopControl::nz = 300 diff --git a/Carpet/LoopControl/par/WaveToy_PUGH.standard.par b/Carpet/LoopControl/par/WaveToy_PUGH.standard.par deleted file mode 100644 index a8e166190..000000000 --- a/Carpet/LoopControl/par/WaveToy_PUGH.standard.par +++ /dev/null @@ -1,63 +0,0 @@ -Cactus::cctk_run_title = "Benchmark of WaveToy using Carpet with one refinement level" -Cactus::cctk_timer_output = "full" - -Cactus::cctk_itlast = 10000 - - - -#ActiveThorns = "MPIClock" - - - -ActiveThorns = "IOUtil" - -#IO::print_timing_info = yes - - - -Activethorns = "LoopControl" - -LoopControl::printstats = yes -#LoopControl::verbose = yes -#LoopControl::debug = yes - - - -ActiveThorns = "InitBase" - -InitBase::initial_data_setup_method = "init_two_levels" - - - -ActiveThorns = "PUGH PUGHInterp PUGHReduce PUGHSlab LocalReduce" - -PUGH::local_size_includes_ghosts = no -driver::local_nsize = 80 -driver::ghost_size = 1 - - - -ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" - -grid::type = "box" - - - -ActiveThorns = "IDScalarWaveC WaveToyC" - -WaveToy::bound = "zero" - - - -ActiveThorns = "Time" - -Time::dtfac = 0.5 - - - -ActiveThorns = "IOBasic" - -IOBasic::outInfo_every = 1000 -IOBasic::outInfo_vars = " - WaveToy::phi -" diff --git a/Carpet/LoopControl/par/bench-minkowski-carpet-1lev-legacy.par b/Carpet/LoopControl/par/bench-minkowski-carpet-1lev-legacy.par new file mode 100644 index 000000000..54f111a91 --- /dev/null +++ b/Carpet/LoopControl/par/bench-minkowski-carpet-1lev-legacy.par @@ -0,0 +1,126 @@ +Cactus::cctk_run_title = "Benchmark of McLachlan using Carpet with one refinement level" +Cactus::cctk_timer_output = "full" + +Cactus::cctk_itlast = 100 + + + +ActiveThorns = "CycleClock Fortran hwloc MPI" + + + +ActiveThorns = "IOUtil" + +IO::out_dir = $parfile + + + +ActiveThorns = "InitBase" + + + +ActiveThorns = "LoopControl" + +LoopControl::verbose = no +LoopControl::veryverbose = no +LoopControl::selftest = no + +LoopControl::initial_setup = "legacy" + + + +ActiveThorns = "Carpet CarpetLib CarpetReduce" + +Carpet::domain_from_coordbase = yes + +driver::ghost_size = 3 + +Carpet::init_fill_timelevels = yes + +#CarpetLib::print_timestats_every = 1 +CarpetLib::print_memstats_every = 100 + + + +ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" + +CoordBase::domainsize = "minmax" +CoordBase::xmin = 0.0 +CoordBase::ymin = 0.0 +CoordBase::zmin = 0.0 +CoordBase::xmax = 1.0 +CoordBase::ymax = 1.0 +CoordBase::zmax = 1.0 + +CoordBase::spacing = "numcells" +CoordBase::ncells_x = 30 +CoordBase::ncells_y = 30 +CoordBase::ncells_z = 30 + +CartGrid3D::type = "coordbase" +CartGrid3D::avoid_originx = no +CartGrid3D::avoid_originy = no +CartGrid3D::avoid_originz = no + +CoordBase::boundary_size_x_lower = 3 +CoordBase::boundary_size_y_lower = 3 +CoordBase::boundary_size_z_lower = 3 +CoordBase::boundary_size_x_upper = 3 +CoordBase::boundary_size_y_upper = 3 +CoordBase::boundary_size_z_upper = 3 + + + +ActiveThorns = "MoL NaNChecker Time" + +MoL::ODE_Method = "RK4" +MoL::MoL_Intermediate_Steps = 4 +MoL::MoL_Num_Scratch_Levels = 1 + +Time::dtfac = 0.4 + + + +ActiveThorns = "ADMBase ADMCoupling ADMMacros CoordGauge StaticConformal" + +ADMBase::initial_data = "Cartesian Minkowski" +ADMBase::initial_lapse = "one" +ADMBase::initial_shift = "zero" +ADMBase::initial_dtlapse = "zero" +ADMBase::initial_dtshift = "zero" + + + +ActiveThorns = "GenericFD ML_BSSN ML_BSSN_Helper TmunuBase" + +ADMBase::evolution_method = "ML_BSSN" +ADMBase::lapse_evolution_method = "ML_BSSN" +ADMBase::shift_evolution_method = "ML_BSSN" + +ML_BSSN::my_boundary_condition = "Minkowski" + +ML_BSSN::harmonicN = 1 # 1+log +ML_BSSN::harmonicF = 2.0 # 1+log +ML_BSSN::ShiftGammaCoeff = 0.75 +ML_BSSN::BetaDriver = 0.5 + + + +ActiveThorns = "CarpetIOBasic" + +IOBasic::outInfo_every = 100 +IOBasic::outInfo_vars = "ADMBase::alp" + + + +#ActiveThorns = "CarpetIOASCII" +# +#IOASCII::out0D_every = 100 +#IOASCII::out0D_vars = "Carpet::timing" + + + +ActiveThorns = "TimerReport" + +TimerReport::output_all_timers = yes +TimerReport::all_timers_clock = "cycle" diff --git a/Carpet/LoopControl/par/bench-minkowski-carpet-1lev-test.par b/Carpet/LoopControl/par/bench-minkowski-carpet-1lev-test.par new file mode 100644 index 000000000..8c13b89f2 --- /dev/null +++ b/Carpet/LoopControl/par/bench-minkowski-carpet-1lev-test.par @@ -0,0 +1,126 @@ +Cactus::cctk_run_title = "Benchmark of McLachlan using Carpet with one refinement level" +Cactus::cctk_timer_output = "full" + +Cactus::cctk_itlast = 100 + + + +ActiveThorns = "CycleClock Fortran hwloc MPI" + + + +ActiveThorns = "IOUtil" + +IO::out_dir = $parfile + + + +ActiveThorns = "InitBase" + + + +ActiveThorns = "LoopControl" + +LoopControl::verbose = no +LoopControl::veryverbose = no +LoopControl::selftest = yes + +LoopControl::initial_setup = "tiled" + + + +ActiveThorns = "Carpet CarpetLib CarpetReduce" + +Carpet::domain_from_coordbase = yes + +driver::ghost_size = 3 + +Carpet::init_fill_timelevels = yes + +#CarpetLib::print_timestats_every = 1 +CarpetLib::print_memstats_every = 100 + + + +ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" + +CoordBase::domainsize = "minmax" +CoordBase::xmin = 0.0 +CoordBase::ymin = 0.0 +CoordBase::zmin = 0.0 +CoordBase::xmax = 1.0 +CoordBase::ymax = 1.0 +CoordBase::zmax = 1.0 + +CoordBase::spacing = "numcells" +CoordBase::ncells_x = 30 +CoordBase::ncells_y = 30 +CoordBase::ncells_z = 30 + +CartGrid3D::type = "coordbase" +CartGrid3D::avoid_originx = no +CartGrid3D::avoid_originy = no +CartGrid3D::avoid_originz = no + +CoordBase::boundary_size_x_lower = 3 +CoordBase::boundary_size_y_lower = 3 +CoordBase::boundary_size_z_lower = 3 +CoordBase::boundary_size_x_upper = 3 +CoordBase::boundary_size_y_upper = 3 +CoordBase::boundary_size_z_upper = 3 + + + +ActiveThorns = "MoL NaNChecker Time" + +MoL::ODE_Method = "RK4" +MoL::MoL_Intermediate_Steps = 4 +MoL::MoL_Num_Scratch_Levels = 1 + +Time::dtfac = 0.4 + + + +ActiveThorns = "ADMBase ADMCoupling ADMMacros CoordGauge StaticConformal" + +ADMBase::initial_data = "Cartesian Minkowski" +ADMBase::initial_lapse = "one" +ADMBase::initial_shift = "zero" +ADMBase::initial_dtlapse = "zero" +ADMBase::initial_dtshift = "zero" + + + +ActiveThorns = "GenericFD ML_BSSN ML_BSSN_Helper TmunuBase" + +ADMBase::evolution_method = "ML_BSSN" +ADMBase::lapse_evolution_method = "ML_BSSN" +ADMBase::shift_evolution_method = "ML_BSSN" + +ML_BSSN::my_boundary_condition = "Minkowski" + +ML_BSSN::harmonicN = 1 # 1+log +ML_BSSN::harmonicF = 2.0 # 1+log +ML_BSSN::ShiftGammaCoeff = 0.75 +ML_BSSN::BetaDriver = 0.5 + + + +ActiveThorns = "CarpetIOBasic" + +IOBasic::outInfo_every = 100 +IOBasic::outInfo_vars = "ADMBase::alp" + + + +#ActiveThorns = "CarpetIOASCII" +# +#IOASCII::out0D_every = 100 +#IOASCII::out0D_vars = "Carpet::timing" + + + +ActiveThorns = "TimerReport" + +TimerReport::output_all_timers = yes +TimerReport::all_timers_clock = "cycle" diff --git a/Carpet/LoopControl/par/bench-minkowski-carpet-1lev-tiled.par b/Carpet/LoopControl/par/bench-minkowski-carpet-1lev-tiled.par new file mode 100644 index 000000000..798387945 --- /dev/null +++ b/Carpet/LoopControl/par/bench-minkowski-carpet-1lev-tiled.par @@ -0,0 +1,126 @@ +Cactus::cctk_run_title = "Benchmark of McLachlan using Carpet with one refinement level" +Cactus::cctk_timer_output = "full" + +Cactus::cctk_itlast = 100 + + + +ActiveThorns = "CycleClock Fortran hwloc MPI" + + + +ActiveThorns = "IOUtil" + +IO::out_dir = $parfile + + + +ActiveThorns = "InitBase" + + + +ActiveThorns = "LoopControl" + +LoopControl::verbose = no +LoopControl::veryverbose = no +LoopControl::selftest = no + +LoopControl::initial_setup = "tiled" + + + +ActiveThorns = "Carpet CarpetLib CarpetReduce" + +Carpet::domain_from_coordbase = yes + +driver::ghost_size = 3 + +Carpet::init_fill_timelevels = yes + +#CarpetLib::print_timestats_every = 1 +CarpetLib::print_memstats_every = 100 + + + +ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" + +CoordBase::domainsize = "minmax" +CoordBase::xmin = 0.0 +CoordBase::ymin = 0.0 +CoordBase::zmin = 0.0 +CoordBase::xmax = 1.0 +CoordBase::ymax = 1.0 +CoordBase::zmax = 1.0 + +CoordBase::spacing = "numcells" +CoordBase::ncells_x = 30 +CoordBase::ncells_y = 30 +CoordBase::ncells_z = 30 + +CartGrid3D::type = "coordbase" +CartGrid3D::avoid_originx = no +CartGrid3D::avoid_originy = no +CartGrid3D::avoid_originz = no + +CoordBase::boundary_size_x_lower = 3 +CoordBase::boundary_size_y_lower = 3 +CoordBase::boundary_size_z_lower = 3 +CoordBase::boundary_size_x_upper = 3 +CoordBase::boundary_size_y_upper = 3 +CoordBase::boundary_size_z_upper = 3 + + + +ActiveThorns = "MoL NaNChecker Time" + +MoL::ODE_Method = "RK4" +MoL::MoL_Intermediate_Steps = 4 +MoL::MoL_Num_Scratch_Levels = 1 + +Time::dtfac = 0.4 + + + +ActiveThorns = "ADMBase ADMCoupling ADMMacros CoordGauge StaticConformal" + +ADMBase::initial_data = "Cartesian Minkowski" +ADMBase::initial_lapse = "one" +ADMBase::initial_shift = "zero" +ADMBase::initial_dtlapse = "zero" +ADMBase::initial_dtshift = "zero" + + + +ActiveThorns = "GenericFD ML_BSSN ML_BSSN_Helper TmunuBase" + +ADMBase::evolution_method = "ML_BSSN" +ADMBase::lapse_evolution_method = "ML_BSSN" +ADMBase::shift_evolution_method = "ML_BSSN" + +ML_BSSN::my_boundary_condition = "Minkowski" + +ML_BSSN::harmonicN = 1 # 1+log +ML_BSSN::harmonicF = 2.0 # 1+log +ML_BSSN::ShiftGammaCoeff = 0.75 +ML_BSSN::BetaDriver = 0.5 + + + +ActiveThorns = "CarpetIOBasic" + +IOBasic::outInfo_every = 100 +IOBasic::outInfo_vars = "ADMBase::alp" + + + +#ActiveThorns = "CarpetIOASCII" +# +#IOASCII::out0D_every = 100 +#IOASCII::out0D_vars = "Carpet::timing" + + + +ActiveThorns = "TimerReport" + +TimerReport::output_all_timers = yes +TimerReport::all_timers_clock = "cycle" diff --git a/Carpet/LoopControl/param.ccl b/Carpet/LoopControl/param.ccl index 01b7f254c..2a2ddefe8 100644 --- a/Carpet/LoopControl/param.ccl +++ b/Carpet/LoopControl/param.ccl @@ -1,203 +1,68 @@ # Parameter definitions for thorn LoopControl -################# -# General options - -BOOLEAN printstats "Output timing statistics" STEERABLE=recover +BOOLEAN verbose "Output some loop information at run time" STEERABLE=always { } "no" -CCTK_REAL printstats_every_minutes "Output timing statistics every so many minutes" STEERABLE=always -{ - 0.0:* :: "" -} 60.0 - -CCTK_REAL printstats_threshold "Output timing statistics for loops costing at least this many percent" STEERABLE=always -{ - 0.0:100.0 :: "" -} 2.0 - -CCTK_INT printstats_verbosity "Level of detail for statistics" STEERABLE=always -{ - 0 :: "Output only a global summary" - 1 :: "Output summary for every loop" - 2 :: "Output summary for every loop parameter set" - 3 :: "Output everything" -} 1 - -BOOLEAN verbose "Verbosity" STEERABLE=always +BOOLEAN veryverbose "Output detailed debug information at run time" STEERABLE=always { } "no" -BOOLEAN debug "Output debug information" STEERABLE=always +BOOLEAN selftest "Run a self test with every loop (expensive)" STEERABLE=always { } "no" -BOOLEAN check_type_sizes "Check that Fortran and C types match" -{ -} "yes" - -BOOLEAN do_selftest "Perform (expensive) self-tests in every loop" -{ -} "no" - - - -################# -# Thread topology - -CCTK_INT lc_inthreads "Number of threads in the i-direction" STEERABLE=recover -{ - -1 :: "choose automatically" - 1:* :: "user-specified value" -} -1 - -CCTK_INT lc_jnthreads "Number of threads in the j-direction" STEERABLE=recover -{ - -1 :: "choose automatically" - 1:* :: "user-specified value" -} -1 - -CCTK_INT lc_knthreads "Number of threads in the k-direction" STEERABLE=recover -{ - -1 :: "choose automatically" - 1:* :: "user-specified value" -} -1 - -###################### -# Tiling specification - -CCTK_INT lc_inpoints "Number of grid points in the i-direction" STEERABLE=recover -{ - -1 :: "choose automatically" - 1:* :: "user-specified value" -} -1 - -CCTK_INT lc_jnpoints "Number of grid points in the j-direction" STEERABLE=recover +KEYWORD initial_setup "Initial configuration" STEERABLE=always { - -1 :: "choose automatically" - 1:* :: "user-specified value" -} -1 + "legacy" :: "Like a non-LoopControl loop" + "tiled" :: "Basic LoopControl setup" +} "tiled" -CCTK_INT lc_knpoints "Number of grid points in the k-direction" STEERABLE=recover -{ - -1 :: "choose automatically" - 1:* :: "user-specified value" -} -1 - -########################## -# Use legacy configuration - -BOOLEAN legacy_init "Initialise with legacy configuration (usually slower)" STEERABLE=recover +# NOTE: +# - Intel chips divide the D1 cache into two, one for each hyperthread. +# The cache is thus not shared! +BOOLEAN use_smt_threads "Place SMT threads close together" STEERABLE=always { } "yes" - - -########################### -# Automatic: simple cycling - -BOOLEAN cycle_j_tilings "Cycle through all available tilings in the j-direction" STEERABLE=recover +BOOLEAN align_with_cachelines "Align innermost loops with cache line size" STEERABLE=always { -} "no" - - +} "yes" -################################ -# Automatic: simulated annealing -BOOLEAN use_simulated_annealing "Find a good loop configuration through simulated annealing" STEERABLE=recover -{ -} "no" -CCTK_INT siman_iters_fixed_T "" STEERABLE=recover +INT tilesize_i "Tile size in i direction (in grid points) for loop tiling" STEERABLE=always { 1:* :: "" -} 1 - -CCTK_REAL siman_probability_change_topology "" STEERABLE=recover -{ - 0:1 :: "" -} 0.1 - -CCTK_REAL siman_step_size "" STEERABLE=recover -{ - (1.0:* :: "" -} 3.0 - -CCTK_REAL siman_k "energy scale" STEERABLE=recover -{ - (0:* :: "" -} 1.0e-9 +} 4 -CCTK_REAL siman_T_initial "initial variability" STEERABLE=recover +INT tilesize_j "Tile size in j direction (in grid points) for loop tiling" STEERABLE=always { - (0:* :: "" -} 1.0 - -CCTK_REAL siman_mu_T "speed" STEERABLE=recover -{ - (0:* :: "" -} 1.005 - -CCTK_REAL siman_T_min "stopping criterion" STEERABLE=recover -{ - (0:* :: "" -} 0.01 - - - -######################################### -# Automatic: random restart hill climbing - -BOOLEAN use_random_restart_hill_climbing "http://en.wikipedia.org/wiki/Hill_climbing http://en.wikipedia.org/wiki/Tabu_search" STEERABLE=always -{ -} "no" - -CCTK_REAL maximum_setup_overhead "Maximum allowable administrative overhead" STEERABLE=always -{ - 0.0:* :: "" -} 0.01 - -BOOLEAN ignore_initial_overhead "Ignore the overhead from the initial setup" STEERABLE=recover -{ -} "yes" - -CCTK_REAL probability_small_jump "Probability for a small jump once a local minimum has been reached" STEERABLE=always -{ - 0.0:1.0 :: "" -} 0.1 + 1:* :: "" +} 4 -CCTK_INT small_jump_distance "Maximum distance for a small jump" STEERABLE=always +INT tilesize_k "Tile size in k direction (in grid points) for loop tiling" STEERABLE=always { - 0:* :: "" -} 3 + 1:* :: "" +} 4 -CCTK_REAL probability_random_jump "Probability for a random jump once a local minimum has been reached" STEERABLE=always -{ - 0.0:1.0 :: "" -} 0.01 -CCTK_INT max_jump_attempts "Maximum number of attempts to find a random unknown location" STEERABLE=always -{ - 0:* :: "" -} 10 -CCTK_REAL immediate_overhead_threshold "The maximum overhead (ratio of current to best known time) allowed during an excursion" STEERABLE=always +INT loopsize_i "Size of each thread's loop in i direction (in grid points) for multithreading" STEERABLE=always { - 0.0:* :: "" -} 1.0 + 1:* :: "" +} 8 -CCTK_REAL delayed_overhead_threshold "The maximum overhead (ratio of current to best known time) allowed during an excursion" STEERABLE=always +INT loopsize_j "Size of each thread's loop in j direction (in grid points) for multithreading" STEERABLE=always { - 0.0:* :: "" -} 0.1 + 1:* :: "" +} 8 -CCTK_INT overhead_threshold_delay "Number of steps in an excursion before the delayed overhead criterion is applied" STEERABLE=always +INT loopsize_k "Size of each thread's loop in k direction (in grid points) for multithreading" STEERABLE=always { - 0:* :: "" -} 20 + 1:* :: "" +} 8 diff --git a/Carpet/LoopControl/schedule.ccl b/Carpet/LoopControl/schedule.ccl index 090da0f5a..6ad9df9eb 100644 --- a/Carpet/LoopControl/schedule.ccl +++ b/Carpet/LoopControl/schedule.ccl @@ -1,23 +1,18 @@ # Schedule definitions for thorn LoopControl -if (check_type_sizes) { - SCHEDULE lc_check_type_sizes AT startup - { - LANG: C - OPTIONS: meta - } "Check that sizes of control structures are identical in C and Fortran" -} +SCHEDULE lc_setup AT startup BEFORE Driver_Startup +{ + LANG: C +} "Set up LoopControl" -if (printstats) { - SCHEDULE lc_printstats_analysis AT analysis - { - LANG: C - OPTIONS: meta - } "Output loop control statistics" - - SCHEDULE lc_printstats_terminate AT terminate - { - LANG: C - OPTIONS: meta - } "Output loop control statistics" -} +SCHEDULE lc_statistics_maybe AT analysis +{ + LANG: C + OPTIONS: meta +} "Output LoopControl statistics" + +SCHEDULE lc_statistics AT terminate +{ + LANG: C + OPTIONS: meta +} "Output LoopControl statistics" diff --git a/Carpet/LoopControl/src/lc_auto.c b/Carpet/LoopControl/src/lc_auto.c deleted file mode 100644 index 3b4a1da9c..000000000 --- a/Carpet/LoopControl/src/lc_auto.c +++ /dev/null @@ -1,140 +0,0 @@ -#include -#include - -#include - -#include -#include - -/* #ifdef HAVE_TGMATH_H */ -/* # include */ -/* #endif */ - -#include "lc_siman.h" - -#include "lc_auto.h" - - - -static lc_statset_t const * restrict statset; - - - -static inline -int -step (int const oldval, int const maxval, - gsl_rng const * restrict const rng, double const step_size) -{ - int const offset = (int) ((2 * gsl_rng_uniform_pos (rng) - 1) * step_size); - return (oldval + offset + maxval) % maxval; -} - - - -static -void -take_step (const gsl_rng *rng, void *xp_, double step_size) -{ - DECLARE_CCTK_PARAMETERS; - lc_state_t * restrict const xp = xp_; - if (gsl_rng_uniform (rng) < siman_probability_change_topology) { - xp->topology = gsl_rng_uniform_int (rng, statset->ntopologies); - } - for (int d=0; d<3; ++d) { - xp->tiling[d] = step (xp->tiling[d], statset->ntilings[d], rng, step_size); - } -} - -static -double -distance (void *xp_, void *yp_) -{ - lc_state_t const * restrict const xp = xp_; - lc_state_t const * restrict const yp = yp_; - double dist = 10 * (xp->topology != yp->topology); - for (int d=0; d<3; ++d) { - dist += fabs (xp->tiling[d] - yp->tiling[d]); - } - return dist / 2; /* 2 = sqrt(4) */ -} - -static -void -print_position (void *xp_) -{ - lc_state_t const * restrict const xp = xp_; - printf (" %2d/{%3d,%3d,%3d}", - xp->topology, - xp->tiling[0], xp->tiling[1], xp->tiling[2]); -} - - - -void -lc_auto_init (lc_statset_t * restrict const ls, - lc_state_t * restrict const state) -{ - DECLARE_CCTK_PARAMETERS; - assert (! cycle_j_tilings); /* don't mix strategies */ - - if (! ls->auto_state) { - /* First call for this user parameter set of this loop */ - - ls->auto_state = malloc (sizeof * ls->auto_state); - - /* Initialise simulated annealing parameters */ - ls->auto_state->siman_params.iters_fixed_T = siman_iters_fixed_T; - ls->auto_state->siman_params.step_size = siman_step_size; - ls->auto_state->siman_params.k = siman_k; - ls->auto_state->siman_params.t_initial = siman_T_initial; - ls->auto_state->siman_params.mu_t = siman_mu_T; - ls->auto_state->siman_params.t_min = siman_T_min; - - /* Create random number generator */ - gsl_rng_env_setup(); - ls->auto_state->rng = gsl_rng_alloc (gsl_rng_default); - - /* Set initial state */ - ls->auto_state->state = * state; - - /* Initialise simulated annealing state */ - statset = ls; - ls->auto_state->siman_state = - lc_siman_solve (NULL, - ls->auto_state->rng, - NULL, 0.0, - take_step, distance, verbose ? print_position : NULL, - sizeof (lc_state_t), - ls->auto_state->siman_params); - - } else { - /* Not the first call */ - - if (ls->auto_state->siman_state) { - /* The solver is still active: ask for the next state */ - statset = ls; - ls->auto_state->siman_state = - lc_siman_solve (ls->auto_state->siman_state, - ls->auto_state->rng, - & ls->auto_state->state, ls->auto_state->time, - take_step, distance, verbose ? print_position : NULL, - sizeof (lc_state_t), - ls->auto_state->siman_params); - } - - /* Set thread topology and tiling specification */ - * state = ls->auto_state->state; - - } /* if not the first call */ -} - - - -void -lc_auto_finish (lc_statset_t * restrict const ls, - lc_stattime_t const * restrict const lt) -{ - ls->auto_state->time = - lt->time_calc_sum / - (lt->time_count * ls->npoints[0] * ls->npoints[1] * ls->npoints[2]); -} diff --git a/Carpet/LoopControl/src/lc_auto.h b/Carpet/LoopControl/src/lc_auto.h deleted file mode 100644 index a89db6aa8..000000000 --- a/Carpet/LoopControl/src/lc_auto.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef LC_AUTO_H -#define LC_AUTO_H - -#include - -#include "lc_siman.h" -#include "loopcontrol.h" - - - -/* no typedef here; forward declcared in loopcontrol.h */ -struct lc_auto_state_t { - lc_siman_params_t siman_params; - gsl_rng * rng; - lc_siman_state_t * siman_state; - - lc_state_t state; - double time; -}; - - - -void -lc_auto_init (lc_statset_t * restrict const ls, - lc_state_t * restrict const state); - -void -lc_auto_finish (lc_statset_t * restrict const ls, - lc_stattime_t const * restrict const lt); - -#endif /* #ifndef LC_AUTO_H */ diff --git a/Carpet/LoopControl/src/lc_get_type_sizes.F90 b/Carpet/LoopControl/src/lc_get_type_sizes.F90 deleted file mode 100644 index 8d60c58bf..000000000 --- a/Carpet/LoopControl/src/lc_get_type_sizes.F90 +++ /dev/null @@ -1,21 +0,0 @@ -#include "cctk.h" -#include "cctk_Functions.h" - -subroutine lc_get_fortran_type_sizes (lc_control_size, lc_statmap_size) - - use loopcontrol_types - - implicit none - - DECLARE_CCTK_FUNCTIONS - - integer, intent(out) :: lc_control_size, lc_statmap_size - - type(lc_control_t), dimension(2) :: lc_lc - type(lc_statmap_t), dimension(2) :: lc_lm - - ! Note: This conversion from pointer to (small) integer is safe - lc_control_size = CCTK_PointerTo(lc_lc(2)) - CCTK_PointerTo(lc_lc(1)) - lc_statmap_size = CCTK_PointerTo(lc_lm(2)) - CCTK_PointerTo(lc_lm(1)) - -end subroutine lc_get_fortran_type_sizes diff --git a/Carpet/LoopControl/src/lc_hill.c b/Carpet/LoopControl/src/lc_hill.c deleted file mode 100644 index b0ddcc762..000000000 --- a/Carpet/LoopControl/src/lc_hill.c +++ /dev/null @@ -1,345 +0,0 @@ -#include -#include -#include - -#include -#include - -#include "loopcontrol.h" - -#include "lc_hill.h" - - - -#define debug_assert assert - - - -static -double -drand (void) -{ - double const r = rand() / (RAND_MAX + 1.0); - assert (r >= 0.0 && r < 1.0); - return r; -} - -static -int -irand (int const imaxval) -{ - assert (imaxval >= 0); - int const i = drand() * imaxval; - assert (i >= 0 && i < imaxval); - return i; -} - - - -static -double -time_for_stattime (lc_stattime_t const * restrict const lt) -{ - assert (lt); - return lt->time_calc_sum / lt->time_count; -} - -#if 0 -/* unused */ -static -double -time_for_state (lc_statset_t const * restrict const ls, - lc_state_t const * restrict const state) -{ - lc_stattime_t const * restrict const lt = lc_stattime_find (ls, state); - assert (lt); - return time_for_stattime (lt); -} -#endif - - - -void -lc_hill_init (lc_statset_t * restrict const ls, - lc_state_t * const new_state) -{ - DECLARE_CCTK_PARAMETERS; - - lc_hill_state_t * restrict lh = ls->hill_state; - - /* Initialise state */ - if (! lh) { - if (verbose) { - CCTK_INFO ("Hill climbing: Initialising"); - } - ls->hill_state = malloc (sizeof * ls->hill_state); - lh = ls->hill_state; - lh->iteration = 0; - lh->have_best = 0; - lh->excursion_start = 0; - lh->have_previous = 0; - lh->state = * new_state; - return; - } - - /* If the overhead has become too large, do nothing. */ - if (ls->time_setup_sum > maximum_setup_overhead * ls->time_calc_sum) { - /* Stay at the old state. */ - * new_state = lh->state; - return; - } - - ++ lh->iteration; - - if (verbose) { - CCTK_VInfo (CCTK_THORNSTRING, - "Hill climbing: iter %d, state %2d/{%2d,%2d,%2d}, time %g", - lh->iteration, - lh->state.topology, - lh->state.tiling[0], - lh->state.tiling[1], - lh->state.tiling[2], - lh->time); - } - - /* Test whether we have a new best time */ - if (! lh->have_best || lh->time < lh->best_time) { - /* Remember this state */ - if (verbose) { - CCTK_INFO ("Hill climbing: This is a new best time"); - } - lh->best = lh->state; - lh->best_time = lh->time; - lh->have_best = 1; - lh->excursion_start = lh->iteration; - } else if (lh->have_best && lc_state_equal (& lh->state, & lh->best)) { - /* Update time for best state */ - if (verbose) { - CCTK_INFO ("Hill climbing: Updating best time"); - } - lh->best_time = lh->time; - /* Is there now a better state? */ - for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) { - double const time = lt->time_calc_sum / lt->time_count; - if (time < lh->best_time) { - lh->best = lt->state; - lh->best_time = time; - } - } - } - - /* Compare the time for the current state with the time for the - previous state. If the previous state was better, backtrack. */ - if (lh->have_previous && lh->previous_time < lh->time) { - if (verbose) { - CCTK_INFO ("Hill climbing: Backtracking"); - } - lh->state = lh->previous; - lh->time = lh->previous_time; - lh->have_previous = 0; - } - - /* Give up if the current time is too bad */ - if (lh->have_best) { - int const immediate_overhead = - lh->time > lh->best_time * (1.0 + immediate_overhead_threshold); - int const delayed_overhead = - lh->iteration > lh->excursion_start + overhead_threshold_delay && - lh->time > lh->best_time * (1.0 + delayed_overhead_threshold); - if (immediate_overhead || delayed_overhead) { - if (verbose) { - CCTK_INFO ("Hill climbing: Reverting to best known state"); - } - lh->excursion_start = lh->iteration; - lh->state = lh->best; - lh->time = lh->best_time; - } - } - - /* Age the state */ - lh->previous = lh->state; - lh->previous_time = lh->time; - lh->have_previous = 1; - - search:; - - /* Look which neighbours exist. */ - /* typedef enum { nb_boundary, nb_missing, nb_exists } neighbour_t; */ - /* neighbour_t neighbours[3][2]; */ - lc_state_t nb_state[3][2]; - double nb_time[3][2]; - lc_state_t * nb_nonexist_state[6]; - int num_nonexist_states = 0; - lc_state_t * nb_minimum_time = NULL; - double minimum_time; - for (int d=0; d<3; ++d) { - for (int f=0; f<2; ++f) { - nb_state[d][f] = lh->state; - nb_state[d][f].tiling[d] += f ? + 1: -1; - debug_assert (nb_state[d][f].topology >= 0); - debug_assert (nb_state[d][f].topology < ls->ntopologies); - int const ntilings = ls->topology_ntilings[d][nb_state[d][f].topology]; - if (nb_state[d][f].tiling[d] < 0 || - nb_state[d][f].tiling[d] >= ntilings) - { - /* neighbours[d][f] = nb_boundary; */ - /* do nothing */ - } else { - lc_stattime_t const * restrict const nb_lt = - lc_stattime_find (ls, & nb_state[d][f]); - if (! nb_lt) { - /* neighbours[d][f] = nb_missing; */ - debug_assert (num_nonexist_states < 6); - nb_nonexist_state[num_nonexist_states++] = & nb_state[d][f]; - } else { - /* neighbours[d][f] = nb_exists; */ - nb_time[d][f] = time_for_stattime (nb_lt); - if (! nb_minimum_time || nb_time[d][f] < minimum_time) { - nb_minimum_time = & nb_state[d][f]; - minimum_time = nb_time[d][f]; - } - } - } - } - } - - /* If not all neighbours exist, then choose a random neighbour and - move there. */ - if (num_nonexist_states > 0) { - if (verbose) { - CCTK_INFO ("Hill climbing: Examining a new state"); - } - int const choice = irand (num_nonexist_states); - lh->state = * nb_nonexist_state[choice]; - * new_state = lh->state; - return; - } - - if (! nb_minimum_time) { - /* There are no neighbours. Stay where we are. */ - if (verbose) { - CCTK_INFO ("Hill climbing: No neighbours, staying put"); - } - * new_state = lh->state; - return; - } - - /* All neighbours exist. Look whether we are in a local minimum. */ - assert (nb_minimum_time); - if (minimum_time >= lh->time) { - /* We are in a local minimum. */ - if (verbose) { - CCTK_INFO ("Hill climbing: Local minimum reached"); - } - - /* Every so often take a small jump. */ - if (drand() < probability_small_jump) { - /* Be curious, go somewhere nearby. */ - if (verbose) { - CCTK_INFO ("Hill climbing: Making a small jump"); - } - for (int ntries = 0; ntries < max_jump_attempts; ++ ntries) { - lc_state_t try_state = lh->state; - if (drand() < 0.25) { - /* Change the topology, but not the tiling. */ - try_state.topology = irand (ls->ntopologies); - for (int d=0; d<3; ++d) { - if (try_state.tiling[d] >= - ls->topology_ntilings[d][try_state.topology]) - { - /* The tiling doesn't fit for this new topology; don't - choose this topology. */ - goto next_try; - } - } - } else { - /* Change the tiling a bit, but keep the topology */ - for (int d=0; d<3; ++d) { - int const i0 = - lc_max (try_state.tiling[d] - small_jump_distance, 0); - int const i1 = - lc_min (try_state.tiling[d] + small_jump_distance + 1, - ls->topology_ntilings[d][try_state.topology]); - try_state.tiling[d] = i0 + irand (i1 - i0); - debug_assert (try_state.tiling[d] >= 0); - debug_assert (try_state.tiling[d] < - ls->topology_ntilings[d][try_state.topology]); - } - } - if (! lc_stattime_find (ls, & try_state)) { - lh->state = try_state; - * new_state = lh->state; - return; - } - next_try:; - } - /* Don't jump after all. */ - } - - /* Every so often take a random jump. */ - if (drand() < probability_random_jump) { - /* Be adventurous, go somewhere unknown. */ - if (verbose) { - CCTK_INFO ("Hill climbing: Jumping randomly"); - } - for (int ntries = 0; ntries < max_jump_attempts; ++ ntries) { - lc_state_t try_state; - try_state.topology = irand (ls->ntopologies); - for (int d=0; d<3; ++d) { - try_state.tiling[d] = - irand (ls->topology_ntilings[d][try_state.topology]); - } - if (! lc_stattime_find (ls, & try_state)) { - /* The new state is hitherto unknown, use it. */ - lh->state = try_state; - lh->excursion_start = lh->iteration; - lh->have_previous = 0; /* disable backtracking */ - * new_state = lh->state; - return; - } - } - /* Don't jump after all. */ - } - - /* If the current state is not the best state, give up and go - back. */ - if (! lc_state_equal (& lh->state, & lh->best)) { - /* Revert to the best known state. */ - if (verbose) { - CCTK_INFO ("Hill climbing: Reverting to best known state"); - } - lh->state = lh->best; - lh->excursion_start = lh->iteration; - lh->have_previous = 0; - * new_state = lh->best; - return; - } - - /* Be content, do nothing. */ - if (verbose) { - CCTK_INFO ("Hill climbing: Resting"); - } - * new_state = lh->state; - return; - } - - /* One of the neighbours is better. Move to this neighbour, and - continue the search there. */ - if (verbose) { - CCTK_INFO ("Hill climbing: Found a better neighbour, going there"); - } - lh->state = * nb_minimum_time; - lh->time = minimum_time; - goto search; -} - - - -void -lc_hill_finish (lc_statset_t * restrict const ls, - lc_stattime_t const * restrict const lt) -{ - lc_hill_state_t * restrict const lh = ls->hill_state; - - lh->time = time_for_stattime (lt); -} diff --git a/Carpet/LoopControl/src/lc_hill.h b/Carpet/LoopControl/src/lc_hill.h deleted file mode 100644 index 7d418314b..000000000 --- a/Carpet/LoopControl/src/lc_hill.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef LC_HILL_H -#define LC_HILL_H - -#include - -#include "loopcontrol.h" - - - -/* no typedef here; forward declcared in loopcontrol.h */ -struct lc_hill_state_t { - int iteration; - - lc_state_t best; - double best_time; - int have_best; - - int excursion_start; /* -1 if not on an excursion */ - - lc_state_t previous; - double previous_time; - int have_previous; - - lc_state_t state; - double time; -}; - - - -void -lc_hill_init (lc_statset_t * restrict const ls, - lc_state_t * restrict const state); - -void -lc_hill_finish (lc_statset_t * restrict const ls, - lc_stattime_t const * restrict const lt); - -#endif /* #ifndef LC_HILL_H */ diff --git a/Carpet/LoopControl/src/lc_siman.c b/Carpet/LoopControl/src/lc_siman.c deleted file mode 100644 index 133111898..000000000 --- a/Carpet/LoopControl/src/lc_siman.c +++ /dev/null @@ -1,198 +0,0 @@ -/* Simulated annealing */ - -/* Adapted from GSL, the GNU Scientific Library, version 1.9 */ - -/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Mark Galassi - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include -#include -#include -#include -#include - -#include -#include - -#include - -/* #ifdef HAVE_TGMATH_H */ -/* # include */ -/* #endif */ - -#include "lc_siman.h" - -static inline -double safe_exp (double x) /* avoid underflow errors for large uphill steps */ -{ - return (x < GSL_LOG_DBL_MIN) ? 0.0 : exp(x); -} - -/* this structure contains internal state information for - lc_siman_solve */ - -typedef enum { state_initial, state_first, state_looping } lc_siman_location_t; - -/* no typedef here; forward declcared in lc_siman.h */ -struct lc_siman_state_t { - lc_siman_location_t state; - const gsl_rng *r; - lc_siman_step_t take_step; - lc_siman_metric_t distance; - lc_siman_print_t print_position; - size_t element_size; - lc_siman_params_t params; - void *x, *new_x, *best_x; - double E, new_E, best_E; - int i, done; - double T; - int n_evals, n_iter, n_accepts, n_rejects, n_eless; -}; - -/* implementation of a basic simulated annealing algorithm */ - -lc_siman_state_t * -lc_siman_solve (lc_siman_state_t *restrict state, - const gsl_rng *restrict r, - void *restrict x0_p, double E0, - lc_siman_step_t take_step, - lc_siman_metric_t distance, - lc_siman_print_t print_position, - size_t element_size, - lc_siman_params_t params) -{ - if (!state) { - state = malloc(sizeof *state); - state->state = state_initial; - } - switch (state->state) { - case state_initial: goto label_initial; - case state_first : goto label_first; - case state_looping: goto label_looping; - } - abort(); - - label_initial:; - state->n_evals = 1; - state->n_iter = 0; - - state->state = state_first; - return state; - - label_first:; - state->E = E0; - - state->x = malloc (element_size); - memcpy (state->x, x0_p, element_size); - state->new_x = malloc (element_size); - state->best_x = malloc (element_size); - memcpy (state->best_x, x0_p, element_size); - - state->best_E = state->E; - - state->T = params.t_initial; - state->done = 0; - - if (print_position) { - printf ("#-iter #-evals temperature position energy\n"); - } - - /* while (!done) */ - begin_while_notdone:; - if (state->done) goto end_while_notdone; - - state->n_accepts = 0; - state->n_rejects = 0; - state->n_eless = 0; - - /* for (i = 0; i < params.iters_fixed_T; ++i) */ - state->i = 0; - begin_for_i:; - if (state->i >= params.iters_fixed_T) goto end_for_i; - - memcpy (state->new_x, state->x, element_size); - - take_step (r, state->new_x, params.step_size); - memcpy (x0_p, state->new_x, element_size); - - state->state = state_looping; - return state; - - label_looping:; - state->new_E = E0; - - if (state->new_E <= state->best_E) { - memcpy (state->best_x, state->new_x, element_size); - state->best_E = state->new_E; - } - - ++state->n_evals; /* keep track of evaluations */ - /* now take the crucial step: see if the new point is accepted or - not, as determined by the boltzman probability */ - if (state->new_E < state->E) { - /* yay! take a step */ - memcpy (state->x, state->new_x, element_size); - state->E = state->new_E; - ++state->n_eless; - } else if (gsl_rng_uniform(r) < - safe_exp (-(state->new_E - state->E)/(params.k * state->T))) - { - /* yay! take a step */ - memcpy(state->x, state->new_x, element_size); - state->E = state->new_E; - ++state->n_accepts; - } else { - ++state->n_rejects; - } - - ++state->i; - goto begin_for_i; - end_for_i:; - - if (print_position) { - /* see if we need to print stuff as we go */ - /* printf("%5d %12g %5d %3d %3d %3d", n_iter, T, n_evals, */ - /* 100*n_eless/n_steps, 100*n_accepts/n_steps, */ - /* 100*n_rejects/n_steps); */ - printf ("%5d %7d %12g", state->n_iter, state->n_evals, state->T); - print_position (state->x); - printf (" %12g\n", state->E); - } - - /* apply the cooling schedule to the temperature */ - /* FIXME: I should also introduce a cooling schedule for the iters */ - state->T /= params.mu_t; - ++state->n_iter; - if (state->T < params.t_min) { - state->done = 1; - } - - goto begin_while_notdone; - end_while_notdone:; - - /* at the end, copy the result onto the initial point, so we pass it - back to the caller */ - memcpy (x0_p, state->best_x, element_size); - - free (state->x); - free (state->new_x); - free (state->best_x); - - free (state); - return NULL; -} diff --git a/Carpet/LoopControl/src/lc_siman.h b/Carpet/LoopControl/src/lc_siman.h deleted file mode 100644 index d5517ed64..000000000 --- a/Carpet/LoopControl/src/lc_siman.h +++ /dev/null @@ -1,60 +0,0 @@ -/* Simulated annealing */ - -/* Adapted from GSL, the GNU Scientific Library, version 1.9 */ - -/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Mark Galassi - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef SIMAN_H -#define SIMAN_H - -#include -#include - -/* types for the function pointers passed to lc_siman_solve */ - -typedef void (*lc_siman_step_t) (const gsl_rng *r, void *xp, double step_size); -typedef double (*lc_siman_metric_t) (void *xp, void *yp); -typedef void (*lc_siman_print_t) (void *xp); - -/* this structure contains all the information needed to structure the - search, beyond the energy function, the step function and the - initial guess. */ - -typedef struct { - int iters_fixed_T; /* how many iterations at each temperature? */ - double step_size; /* max step size in the random walk */ - /* the following parameters are for the Boltzmann distribution */ - double k, t_initial, mu_t, t_min; -} lc_siman_params_t; - -/* prototype for the workhorse function */ - -typedef struct lc_siman_state_t lc_siman_state_t; - -lc_siman_state_t * -lc_siman_solve (lc_siman_state_t *restrict state, - const gsl_rng *restrict r, - void *restrict x0_p, double E, - lc_siman_step_t take_step, - lc_siman_metric_t distance, - lc_siman_print_t print_position, - size_t element_size, - lc_siman_params_t params); - -#endif /* #ifndef SIMAN_H */ diff --git a/Carpet/LoopControl/src/loopcontrol.F90 b/Carpet/LoopControl/src/loopcontrol.F90 index e27e34726..9d91ec407 100644 --- a/Carpet/LoopControl/src/loopcontrol.F90 +++ b/Carpet/LoopControl/src/loopcontrol.F90 @@ -1,41 +1,69 @@ +#include "cctk.h" + +#include "loopcontrol.h" + + + module loopcontrol - use loopcontrol_types + implicit none interface - subroutine lc_statmap_init (initialised, lc_lm, name) + subroutine lc_stats_init(stats, name) use loopcontrol_types implicit none - integer, intent(out) :: initialised - type (lc_statmap_t) :: lc_lm - character(*) :: name - end subroutine lc_statmap_init + CCTK_POINTER :: stats + character(*) :: name + end subroutine lc_stats_init - subroutine lc_control_init (lc_lc, lc_lm, & - imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh,di) + subroutine lc_control_init( & + control, stats, & + imin, jmin, kmin, & + imax, jmax, kmax, & + iash, jash, kash, & + di, dj, dk) use loopcontrol_types implicit none - type (lc_control_t) :: lc_lc - type (lc_statmap_t) :: lc_lm - integer, intent(in) :: imin, jmin, kmin - integer, intent(in) :: imax, jmax, kmax - integer, intent(in) :: ilsh, jlsh, klsh - integer, intent(in) :: di + type(lc_control_t) :: control + CCTK_POINTER :: stats + CCTK_POINTER :: imin, jmin, kmin + CCTK_POINTER :: imax, jmax, kmax + CCTK_POINTER :: iash, jash, kash + CCTK_POINTER :: di, dj, dk end subroutine lc_control_init - - subroutine lc_control_finish (lc_lc) + + subroutine lc_control_finish(control, stats) use loopcontrol_types implicit none - type (lc_control_t) :: lc_lc + type(lc_control_t) :: control + CCTK_POINTER :: stats end subroutine lc_control_finish - - subroutine lc_get_fortran_type_sizes (sum_of_type_sizes) + + subroutine lc_thread_init(control) + use loopcontrol_types + implicit none + type(lc_control_t) :: control + end subroutine lc_thread_init + + logical function lc_thread_done(control) + use loopcontrol_types + implicit none + type(lc_control_t) :: control + end function lc_thread_done + + subroutine lc_thread_step(control) use loopcontrol_types implicit none - integer, intent(out) :: sum_of_type_sizes + type(lc_control_t) :: control + end subroutine lc_thread_step + + subroutine lc_get_fortran_type_sizes(type_sizes) + use loopcontrol_types + implicit none + CCTK_POINTER :: type_sizes(4) end subroutine lc_get_fortran_type_sizes - + end interface end module loopcontrol diff --git a/Carpet/LoopControl/src/loopcontrol.c b/Carpet/LoopControl/src/loopcontrol.c deleted file mode 100644 index 0acc13f0e..000000000 --- a/Carpet/LoopControl/src/loopcontrol.c +++ /dev/null @@ -1,1431 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef _OPENMP -# include -#endif - -/* #ifdef HAVE_TGMATH_H */ -/* # include */ -/* #endif */ - -#include "loopcontrol.h" - -#include "lc_auto.h" -#include "lc_hill.h" - - - -#ifndef _OPENMP -/* Replacements for some OpenMP routines if OpenMP is not available */ - -static inline -int -omp_get_thread_num (void) -{ - return 0; -} - -static inline -int -omp_get_num_threads (void) -{ - return 1; -} - -static inline -int -omp_get_max_threads (void) -{ - return 1; -} - -static inline -double -omp_get_wtime (void) -{ - struct timeval tv; - gettimeofday (& tv, NULL); - return tv.tv_sec + 1.0e-6 * tv.tv_usec; -} - -#endif - - - -/* Linked list of all loop statistics structures */ -lc_statmap_t * lc_statmap_list = NULL; - - - -/* Find all possible thread topologies */ -/* This finds all possible thread topologies which can be expressed as - NIxNJxNK x NIIxNJJxNKK. More complex topologies, e.g. based on a - recursive subdivision, are not considered (and cannot be expressed - with the data structures currently used in LoopControl). I expect - that more complex topologies are not necessary, since the number of - threads is usually quite small and contains many small factors in - its prime decomposition. */ -static -void -find_thread_topologies (lc_topology_t * restrict const topologies, - const int maxntopologies, - int * restrict const ntopologies, - int const nthreads) -{ - * ntopologies = 0; - - for (int nk=1; nk<=nthreads; ++nk) { - if (nthreads % nk == 0) { - for (int nj=1; nj<=nthreads/nk; ++nj) { - if (nthreads % (nj*nk) == 0) { - for (int ni=1; ni<=nthreads/(nj*nk); ++ni) { - if (nthreads % (ni*nj*nk) == 0) { - - int const nithreads = nthreads/(ni*nj*nk); - for (int nkk=1; nkk<=nithreads; ++nkk) { - if (nithreads % nkk == 0) { - for (int njj=1; njj<=nithreads/nkk; ++njj) { - if (nithreads % (njj*nkk) == 0) { - int const nii = nithreads/(njj*nkk); - - assert (* ntopologies < maxntopologies); - topologies[* ntopologies].nthreads[0][0] = ni; - topologies[* ntopologies].nthreads[0][1] = nj; - topologies[* ntopologies].nthreads[0][2] = nk; - topologies[* ntopologies].nthreads[1][0] = nii; - topologies[* ntopologies].nthreads[1][1] = njj; - topologies[* ntopologies].nthreads[1][2] = nkk; - ++ * ntopologies; - - } - } - } - } - - } - } - } - } - } - } -} - - - -#if 1 - -/* Find "good" tiling specifications */ -/* This calculates a subset of all possible thread specifications. One - aim is to reduce the search space by disregarding some - specifications. The other aim is to distribute the specifications - "equally", so that one does not have to spend much effort - investigating tiling specifications with very similar properties. - For example, if there are 200 grid points, then half of the - possible tiling specifications consist of splitting the domain into - two subdomains with [100+N, 100-N] points. This is avoided by - covering all possible tiling specifications in exponentially - growing step sizes. */ -static -int tiling_compare (const void * const a, const void * const b) -{ - lc_tiling_t const * const aa = a; - lc_tiling_t const * const bb = b; - return aa->npoints - bb->npoints; -} - -static -void -find_tiling_specifications (lc_tiling_t * restrict const tilings, - const int maxntilings, - int * restrict const ntilings, - int const npoints) -{ - /* In order to reduce the number of possible tilings, require that - the step sizes differ by more than 10%. */ - double const distance_factor = 1.1; - /* Determine the "good" step sizes in two passes: first small step - sizes from 1 up to snpoints, then large step sizes from npoints - down to snpoints+1. */ - int const snpoints = floor (sqrt (npoints)); - /* For N grid points and a minimum spacing factor F, there are at - most log(N) / log(F) possible tilings. There will be fewer, since - the actual spacings will be rounded up to integers. */ - - * ntilings = 0; - - /* Small step sizes */ - int minnpoints = 0; - for (int n=1; n<=snpoints; ++n) { - if ((double) n > minnpoints * distance_factor) { - assert (* ntilings < maxntilings); - tilings[* ntilings].npoints = n; - minnpoints = n; - ++ * ntilings; - } - } - - /* Large step sizes */ - int maxnpoints = 1000000000; - for (int n=npoints; n>snpoints; --n) { - if (n * distance_factor < (double) maxnpoints) { - assert (* ntilings < maxntilings); - tilings[* ntilings].npoints = n; - maxnpoints = n; - ++ * ntilings; - } - } - - /* Sort */ - qsort (tilings, * ntilings, sizeof * tilings, tiling_compare); - - /* step size should be at least 1, even if there are only 0 - points */ - if (* ntilings == 0) { - assert (* ntilings < maxntilings); - tilings[* ntilings].npoints = lc_max (npoints, 1); - ++ * ntilings; - } - - assert (* ntilings >= 1); -} - -#else - -static -void -find_tiling_specifications (lc_tiling_t * restrict const tilings, - const int maxntilings, - int * restrict const ntilings, - int const npoints) -{ - /* In order to reduce the number of possible tilings, require that - the step sizes differ by more than 10%. */ - double const distance_factor = 1.1; - /* For N grid points and a minimum spacing factor F, there are at - most log(N) / log(F) possible tilings. There will be fewer, since - the actual spacings will be rounded up to integers. */ - - * ntilings = 0; - - int minnpoints = 0; - for (int n=1; n minnpoints * distance_factor) { - assert (* ntilings < maxntilings); - tilings[* ntilings].npoints = n; - minnpoints = n; - ++ * ntilings; - } - } - - /* step size should be at least 1, even if there are only 0 - points */ - if (* ntilings == 0 || tilings[* ntilings - 1].npoints != npoints) { - assert (* ntilings < maxntilings); - tilings[* ntilings].npoints = lc_max (npoints, 1); - ++ * ntilings; - } -} - -#endif - - - -/* Initialise control parameter set statistics */ -void -lc_stattime_init (lc_stattime_t * restrict const lt, - lc_statset_t * restrict const ls, - lc_state_t const * restrict const state) -{ - DECLARE_CCTK_PARAMETERS; - - /* Check arguments */ - assert (lt); - assert (ls); - assert (state); - - /*** Topology ***************************************************************/ - - lt->state.topology = state->topology; - - if (state->topology == -1) { - - /* User-specified topology */ - lt->inthreads = -1; - lt->jnthreads = -1; - lt->knthreads = -1; - lt->inithreads = -1; - lt->jnithreads = -1; - lt->knithreads = -1; - - } else { - - assert (state->topology >= 0 && state->topology < ls->ntopologies); - - lt->inthreads = ls->topologies[lt->state.topology].nthreads[0][0]; - lt->jnthreads = ls->topologies[lt->state.topology].nthreads[0][1]; - lt->knthreads = ls->topologies[lt->state.topology].nthreads[0][2]; - lt->inithreads = ls->topologies[lt->state.topology].nthreads[1][0]; - lt->jnithreads = ls->topologies[lt->state.topology].nthreads[1][1]; - lt->knithreads = ls->topologies[lt->state.topology].nthreads[1][2]; - - } - - if (debug) { - printf ("Thread topology #%d [%d,%d,%d]x[%d,%d,%d]\n", - lt->state.topology, - lt->inthreads, lt->jnthreads, lt->knthreads, - lt->inithreads, lt->jnithreads, lt->knithreads); - } - - /* Assert thread topology consistency */ - if (lt->state.topology != -1) { - assert (lt->inthreads >= 1); - assert (lt->jnthreads >= 1); - assert (lt->knthreads >= 1); - assert (lt->inithreads >= 1); - assert (lt->jnithreads >= 1); - assert (lt->knithreads >= 1); - assert (lt->inthreads * lt->jnthreads * lt->knthreads * - lt->inithreads * lt->jnithreads * lt->knithreads == - ls->num_threads); - } - - /*** Tilings ****************************************************************/ - - for (int d=0; d<3; ++d) { - lt->state.tiling[d] = state->tiling[d]; - if (state->tiling[d] != -1) { - assert (state->tiling[d] >= 0 && - state->tiling[d] < ls->topology_ntilings[d][lt->state.topology]); - } - } - - if (state->tiling[0] != -1) { - lt->inpoints = ls->tilings[0][lt->state.tiling[0]].npoints; - } - if (state->tiling[1] != -1) { - lt->jnpoints = ls->tilings[1][lt->state.tiling[1]].npoints; - } - if (state->tiling[2] != -1) { - lt->knpoints = ls->tilings[2][lt->state.tiling[2]].npoints; - } - - if (debug) { - printf ("Tiling stride [%d,%d,%d]\n", - lt->inpoints, lt->jnpoints, lt->knpoints); - } - - /* Assert tiling specification consistency */ - if (state->tiling[0] != -1) { - assert (lt->inpoints > 0); - } - if (state->tiling[1] != -1) { - assert (lt->jnpoints > 0); - } - if (state->tiling[2] != -1) { - assert (lt->knpoints > 0); - } - - - - /* Initialise statistics */ - lt->time_count = 0.0; - lt->time_count_init = 0.0; - lt->time_setup_sum = 0.0; - lt->time_setup_sum2 = 0.0; - lt->time_calc_sum = 0.0; - lt->time_calc_sum2 = 0.0; - lt->time_calc_init = 0.0; - - lt->last_updated = 0.0; /* never updated */ - - - - /* Append to loop statistics list */ - lt->next = ls->stattime_list; - ls->stattime_list = lt; -} - -lc_stattime_t * -lc_stattime_find (lc_statset_t const * restrict const ls, - lc_state_t const * restrict const state) -{ - assert (ls); - - lc_stattime_t * lt; - - for (lt = ls->stattime_list; lt; lt = lt->next) { - if (lc_state_equal (& lt->state, state)) { - break; - } - } - - return lt; -} - -lc_stattime_t * -lc_stattime_find_create (lc_statset_t * restrict const ls, - lc_state_t const * restrict const state) -{ - assert (ls); - - lc_stattime_t * lt; - - for (lt = ls->stattime_list; lt; lt = lt->next) { - if (lc_state_equal (& lt->state, state)) { - break; - } - } - - if (! lt) { - lt = malloc (sizeof * lt); - assert (lt); - lc_stattime_init (lt, ls, state); - } - - assert (lt); - return lt; -} - - - -/* Initialise user parameter set statistics */ -void -lc_statset_init (lc_statset_t * restrict const ls, - lc_statmap_t * restrict const lm, - int const num_threads, - int const npoints[3]) -{ - DECLARE_CCTK_PARAMETERS; - - /* Check arguments */ - assert (ls); - assert (lm); - assert (num_threads >= 1); - int total_npoints = 1; - for (int d=0; d<3; ++d) { - assert (npoints[d] >= 0); - assert (npoints[d] < 1000000000); - assert (total_npoints == 0 || npoints[d] < 1000000000 / total_npoints); - total_npoints *= npoints[d]; - } - - /*** Threads ****************************************************************/ - - static int saved_maxthreads = -1; - static lc_topology_t * * saved_topologies = NULL; - static int * restrict saved_ntopologies = NULL; - - // Allocate memory the first time this function is called - if (saved_maxthreads < 0) { - saved_maxthreads = omp_get_max_threads(); - saved_topologies = malloc (saved_maxthreads * sizeof * saved_topologies ); - saved_ntopologies = malloc (saved_maxthreads * sizeof * saved_ntopologies); - assert (saved_topologies ); - assert (saved_ntopologies); - for (int n=0; n saved_maxthreads) { - int const old_saved_maxthreads = saved_maxthreads; - saved_maxthreads = num_threads; - saved_topologies = realloc (saved_topologies, saved_maxthreads * sizeof * saved_topologies ); - saved_ntopologies = realloc (saved_ntopologies, saved_maxthreads * sizeof * saved_ntopologies); - assert (saved_topologies ); - assert (saved_ntopologies); - for (int n=old_saved_maxthreads; nnum_threads = num_threads; - ls->topologies = saved_topologies [num_threads-1]; - ls->ntopologies = saved_ntopologies[num_threads-1]; - assert (ls->ntopologies > 0); - - /*** Tilings ****************************************************************/ - - for (int d=0; d<3; ++d) { - ls->npoints[d] = npoints[d]; - } - - /* For up to 1000000 grid points, there are at most 126 possible - tilings (assuming a minimum distance of 10%) */ - int const maxntilings = 1000; - for (int d=0; d<3; ++d) { - if (debug) { - printf ("Dimension %d: %d points\n", d, ls->npoints[d]); - } - ls->tilings[d] = malloc (maxntilings * sizeof * ls->tilings[d]); - assert (ls->tilings[d]); - find_tiling_specifications - (ls->tilings[d], maxntilings, & ls->ntilings[d], ls->npoints[d]); - ls->topology_ntilings[d] = - malloc (ls->ntopologies * sizeof * ls->topology_ntilings[d]); - assert (ls->topology_ntilings[d]); - for (int n = 0; n < ls->ntopologies; ++n) { - int tiling; - for (tiling = 1; tiling < ls->ntilings[d]; ++tiling) { - if (ls->tilings[d][tiling].npoints * - ls->topologies[n].nthreads[0][d] * - ls->topologies[n].nthreads[1][d] > - ls->npoints[d]) - { - break; - } - } - for (int t=tiling; t < ls->ntilings[d]; ++t) { - assert (ls->tilings[d][t].npoints * - ls->topologies[n].nthreads[0][d] * - ls->topologies[n].nthreads[1][d] > - ls->npoints[d]); - } - ls->topology_ntilings[d][n] = tiling; - } - if (debug) { - printf (" Found %d possible tilings\n", ls->ntilings[d]); - printf (" "); - for (int n = 0; n < ls->ntilings[d]; ++n) { - printf (" %d", ls->tilings[d][n].npoints); - } - printf ("\n"); - } - } - - - - /* Simulated annealing state */ - ls->auto_state = NULL; - - /* Hill climbing state */ - ls->hill_state = NULL; - - - - /* Initialise list */ - ls->stattime_list = NULL; - - /* Initialise statistics */ - ls->time_count = 0.0; - ls->time_count_init = 0.0; - ls->time_setup_sum = 0.0; - ls->time_setup_sum2 = 0.0; - ls->time_calc_sum = 0.0; - ls->time_calc_sum2 = 0.0; - ls->time_calc_init = 0.0; - - /* Append to loop statistics list */ - ls->next = lm->statset_list; - lm->statset_list = ls; -} - -lc_statset_t * -lc_statset_find (lc_statmap_t const * restrict const lm, - int const num_threads, - int const npoints[3]) -{ - assert (lm); - - lc_statset_t * ls; - - for (ls = lm->statset_list; ls; ls = ls->next) { - if (ls->num_threads == num_threads && - ls->npoints[0] == npoints[0] && - ls->npoints[1] == npoints[1] && - ls->npoints[2] == npoints[2]) - { - break; - } - } - - return ls; -} - -lc_statset_t * -lc_statset_find_create (lc_statmap_t * restrict const lm, - int const num_threads, - int const npoints[3]) -{ - assert (lm); - - lc_statset_t * ls; - - for (ls = lm->statset_list; ls; ls = ls->next) { - if (ls->num_threads == num_threads && - ls->npoints[0] == npoints[0] && - ls->npoints[1] == npoints[1] && - ls->npoints[2] == npoints[2]) - { - break; - } - } - - if (! ls) { - ls = malloc (sizeof * ls); - assert (ls); - lc_statset_init (ls, lm, num_threads, npoints); - } - - assert (ls); - return ls; -} - - - -/* Initialise loop statistics */ -void -lc_statmap_init (int * restrict const initialised, - lc_statmap_t * restrict const lm, - char const * restrict const name) -{ - /* Check arguments */ - assert (initialised); - assert (lm); - -#pragma omp single - { - - /* Ensure this thorn is active */ - if (! CCTK_IsThornActive (CCTK_THORNSTRING)) { - CCTK_WARN (CCTK_WARN_ABORT, "Thorn LoopControl is used, but has not been activated. (Note: If a thorn has an optional depencency on LoopControl, and if LoopControl is then in your thorn list, then you are using it and need to activate it.)"); - } - - /* Set name */ - lm->name = strdup (name); - - /* Initialise list */ - lm->statset_list = NULL; - - /* Append to loop statistics list */ - lm->next = lc_statmap_list; - lc_statmap_list = lm; - - } - -#pragma omp single - { - /* Set this flag only after initialising */ - * initialised = 1; - } -} - - - -void -lc_control_init (lc_control_t * restrict const lc, - lc_statmap_t * restrict const lm, - int const imin, int const jmin, int const kmin, - int const imax, int const jmax, int const kmax, - int const ilsh, int const jlsh, int const klsh, - int const di) -{ - DECLARE_CCTK_PARAMETERS; - - /* Check arguments */ - assert (lc); - - /* Timer */ - lc->time_setup_begin = omp_get_wtime(); - - /* Check arguments */ - if (! (imin >= 0 && imax <= ilsh && ilsh >= 0) || - ! (jmin >= 0 && jmax <= jlsh && jlsh >= 0) || - ! (kmin >= 0 && kmax <= klsh && klsh >= 0)) - { - CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, - "Illegal loop control arguments:\n" - "name=\"%s\"\n" - "imin=%d imax=%d ilsh=%d\n" - "jmin=%d jmax=%d jlsh=%d\n" - "kmin=%d kmax=%d klsh=%d\n", - lm->name, - imin, imax, ilsh, - jmin, jmax, jlsh, - kmin, kmax, klsh); - } - assert (imin >= 0 && imax <= ilsh && ilsh >= 0); - assert (jmin >= 0 && jmax <= jlsh && jlsh >= 0); - assert (kmin >= 0 && kmax <= klsh && klsh >= 0); - assert (di > 0); - - /* Copy arguments */ - lc->imin = imin; - lc->jmin = jmin; - lc->kmin = kmin; - lc->imax = imax; - lc->jmax = jmax; - lc->kmax = kmax; - lc->ilsh = ilsh; - lc->jlsh = jlsh; - lc->klsh = klsh; - lc->di = di; /* vector size */ - - - - int const num_threads = omp_get_num_threads(); - lc_statset_t * restrict ls; -#pragma omp single copyprivate (ls) - { - /* Calculate number of points */ - /* TODO: Take vector size into account */ - int npoints[3]; - npoints[0] = lc_max (imax - imin, 0); - npoints[1] = lc_max (jmax - jmin, 0); - npoints[2] = lc_max (kmax - kmin, 0); - - ls = lc_statset_find_create (lm, num_threads, npoints); - } - - - - lc_stattime_t * restrict lt; -#pragma omp single copyprivate (lt) - { - - lc_state_t state; - - /* Select topology */ - - if (lc_inthreads != -1 || lc_jnthreads != -1 || lc_knthreads != -1) - { - /* User-specified thread topology */ - - if (lc_inthreads == -1 || lc_jnthreads == -1 || lc_knthreads == -1) { - CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, - "Loop %s: Illegal thread topology [%d,%d,%d]x[1,1,1] specified", - lm->name, - (int)lc_inthreads, (int)lc_jnthreads, (int)lc_knthreads); - } - if (lc_inthreads * lc_jnthreads * lc_knthreads != ls->num_threads) { - CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, - "Loop %s: Specified thread topology [%d,%d,%d]x[1,1,1] is not compatible with the number of threads %d", - lm->name, - (int)lc_inthreads, (int)lc_jnthreads, (int)lc_knthreads, - ls->num_threads); - } - - state.topology = -1; - - } else { - - /* Split in the k direction */ - - for (state.topology = ls->ntopologies - 1; - state.topology >= 0; - -- state.topology) - { - int have_tilings = 1; - for (int d=0; d<3; ++d) { - have_tilings = have_tilings && - ls->topology_ntilings[d][state.topology] > 0; - } - if (have_tilings) break; - } - if (state.topology < 0) { - assert (0); - CCTK_WARN (CCTK_WARN_ABORT, "grid too small"); - } - } - - /* Select tiling */ - - if (lc_inpoints != -1) { - /* User-specified tiling */ - state.tiling[0] = -1; - } else { - /* as many points as possible */ - assert (state.topology >= 0); - state.tiling[0] = ls->topology_ntilings[0][state.topology] - 1; - } - - if (lc_jnpoints != -1) { - /* User-specified tiling */ - state.tiling[1] = -1; - } else { - if (cycle_j_tilings) { - /* cycle through all tilings */ - static int count = 0; - assert (state.topology >= 0); - state.tiling[1] = (count ++) % ls->topology_ntilings[1][state.topology]; - } else if (legacy_init) { - /* as many points as possible */ - assert (state.topology >= 0); - state.tiling[1] = ls->topology_ntilings[1][state.topology] - 1; - } else { - /* as few points as possible */ - state.tiling[1] = 0; - } - } - - if (lc_knpoints != -1) { - /* User-specified tiling */ - state.tiling[2] = -1; - } else { - /* as many points as possible */ - assert (state.topology >= 0); - state.tiling[2] = ls->topology_ntilings[2][state.topology] - 1; - } - - /* Use simulated annealing to find the best loop configuration */ - if (use_simulated_annealing) { - lc_auto_init (ls, & state); - } - /* Use hill climbing to find the best loop configuration */ - if (use_random_restart_hill_climbing) { - lc_hill_init (ls, & state); - } - - /* Find or create database entry */ - - lt = lc_stattime_find_create (ls, & state); - - /* Topology */ - - if (state.topology == -1) { - /* User-specified topology */ - lt->inthreads = lc_inthreads; - lt->jnthreads = lc_jnthreads; - lt->knthreads = lc_knthreads; - lt->inithreads = 1; - lt->jnithreads = 1; - lt->knithreads = 1; - } - - /* Assert thread topology consistency */ - assert (lt->inthreads >= 1); - assert (lt->jnthreads >= 1); - assert (lt->knthreads >= 1); - assert (lt->inithreads >= 1); - assert (lt->jnithreads >= 1); - assert (lt->knithreads >= 1); - assert (lt->inthreads * lt->jnthreads * lt->knthreads * - lt->inithreads * lt->jnithreads * lt->knithreads == - ls->num_threads); - - /* Tilings */ - - if (state.tiling[0] == -1) { - /* User-specified tiling */ - lt->inpoints = lc_inpoints; - } - if (state.tiling[1] == -1) { - /* User-specified tiling */ - lt->jnpoints = lc_jnpoints; - } - if (state.tiling[2] == -1) { - /* User-specified tiling */ - lt->knpoints = lc_knpoints; - } - - /* Assert tiling specification consistency */ - assert (lt->inpoints > 0); - assert (lt->jnpoints > 0); - assert (lt->knpoints > 0); - - } /* omp single */ - - - - lc->statmap = lm; - lc->statset = ls; - lc->stattime = lt; - - - - /*** Threads ****************************************************************/ - - /* Thread loop settings */ - lc->iiimin = imin; - lc->jjjmin = jmin; - lc->kkkmin = kmin; - lc->iiimax = imax; - lc->jjjmax = jmax; - lc->kkkmax = kmax; - lc->iiistep = (lc->iiimax - lc->iiimin + lt->inthreads-1) / lt->inthreads; - lc->jjjstep = (lc->jjjmax - lc->jjjmin + lt->jnthreads-1) / lt->jnthreads; - lc->kkkstep = (lc->kkkmax - lc->kkkmin + lt->knthreads-1) / lt->knthreads; - - /* Find location of current thread */ - lc->thread_num = omp_get_thread_num(); - int c_outer = - lc->thread_num / (lt->inithreads * lt->jnithreads * lt->knithreads); - int const ci = c_outer % lt->inthreads; c_outer /= lt->inthreads; - int const cj = c_outer % lt->jnthreads; c_outer /= lt->jnthreads; - int const ck = c_outer % lt->knthreads; c_outer /= lt->knthreads; - assert (c_outer == 0); - lc->iii = lc->iiimin + ci * lc->iiistep; - lc->jjj = lc->jjjmin + cj * lc->jjjstep; - lc->kkk = lc->kkkmin + ck * lc->kkkstep; - - - - /*** Tilings ****************************************************************/ - - /* Tiling loop settings */ - lc->iimin = ci == 0 ? lc->iii : lc_align (lc->iii, lc->di); - lc->jjmin = lc->jjj; - lc->kkmin = lc->kkk; - lc->iimax = lc_min (lc_align (lc->iii + lc->iiistep, lc->di), lc->iiimax); - lc->jjmax = lc_min (lc->jjj + lc->jjjstep, lc->jjjmax); - lc->kkmax = lc_min (lc->kkk + lc->kkkstep, lc->kkkmax); - lc->iistep = lt->inpoints; - lc->jjstep = lt->jnpoints; - lc->kkstep = lt->knpoints; - - - - /*** Inner threads **********************************************************/ - - /* Inner loop thread parallelism */ - int c_inner = - lc->thread_num % (lt->inithreads * lt->jnithreads * lt->knithreads); - int const cii = c_inner % lt->inithreads; c_inner /= lt->inithreads; - int const cjj = c_inner % lt->jnithreads; c_inner /= lt->jnithreads; - int const ckk = c_inner % lt->knithreads; c_inner /= lt->knithreads; - assert (c_inner == 0); - lc->iiii = cii; - lc->jjjj = cjj; - lc->kkkk = ckk; - lc->iiiistep = - lc_align ((lc->iistep + lt->inithreads - 1) / lt->inithreads, lc->di); - lc->jjjjstep = (lc->jjstep + lt->jnithreads - 1) / lt->jnithreads; - lc->kkkkstep = (lc->kkstep + lt->knithreads - 1) / lt->knithreads; - lc->iiiimin = lc->iiii * lc->iiiistep; - lc->jjjjmin = lc->jjjj * lc->jjjjstep; - lc->kkkkmin = lc->kkkk * lc->kkkkstep; - lc->iiiimax = lc->iiiimin + lc->iiiistep; - lc->jjjjmax = lc->jjjjmin + lc->jjjjstep; - lc->kkkkmax = lc->kkkkmin + lc->kkkkstep; - - - - /****************************************************************************/ - - /* Self test */ - if (do_selftest) { - char * restrict mem; -#pragma omp single copyprivate (mem) - { - mem = - calloc (lc->ilsh * lc->jlsh * lc->klsh, sizeof * lc->selftest_count); - } - lc->selftest_count = mem; - } else { - lc->selftest_count = NULL; - } - - - - /****************************************************************************/ - - /* Timer */ - lc->time_calc_begin = omp_get_wtime(); -} - - - -void -lc_control_selftest (lc_control_t * restrict const lc, - int const imin, int const imax, int const j, int const k) -{ - assert (imin >= 0 && imax <= lc->ilsh); - assert (j >= 0 && j < lc->jlsh); - assert (k >= 0 && k < lc->klsh); - for (int i = imin; i < imax; ++i) { - int const ind3d = i + lc->ilsh * (j + lc->jlsh * k); -#pragma omp atomic - ++ lc->selftest_count[ind3d]; - } -} - - - -void -lc_control_finish (lc_control_t * restrict const lc) -{ - DECLARE_CCTK_PARAMETERS; - - lc_stattime_t * restrict const lt = lc->stattime; - lc_statset_t * restrict const ls = lc->statset; - - int ignore_iteration; - int first_iteration; -#pragma omp single copyprivate (ignore_iteration, first_iteration) - { - ignore_iteration = ignore_initial_overhead && lt->time_count == 0.0; - first_iteration = lt->time_count_init == 0.0; - } - - /* Add a barrier to catch load imbalances */ -#pragma omp barrier - ; - - /* Timer */ - double const time_calc_end = omp_get_wtime(); - double const time_calc_begin = lc->time_calc_begin; - - double const time_setup_end = time_calc_begin; - double const time_setup_begin = lc->time_setup_begin; - - double const time_setup_sum = time_setup_end - time_setup_begin; - double const time_setup_sum2 = pow (time_setup_sum, 2); - - double const time_calc_sum = time_calc_end - time_calc_begin; - double const time_calc_sum2 = pow (time_calc_sum, 2); - - /* Update statistics */ -#pragma omp critical - { - lt->time_count += 1.0; - if (first_iteration) lt->time_count_init += 1.0; - - if (! ignore_iteration) lt->time_setup_sum += time_setup_sum; - if (! ignore_iteration) lt->time_setup_sum2 += time_setup_sum2; - - lt->time_calc_sum += time_calc_sum; - lt->time_calc_sum2 += time_calc_sum2; - if (first_iteration) lt->time_calc_init += time_calc_sum; - - ls->time_count += 1.0; - if (first_iteration) ls->time_count_init += 1.0; - - if (! ignore_iteration) ls->time_setup_sum += time_setup_sum; - if (! ignore_iteration) ls->time_setup_sum2 += time_setup_sum2; - - ls->time_calc_sum += time_calc_sum; - ls->time_calc_sum2 += time_calc_sum2; - if (first_iteration) ls->time_calc_init += time_calc_sum; - } - -#pragma omp master - { - lt->last_updated = time_calc_end; - } - -/* #pragma omp barrier */ - - { - if (use_simulated_annealing) { -#pragma omp single - { - lc_auto_finish (ls, lt); - } - } - if (use_random_restart_hill_climbing) { -#pragma omp single - { - lc_hill_finish (ls, lt); - } - } - } - - /* Perform self-check */ - - if (do_selftest) { - /* Assert that exactly the specified points have been set */ - static int failure = 0; - int k; /* PGI doesn't allow declaring k in the loop */ -#pragma omp for - // for (int k=0; kklsh; ++k) { - for (k=0; kklsh; ++k) { - for (int j=0; jjlsh; ++j) { - for (int i=0; iilsh; ++i) { - int const ind3d = i + lc->ilsh * (j + lc->jlsh * k); - char const inside = - (i >= lc->imin && i < lc->imax) && - (j >= lc->jmin && j < lc->jmax) && - (k >= lc->kmin && k < lc->kmax); - if (lc->selftest_count[ind3d] != inside) { -#pragma omp critical - { - failure = 1; - fprintf (stderr, " i=[%d,%d,%d] count=%d expected=%d\n", - i, j, k, - (int) lc->selftest_count[ind3d], (int) inside); - } - } - } - } - } - if (failure) { - for (int n=0; nimin, lc->jmin, lc->kmin); - fprintf (stderr, " imax=[%d,%d,%d]\n", lc->imax, lc->jmax, lc->kmax); - fprintf (stderr, " ilsh=[%d,%d,%d]\n", lc->ilsh, lc->jlsh, lc->klsh); - fprintf (stderr, " di=%d\n", lc->di); - fprintf (stderr, " Thread parallellism:\n"); - fprintf (stderr, " iiimin =[%d,%d,%d]\n", lc->iiimin, lc->jjjmin, lc->kkkmin); - fprintf (stderr, " iiimax =[%d,%d,%d]\n", lc->iiimax, lc->jjjmax, lc->kkkmax); - fprintf (stderr, " iiistep=[%d,%d,%d]\n", lc->iiistep, lc->jjjstep, lc->kkkstep); - fprintf (stderr, " Current thread:\n"); - fprintf (stderr, " thread_num=%d\n", lc->thread_num); - fprintf (stderr, " iii =[%d,%d,%d]\n", lc->iii, lc->jjj, lc->kkk); - fprintf (stderr, " iiii =[%d,%d,%d]\n", lc->iiii, lc->jjjj, lc->kkkk); - fprintf (stderr, " Tiling loop:\n"); - fprintf (stderr, " iimin =[%d,%d,%d]\n", lc->iimin, lc->jjmin, lc->kkmin); - fprintf (stderr, " iimax =[%d,%d,%d]\n", lc->iimax, lc->jjmax, lc->kkmax); - fprintf (stderr, " iistep=[%d,%d,%d]\n", lc->iistep, lc->jjstep, lc->kkstep); - fprintf (stderr, " Inner thread parallelism:\n"); - fprintf (stderr, " iiiimin =[%d,%d,%d]\n", lc->iiiimin, lc->jjjjmin, lc->kkkkmin); - fprintf (stderr, " iiiimax =[%d,%d,%d]\n", lc->iiiimax, lc->jjjjmax, lc->kkkkmax); - fprintf (stderr, " iiiistep=[%d,%d,%d]\n", lc->iiiistep, lc->jjjjstep, lc->kkkkstep); - } -#pragma omp barrier - ; - } -#pragma omp critical - { - CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, - "LoopControl loop \"%s\" self-test failed", - lc->statmap->name); - } - } - // xlC on VIP doesn't like this "nowait" -#pragma omp single // nowait - { - free (lc->selftest_count); - lc->selftest_count = NULL; - } - } -} - - -/* appears to be unused -static -double -avg (double const c, double const s) -{ - if (c == 0.0) return 0.0; - return s / c; -} - -static -double -stddev (double const c, double const s, double const s2) -{ - if (c == 0.0) return 0.0; - return sqrt (s2 / c - pow (s / c, 2)); -} -*/ - - - -/* Output statistics */ -void -lc_printstats (void); -void -lc_printstats (void) -{ - DECLARE_CCTK_PARAMETERS; - - printf ("LoopControl timing statistics:\n"); - - double total_calc_time = 0.0; - for (lc_statmap_t * lm = lc_statmap_list; lm; lm = lm->next) { - for (lc_statset_t * ls = lm->statset_list; ls; ls = ls->next) { - for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) { - total_calc_time += lt->time_calc_sum; - } - } - } - - double total_saved = 0.0; - int nmaps = 0; - for (lc_statmap_t * lm = lc_statmap_list; lm; lm = lm->next) { - - double calc_time = 0.0; - for (lc_statset_t * ls = lm->statset_list; ls; ls = ls->next) { - for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) { - calc_time += lt->time_calc_sum; - } - } - if (calc_time < printstats_threshold / 100.0 * total_calc_time) continue; - - if (printstats_verbosity >= 1) { - printf ("Loop #%d \"%s\":\n", - nmaps, - lm->name); - } - double lm_sum_count = 0.0; - double lm_sum_setup = 0.0; - double lm_sum_calc = 0.0; - double lm_sum_count_init = 0.0; - double lm_sum_init = 0.0; - double lm_sum_improv = 0.0; - int nsets = 0; - for (lc_statset_t * ls = lm->statset_list; ls; ls = ls->next) { - if (printstats_verbosity >= 2) { - printf (" Parameter set #%d nthreads=%d npoints=[%d,%d,%d]\n", - nsets, - ls->num_threads, ls->npoints[0], ls->npoints[1], ls->npoints[2]); - } - double sum_count = 0.0; - double sum_setup = 0.0; - double sum_calc = 0.0; - double sum_count_init = 0.0; - double sum_init = 0.0; - double min_calc = DBL_MAX; - int imin_calc = -1; - double max_calc = 0.0; - int imax_calc = -1; - int ntimes = 0; - for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) { - if (printstats_verbosity >= 3) { - printf (" Configuration #%d topology=%d [%d,%d,%d]x[%d,%d,%d] tiling=[%d,%d,%d]\n", - ntimes, - lt->state.topology, - lt->inthreads, lt->jnthreads, lt->knthreads, - lt->inithreads, lt->jnithreads, lt->knithreads, - lt->inpoints, lt->jnpoints, lt->knpoints); - } - double const count = lt->time_count; - double const setup = lt->time_setup_sum / count; - double const calc = lt->time_calc_sum / count; - double const init = lt->time_calc_init / lt->time_count_init; - if (printstats_verbosity >= 3) { - printf (" count: %g setup: %g first: %g calc: %g\n", - count, setup, init, calc); - } - sum_count += lt->time_count; - sum_setup += lt->time_setup_sum; - sum_calc += lt->time_calc_sum; - sum_count_init += lt->time_count_init; - sum_init += lt->time_calc_init; - if (calc < min_calc) { - min_calc = calc; - imin_calc = ntimes; - } - if (calc > max_calc) { - max_calc = calc; - imax_calc = ntimes; - } - ++ ntimes; - } - double const init_calc = sum_init / sum_count_init; - double const avg_calc = sum_calc / sum_count; - double const saved = (init_calc - avg_calc) * sum_count; - double const improv = (init_calc - min_calc) / init_calc; - if (printstats_verbosity >= 2) { - printf (" total count: %g total setup: %g total calc: %g\n", - sum_count, sum_setup, sum_calc); - printf (" avg calc: %g min calc: %g (#%d) max calc: %g (#%d)\n", - avg_calc, min_calc, imin_calc, max_calc, imax_calc); - if (printstats_verbosity < 3) { - int ntimes = 0; - for (lc_stattime_t * lt = ls->stattime_list; lt; lt = lt->next) { - if (ntimes == imin_calc || ntimes == imax_calc) { - printf (" #%d: topology=%d [%d,%d,%d]x[%d,%d,%d] tiling=[%d,%d,%d]\n", - ntimes, - lt->state.topology, - lt->inthreads, lt->jnthreads, lt->knthreads, - lt->inithreads, lt->jnithreads, lt->knithreads, - lt->inpoints, lt->jnpoints, lt->knpoints); - } - ++ ntimes; - } - } - printf (" first calc: %g improvement: %.0f%% saved: %g\n", - init_calc, 100.0*improv, saved); - } - lm_sum_count += sum_count; - lm_sum_setup += sum_setup; - lm_sum_calc += sum_calc; - lm_sum_count_init += sum_count_init; - lm_sum_init += sum_init; - lm_sum_improv += improv * sum_count; - ++ nsets; - } - double const init_calc = lm_sum_init / lm_sum_count_init; - double const avg_calc = lm_sum_calc / lm_sum_count; - double const saved = (init_calc - avg_calc) * lm_sum_count; - double const avg_improv = lm_sum_improv / lm_sum_count; - if (printstats_verbosity >= 1) { - printf (" total count: %g total setup: %g total calc: %g\n", - lm_sum_count, lm_sum_setup, lm_sum_calc); - printf (" avg calc: %g avg first calc: %g\n", - avg_calc, init_calc); - printf (" avg improvement: %.0f%% saved: %g seconds\n", - 100.0*avg_improv, saved); - } - total_saved += saved; - ++ nmaps; - } - - printf ("Total calculation time: %g seconds; total saved time: %g seconds\n", - total_calc_time, total_saved); -} - - - -void -lc_printstats_analysis (CCTK_ARGUMENTS); -void -lc_printstats_analysis (CCTK_ARGUMENTS) -{ - DECLARE_CCTK_ARGUMENTS; - DECLARE_CCTK_PARAMETERS; - - static int last_output = 0; - - int const current_time = CCTK_RunTime(); - if (current_time >= last_output + 60.0 * printstats_every_minutes) { - last_output = current_time; - lc_printstats (); - } -} - -void -lc_printstats_terminate (CCTK_ARGUMENTS); -void -lc_printstats_terminate (CCTK_ARGUMENTS) -{ - DECLARE_CCTK_ARGUMENTS; - - lc_printstats (); -} - -CCTK_FCALL -void -CCTK_FNAME (lc_get_fortran_type_sizes) (int * lc_control_size, - int * lc_statmap_size); - -int -lc_check_type_sizes (void) -{ - /* check that the sizes of LoopControls control structures are the - * same in loopcontrol.h and loopcontrol_fortran.h */ - - int lc_control_size, lc_statmap_size; - - CCTK_FNAME (lc_get_fortran_type_sizes) ( & lc_control_size, - & lc_statmap_size ); - - if ( lc_control_size != sizeof(lc_control_t) || - lc_statmap_size != sizeof(lc_statmap_t) ) { - CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, - "Fortran and C control structures (lc_statmap_t and lc_control_t) " - "differ in size. If you are not using Fortran or believe " - "your Fortran compiler to be strange, then this check can be " - "disabled by setting LoopControl::check_type_sizes=no."); - } - - return 0; -} - - -CCTK_FCALL -void -CCTK_FNAME (lc_statmap_init) (int * restrict const initialised, - lc_statmap_t * restrict const lm, - ONE_FORTSTRING_ARG); -CCTK_FCALL -void -CCTK_FNAME (lc_statmap_init) (int * restrict const initialised, - lc_statmap_t * restrict const lm, - ONE_FORTSTRING_ARG) -{ - ONE_FORTSTRING_CREATE (name); - lc_statmap_init (initialised, lm, name); - free (name); -} - -CCTK_FCALL -void -CCTK_FNAME (lc_control_init) (lc_control_t * restrict const lc, - lc_statmap_t * restrict const lm, - int const * restrict const imin, - int const * restrict const jmin, - int const * restrict const kmin, - int const * restrict const imax, - int const * restrict const jmax, - int const * restrict const kmax, - int const * restrict const ilsh, - int const * restrict const jlsh, - int const * restrict const klsh); -CCTK_FCALL -void -CCTK_FNAME (lc_control_init) (lc_control_t * restrict const lc, - lc_statmap_t * restrict const lm, - int const * restrict const imin, - int const * restrict const jmin, - int const * restrict const kmin, - int const * restrict const imax, - int const * restrict const jmax, - int const * restrict const kmax, - int const * restrict const ilsh, - int const * restrict const jlsh, - int const * restrict const klsh) -{ - lc_control_init (lc, lm, - * imin - 1, * jmin - 1, * kmin - 1, - * imax, * jmax, * kmax, - * ilsh, * jlsh, * klsh, - 1); -} - -CCTK_FCALL -void -CCTK_FNAME (lc_control_finish) (lc_control_t * restrict const lc); -CCTK_FCALL -void -CCTK_FNAME (lc_control_finish) (lc_control_t * restrict const lc) -{ - lc_control_finish (lc); -} diff --git a/Carpet/LoopControl/src/loopcontrol.cc b/Carpet/LoopControl/src/loopcontrol.cc new file mode 100644 index 000000000..2c37a499c --- /dev/null +++ b/Carpet/LoopControl/src/loopcontrol.cc @@ -0,0 +1,827 @@ +#include "loopcontrol.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _OPENMP +# include +#else + +// Simple non-OpenMP implementations +static inline int omp_get_num_threads() { return 1; } +static inline int omp_get_thread_num() { return 0; } + +#endif + +#ifdef HAVE_CAPABILITY_CYCLECLOCK +// We have a fast, accurate clock + +# include + +#else + +# ifdef _OPENMP +// We use the OpenMP clock + +typedef double ticks; +static inline ticks getticks() { return omp_get_wtime(); } +static inline double elapsed(ticks t1, ticks t0) { return t1-t0; } +static inline double seconds_per_tick() { return 1.0; } + +# else +// We use gettimeofday as fallback + +# include +typedef timeval ticks; +static inline ticks getticks() +{ + timeval tp; + gettimeofday(&tp, NULL); + return tp; +} +static inline double elapsed(ticks t1, ticks t0) +{ + return 1.0e+6 * (t1.tv_sec - t0.tv_sec) + (t1.tv_usec - t0.tv_usec); +} +static inline double seconds_per_tick() { return 1.0e-6; } + +# endif +#endif + +using namespace std; + + + +struct lc_thread_info_t { + char padding1[128]; // pad to ensure cache lines are not shared + int idx; // linear index of next coarse thread block + char padding2[128]; +}; + +struct lc_fine_thread_comm_t { + char padding1[128]; // pad to ensure cache lines are not shared + int state; // waiting threads + int value; // broadcast value + char padding2[128]; +}; + + + +struct lc_stats_t { + char const* name; + char const* file; + int line; + int init_count; + double points, threads; + double count, sum, sum2, min, max; + ticks start_time; +}; + + + +extern "C" CCTK_FCALL +void CCTK_FNAME(lc_get_fortran_type_sizes) (ptrdiff_t* type_sizes); + + + +namespace { + + list all_stats; + + + + void check_fortran_type_sizes() + { + ptrdiff_t type_sizes[4]; + CCTK_FNAME(lc_get_fortran_type_sizes) (type_sizes); + assert(type_sizes[0] == sizeof(lc_vec_t)); + assert(type_sizes[1] == sizeof(lc_ivec_t)); + assert(type_sizes[2] == sizeof(lc_space_t)); + assert(type_sizes[3] == sizeof(lc_control_t)); + } + + + + template + T divup(T const i, T const j) + { + assert(i >= 0 and j > 0); + return (i + j - 1) / j; + } + + template + T alignup(T const i, T const j) + { + return divup(i, j) * j; + } + + template + T divdown(T const i, T const j) + { + assert(i >= 0 and j > 0); + return i / j; + } + + template + T aligndown(T const i, T const j) + { + return divdown(i, j) * j; + } + + + + ostream& operator<<(ostream& os, lc_vec_t const& x) + { + os << "["; + for (int d=0; d0) os << ","; + os << x.v[d]; + } + os << "]"; + return os; + } + + ostream& operator<<(ostream& os, lc_ivec_t const& x) + { + os << "["; + for (int d=0; d0) os << ","; + os << x.v[d]; + } + os << "]"; + return os; + } + + ostream& operator<<(ostream& os, lc_space_t const& s) + { + os << "{" + << "min:" << s.min << "," + << "max:" << s.max << "," + << "step:" << s.step << "," + << "pos:" << s.pos << "," + << "count:" << s.count << "," + << "idx:" << s.idx + << "}"; + return os; + } + + ostream& operator<<(ostream& os, lc_control_t const& c) + { + os << "lc_control{\n" + << " ash:" << c.ash << ",\n" + << " loop:" << c.loop << ",\n" + << " thread:" << c.thread << ",\n" + << " coarse:" << c.coarse << ",\n" + << " fine:" << c.fine << "\n" + << " fine_thread:" << c.fine_thread << "\n" + << "}\n"; + return os; + } + + + + ptrdiff_t prod(lc_vec_t const& x) + { + ptrdiff_t r = 1; + for (int d=0; d= 0); + r *= x.v[d]; + } + return r; + } + + ptrdiff_t ind(lc_vec_t const& shape, lc_vec_t const& pos) + { + ptrdiff_t r = 0; + ptrdiff_t f = 1; + for (int d=0; d= 0 and pos.v[d] < shape.v[d]); + r += f * pos.v[d]; + assert(shape.v[d] >= 0); + f *= shape.v[d]; + } + return r; + } + + ptrdiff_t ind(lc_vec_t const& shape, + ptrdiff_t const i, ptrdiff_t const j, ptrdiff_t const k) + { + lc_vec_t const pos = {{ i, j, k }}; + return ind(shape, pos); + } + + + + void space_set_count(lc_space_t& space) + { + for (int d=0; d= 0); + for (int d=0; d 0) { + space.idx.v[d] = gidx % space.count.v[d]; + gidx /= space.count.v[d]; + } else { + space.idx.v[d] = 0; + } + } + return gidx != 0; + } + + int space_local2global(lc_space_t const& space) + { + int gidx = 0; + int fact = 1; + for (int d=0; d= 0 and space.idx.v[d] < space.count.v[d]); + gidx += fact * space.idx.v[d]; + fact *= space.count.v[d]; + } + return gidx; + } + + + + int get_num_fine_threads() + { + DECLARE_CCTK_PARAMETERS; + if (not use_smt_threads) return 1; + if (omp_get_num_threads() == 1) return 1; + static int num_smt_threads = -1; + if (CCTK_BUILTIN_EXPECT(num_smt_threads<0, false)) { +#pragma omp barrier + 0; // PGI compiler needs this +#pragma omp master + if (CCTK_IsFunctionAliased("GetNumSMTThreads")) { + num_smt_threads = GetNumSMTThreads(); + } else { + num_smt_threads = 1; + } +#pragma omp barrier + } + return num_smt_threads; + } + + int get_fine_thread_num() + { + DECLARE_CCTK_PARAMETERS; + if (not use_smt_threads) return 0; + if (omp_get_num_threads() == 1) return 0; + int const thread_num = omp_get_thread_num(); + int const num_fine_threads = get_num_fine_threads(); + return thread_num % num_fine_threads; + } + + int get_num_coarse_threads() + { + int const num_threads = omp_get_num_threads(); + int const num_fine_threads = get_num_fine_threads(); + assert(num_threads % num_fine_threads == 0); + return num_threads / num_fine_threads; + } + + int get_coarse_thread_num() + { + int const thread_num = omp_get_thread_num(); + int const num_fine_threads = get_num_fine_threads(); + return thread_num / num_fine_threads; + } + + // Wait until *ptr is different from old_value + void thread_wait(int const *const ptr, int const old_value) + { + while (*ptr == old_value) { +#pragma omp flush + 0; // PGI compiler needs this + } + } + + int fine_thread_broadcast(lc_fine_thread_comm_t* const comm, int value) + { + int const num_fine_threads = get_num_fine_threads(); + if (num_fine_threads == 1) return value; + assert(num_fine_threads < 8 * int(sizeof comm->state)); + int const fine_thread_num = get_fine_thread_num(); + int const master_mask = 1; + + // Assume comm->count == 0 initially + if (fine_thread_num == 0) { // if on master + + int const all_threads_mask = (1 << num_fine_threads) - 1; + if (comm->state != 0) { + // wait until everybody has acknowledged the previous value +#pragma omp flush + for (;;) { + int const state = comm->state; + if (comm->state == all_threads_mask) break; + thread_wait(&comm->state, state); + } + // mark the value as invalid + comm->state = 0; +#pragma omp flush + } + // publish value + comm->value = value; +#pragma omp flush + // mark the value as valid + comm->state = master_mask; +#pragma omp flush + + } else { // if not on master + + // wait until the value is valid, and it is a new value + int const thread_mask = 1 << fine_thread_num; +#pragma omp flush + for (;;) { + int const state = comm->state; + if ((comm->state & (master_mask | thread_mask)) == master_mask) break; + thread_wait(&comm->state, state); + } + // read value + value = comm->value; +#pragma omp flush + 0; // PGI compiler needs this + // acknowledge the value +#pragma omp atomic + comm->state |= thread_mask; +#pragma omp flush + + } // if not on master + + return value; + } + +} // namespace + + + +void lc_stats_init(lc_stats_t** const stats_ptr, + char const* const name, + char const* const file, + int const line) +{ + if (*stats_ptr) return; + + lc_stats_t* stats; +#pragma omp single copyprivate(stats) + { + stats = new lc_stats_t; + + stats->name = strdup(name); + stats->file = strdup(file); + stats->line = line; + stats->init_count = 0; + stats->points = 0.0; + stats->threads = 0.0; + stats->count = 0.0; + stats->sum = 0.0; + stats->sum2 = 0.0; + stats->min = numeric_limits::max(); + stats->max = 0.0; + + all_stats.push_back(stats); + } +#pragma omp single + { + *stats_ptr = stats; + } +} + + + +void lc_control_init(lc_control_t* restrict const control, + lc_stats_t* const stats, + ptrdiff_t imin, ptrdiff_t jmin, ptrdiff_t kmin, + ptrdiff_t imax, ptrdiff_t jmax, ptrdiff_t kmax, + ptrdiff_t iash, ptrdiff_t jash, ptrdiff_t kash, + ptrdiff_t di, ptrdiff_t dj, ptrdiff_t dk) +{ + DECLARE_CCTK_PARAMETERS; + +#pragma omp barrier +#pragma omp master + { + stats->start_time = getticks(); + } + + // Ensure thread counts are consistent + assert(get_num_coarse_threads() * get_num_fine_threads() == + omp_get_num_threads()); + + // Get cache line size + static ptrdiff_t max_cache_linesize = -1; + if (CCTK_BUILTIN_EXPECT(max_cache_linesize<0, false)) { +#pragma omp barrier +#pragma omp master + if (CCTK_IsFunctionAliased("GetCacheInfo1")) { + int const num_levels = GetCacheInfo1(NULL, NULL, 0); + vector linesizes(num_levels); + vector strides (num_levels); + GetCacheInfo1(&linesizes[0], &strides[0], num_levels); + max_cache_linesize = 1; + for (int level=0; levelash.v[d] = ash[d]; + } + + // Set up multithreading state + lc_thread_info_t* thread_info_ptr; +#pragma omp single copyprivate(thread_info_ptr) + { + thread_info_ptr = new lc_thread_info_t; + } + control->thread_info_ptr = thread_info_ptr; + + { + lc_fine_thread_comm_t** fine_thread_comm_ptrs; +#pragma omp single copyprivate(fine_thread_comm_ptrs) + { + fine_thread_comm_ptrs = + new lc_fine_thread_comm_t*[get_num_coarse_threads()]; + } + if (get_fine_thread_num() == 0) { + lc_fine_thread_comm_t* fine_thread_comm_ptr; + fine_thread_comm_ptr = new lc_fine_thread_comm_t; + fine_thread_comm_ptr->state = 0; + fine_thread_comm_ptrs[get_coarse_thread_num()] = fine_thread_comm_ptr; + } +#pragma omp barrier + control->fine_thread_comm_ptr = + fine_thread_comm_ptrs[get_coarse_thread_num()]; +#pragma omp barrier +#pragma omp single nowait + { + delete[] fine_thread_comm_ptrs; + } + } + + // Set loop sizes + for(int d=0; dloop.min.v[d] = loop_min[d]; + control->loop.max.v[d] = loop_max[d]; + // Thread loop +#if VECTORISE_ALIGNED_ARRAYS + // Move start to be aligned with vector size + control->thread.min.v[d] = + aligndown(control->loop.min.v[d], vect_size[d]); +#else + control->thread.min.v[d] = control->loop.min.v[d]; +#endif + control->thread.max.v[d] = loop_max[d]; + // Fine threads + control->fine_thread.count.v[d] = smt_size[d]; + } + { + int const fine_thread_num = get_fine_thread_num(); + bool const outside = + space_global2local(control->fine_thread, fine_thread_num); + assert(not outside); + } + + // Set loop step sizes + if (CCTK_EQUALS(initial_setup, "legacy")) { + // Like a non-LoopControl loop: no loop tiling (i.e. do not use + // coarse loops), parallelise only in k direction (i.e. assign + // equal k ranges to threads) + for(int d=0; dfine_thread.step.v[d] = vect_size[d]; + control->fine.step.v[d] = vect_size[d]; + ptrdiff_t const npoints = control->loop.max.v[d] - control->loop.min.v[d]; + ptrdiff_t const nthreads = d!=LC_DIM-1 ? 1 : get_num_coarse_threads(); + control->coarse.step.v[d] = + alignup(divup(npoints, nthreads), control->fine.step.v[d]); + control->thread.step.v[d] = alignup(npoints, control->coarse.step.v[d]); + } + } else if (CCTK_EQUALS(initial_setup, "tiled")) { + // Basic LoopControl setup + for(int d=0; dfine_thread.step.v[d] = vect_size[d]; + control->fine.step.v[d] = + alignup(smt_size[d], control->fine_thread.step.v[d]); + control->coarse.step.v[d] = + alignup(tile_size[d], control->fine.step.v[d]); + control->thread.step.v[d] = + alignup(loop_size[d], control->coarse.step.v[d]); + } + } else { + CCTK_WARN(CCTK_WARN_ABORT, "internal error"); + } + + if (veryverbose) { +#pragma omp master + CCTK_VInfo(CCTK_THORNSTRING, + "Loop %s (%s:%d): imin=[%td,%td,%td] imax=[%td,%td,%td]\n" + " smt.step=[%td,%td,%td] fine.step=[%td,%td,%td] coarse.step=[%td,%td,%td] thread.step=[%td,%td,%td]", + stats->name, stats->file, stats->line, + control->loop.min.v[0], control->loop.min.v[1], control->loop.min.v[2], + control->loop.max.v[0], control->loop.max.v[1], control->loop.max.v[2], + control->fine_thread.step.v[0], control->fine_thread.step.v[1], control->fine_thread.step.v[2], + control->fine.step.v[0], control->fine.step.v[1], control->fine.step.v[2], + control->coarse.step.v[0], control->coarse.step.v[1], control->coarse.step.v[2], + control->thread.step.v[0], control->thread.step.v[1], control->thread.step.v[2]); + } + + // Initialise selftest + if (selftest) { + unsigned char* restrict selftest_array; +#pragma omp single copyprivate(selftest_array) + { + ptrdiff_t const npoints = prod(control->ash); + selftest_array = new unsigned char[npoints]; + memset(selftest_array, 0, npoints * sizeof *selftest_array); + } + control->selftest_array = selftest_array; + } else { + control->selftest_array = NULL; + } +} + +void lc_control_finish(lc_control_t* restrict const control, + lc_stats_t* restrict const stats) +{ + DECLARE_CCTK_PARAMETERS; + +#pragma omp barrier + + // Finish selftest + if (selftest) { + assert(control->selftest_array); +#pragma omp barrier +#pragma omp single nowait + { + ptrdiff_t nfailed = 0; + for (ptrdiff_t k=0; kash.v[2]; ++k) { + for (ptrdiff_t j=0; jash.v[1]; ++j) { + for (ptrdiff_t i=0; iash.v[0]; ++i) { + bool const inside = + i >= control->loop.min.v[0] and i < control->loop.max.v[0] and + j >= control->loop.min.v[1] and j < control->loop.max.v[1] and + k >= control->loop.min.v[2] and k < control->loop.max.v[2]; + ptrdiff_t const ipos = ind(control->ash, i,j,k); + nfailed += control->selftest_array[ipos] != inside; + } + } + } + if (nfailed > 0) { + CCTK_VWarn(CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, + "LoopControl self-test failed"); + } + delete[] control->selftest_array; + } + control->selftest_array = NULL; + } + + // Collect statistics +#pragma omp barrier +#pragma omp master + { + ticks const end_time = getticks(); + double const elapsed_time = + seconds_per_tick() * elapsed(end_time, stats->start_time); + ptrdiff_t npoints = 1; + for (int d=0; dloop.max.v[d] - control->loop.min.v[d]; + } + if (stats->init_count < 1) { + // Skip the first iteration + ++stats->init_count; + if (veryverbose) { + double const time_point = + elapsed_time * omp_get_num_threads() / npoints; + CCTK_VInfo(CCTK_THORNSTRING, + "Loop %s: time=%g, time/point=%g s", + stats->name, elapsed_time, time_point); + } + } else { + stats->points += double(npoints); + stats->threads += double(omp_get_num_threads()); + stats->count += 1.0; + stats->sum += elapsed_time; + stats->sum2 += pow(elapsed_time, 2.0); + stats->min = fmin(stats->min, elapsed_time); + stats->max = fmax(stats->max, elapsed_time); + if (veryverbose) { + double const avg_thread = stats->sum / stats->count; + double const avg_point = + stats->sum * stats->threads / (stats->count * stats->points); + CCTK_VInfo(CCTK_THORNSTRING, + "Loop %s: count=%g, avg/thread=%g s, avg/point=%g s", + stats->name, stats->count, avg_thread, avg_point); + } + } + } + + // Tear down multithreading state +#pragma omp single nowait + { + delete control->thread_info_ptr; + } + control->thread_info_ptr = NULL; + if (get_fine_thread_num() == 0) { + delete control->fine_thread_comm_ptr; + } + control->fine_thread_comm_ptr = NULL; +} + + + +void lc_thread_init(lc_control_t* restrict const control) +{ + space_set_count(control->thread); +#pragma omp single + { + control->thread_info_ptr->idx = get_num_coarse_threads(); + } + control->thread_done = + space_global2local(control->thread, get_coarse_thread_num()); + space_idx2pos(control->thread); +} + +int lc_thread_done(lc_control_t const* restrict const control) +{ + return control->thread_done; +} + +void lc_thread_step(lc_control_t* restrict const control) +{ + // Get next thread block + int new_global_idx; + if (get_fine_thread_num() == 0) { +#pragma omp critical(LoopControl_lc_thread_step) + { + new_global_idx = control->thread_info_ptr->idx++; + } + } + new_global_idx = + fine_thread_broadcast(control->fine_thread_comm_ptr, new_global_idx); + control->thread_done = space_global2local(control->thread, new_global_idx); + space_idx2pos(control->thread); +} + + + +void lc_selftest_set(lc_control_t const* restrict control, + ptrdiff_t const lmin, ptrdiff_t const lmax, + ptrdiff_t const imin, ptrdiff_t const imax, + ptrdiff_t const di, + ptrdiff_t const i0, ptrdiff_t const j, ptrdiff_t const k) +{ + DECLARE_CCTK_PARAMETERS; + assert(selftest); + assert(imin>=0 and iminash.v[0]); + assert(di>0); + assert(j>=0 and jash.v[1]); + assert(k>=0 and kash.v[2]); + assert(i0+di-1>=control->loop.min.v[0] and i0loop.max.v[0]); + if (imin>lmin) { + ptrdiff_t const ipos_imin = ind(control->ash, imin,j,k); + assert(ipos_imin % di == 0); + } + if (imaxash, imax,j,k); + assert(ipos_imax % di == 0); + } + assert(j>=control->loop.min.v[1] and jloop.max.v[1]); + assert(k>=control->loop.min.v[2] and kloop.max.v[2]); + for (ptrdiff_t i=i0; i=imin and i=0 and iash.v[0]); + assert(i>=control->loop.min.v[0] and iloop.max.v[0]); + ptrdiff_t const ipos = ind(control->ash, i,j,k); + unsigned char& elt = control->selftest_array[ipos]; +#pragma omp atomic + ++elt; + if (elt!=1) { +#pragma omp critical + { + fprintf(stderr, + "thread=%d/%d fine_thread=%d/%d ijk=[%td,%td,%td]\n", + get_coarse_thread_num(), get_num_coarse_threads(), + get_fine_thread_num(), get_num_fine_threads(), + i,j,k); + assert(elt==1); + } + } + } + } +} + + + +int lc_setup(void) +{ + check_fortran_type_sizes(); + return 0; +} + + + +void lc_statistics(CCTK_ARGUMENTS) +{ + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + CCTK_INFO("LoopControl statistics:"); + for (list::const_iterator + istats = all_stats.begin(); istats != all_stats.end(); ++istats) + { + lc_stats_t const* restrict const stats = *istats; + if (stats->count == 0.0) { + printf(" Loop %s (%s:%d):\n", + stats->name, stats->file, stats->line); + } else { + double const avg_thread = stats->sum / stats->count; + double const avg_point = + stats->sum * stats->threads / (stats->count * stats->points); + printf(" Loop %s (%s:%d): count=%g, avg/thread=%g s, avg/point=%g s\n", + stats->name, stats->file, stats->line, + stats->count, avg_thread, avg_point); + } + } +} + +void lc_statistics_maybe(CCTK_ARGUMENTS) +{ + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + if (not verbose) return; + + lc_statistics(CCTK_PASS_CTOC); +} diff --git a/Carpet/LoopControl/src/loopcontrol.h b/Carpet/LoopControl/src/loopcontrol.h index a6bb71be2..04e951c23 100644 --- a/Carpet/LoopControl/src/loopcontrol.h +++ b/Carpet/LoopControl/src/loopcontrol.h @@ -1,392 +1,287 @@ -#ifndef LC_LOOPCONTROL_H -#define LC_LOOPCONTROL_H +#ifndef LOOPCONTROL_H +#define LOOPCONTROL_H /* This file uses the namespace LC_* for macros and lc_* for C identifiers. */ -#include - +#define LC_DIM 3 + #ifdef CCODE +#include +#include + +#include + #ifdef __cplusplus extern "C" { #endif - - - -/* A topology */ -typedef struct lc_topology_t { - int nthreads[2][3]; /* [0:outer|1:inner][ijk] */ -} lc_topology_t; - -/* A tiling specification */ -typedef struct lc_tiling_t { - int npoints; -} lc_tiling_t; - - - -typedef struct lc_state_t { - int topology; - int tiling[3]; -} lc_state_t; - - - -/* For simulated annealing */ -typedef struct lc_auto_state_t lc_auto_state_t; - -/* For hill climbing */ -typedef struct lc_hill_state_t lc_hill_state_t; - - - -/* Statistics for one control parameter set (thread topology and - tiling specification) of one user parameter set of one loop */ -typedef struct lc_stattime_t { - struct lc_stattime_t * next; - - /* Keys */ - - lc_state_t state; - int inthreads, jnthreads, knthreads; - int inithreads, jnithreads, knithreads; - int inpoints, jnpoints, knpoints; - - /* Data */ - - /* Statistics */ - /* number of calls and threads */ - double time_count, time_count_init; - /* time spent setting up loops */ - double time_setup_sum, time_setup_sum2; - /* time spent iterating */ - double time_calc_sum, time_calc_sum2; - double time_calc_init; /* time for first calculation */ - - /* wall time tag */ - double last_updated; -} lc_stattime_t; - - - -/* Statistics for one user parameter set (number of threads and number - of iterations) of one loop */ -typedef struct lc_statset_t { - struct lc_statset_t * next; - /* Keys */ - int num_threads; - int npoints[3]; /* [dim] */ - /* Data */ + static inline ptrdiff_t lc_min(ptrdiff_t const i, ptrdiff_t const j) + { + return i < j ? i : j; + } - /* Thread topologies */ - lc_topology_t * restrict topologies; /* [topology] */ - int ntopologies; + static inline ptrdiff_t lc_max(ptrdiff_t const i, ptrdiff_t const j) + { + return i > j ? i : j; + } - /* Tiling specifications */ - lc_tiling_t * restrict tilings[3]; /* [dim] */ - int ntilings[3]; /* [dim] */ - int * restrict topology_ntilings[3]; /* [dim][topology] */ - /* Simulated annealing state */ - lc_auto_state_t * auto_state; - /* Hill climbing state */ - lc_hill_state_t * hill_state; + struct lc_thread_info_t; + struct lc_fine_thread_comm_t; - lc_stattime_t * stattime_list; + struct lc_stats_t; - /* Statistics */ - /* number of calls and threads */ - double time_count, time_count_init; - /* time spent setting up loops */ - double time_setup_sum, time_setup_sum2; - /* time spent iterating */ - double time_calc_sum, time_calc_sum2; - double time_calc_init; /* time for first calculation */ -} lc_statset_t; - - - -/* Statistics for one loop (one source code location) */ -typedef struct lc_statmap_t { - struct lc_statmap_t * next; /* for linked list */ + typedef struct { + ptrdiff_t v[LC_DIM]; + } lc_vec_t; - /* Name */ - char const * restrict name; + typedef struct { + int v[LC_DIM]; + } lc_ivec_t; - lc_statset_t * statset_list; -} lc_statmap_t; - - - -/* Linked list of all loop statistics structures */ -extern lc_statmap_t * lc_statmap_list; - - - -static inline -int -lc_state_valid (lc_statset_t const * restrict const ls, - lc_state_t const * restrict const state) -{ - if (state->topology >= 0 && state->topology < ls->ntopologies) { - int const * restrict const ntilings = - ls->topology_ntilings[state->topology]; - return (state->tiling[0] >= 0 && state->tiling[0] < ntilings[0] && - state->tiling[1] >= 0 && state->tiling[1] < ntilings[1] && - state->tiling[2] >= 0 && state->tiling[2] < ntilings[2]); + typedef struct { + /* Traverse pos from min (inclusive) to max (exclusive) with a + stride of step. Equivalently, traverse idx from 0 (inclusive) + to count (exclusive). */ + lc_vec_t min, max, step, pos; + lc_ivec_t count, idx; + } lc_space_t; + + typedef struct { + lc_vec_t ash; + lc_space_t loop; + lc_space_t thread; + struct lc_thread_info_t* thread_info_ptr; /* shared between all + threads */ + int thread_done; + lc_space_t coarse; /* count, idx, pos are undefined */ + lc_space_t fine; /* count, idx, pos are undefined */ + lc_space_t fine_thread; /* min, max, pos are undefined */ + struct lc_fine_thread_comm_t* fine_thread_comm_ptr; /* shared + between SMT + threads */ + unsigned char* restrict selftest_array; + } lc_control_t; + + + + void lc_stats_init(struct lc_stats_t** stats, + char const* name, char const* file, int line); + void lc_control_init(lc_control_t* restrict control, + struct lc_stats_t *restrict stats, + ptrdiff_t imin, ptrdiff_t jmin, ptrdiff_t kmin, + ptrdiff_t imax, ptrdiff_t jmax, ptrdiff_t kmax, + ptrdiff_t iash, ptrdiff_t jash, ptrdiff_t kash, + ptrdiff_t di, ptrdiff_t dj, ptrdiff_t dk); + void lc_control_finish(lc_control_t* restrict control, + struct lc_stats_t *restrict stats); + + void lc_thread_init(lc_control_t* restrict control); + int lc_thread_done(lc_control_t const* restrict control); + void lc_thread_step(lc_control_t* restrict control); + + void lc_selftest_set(lc_control_t const* restrict control, + ptrdiff_t lmin, ptrdiff_t lmax, + ptrdiff_t imin, ptrdiff_t imax, ptrdiff_t di, + ptrdiff_t i, ptrdiff_t j, ptrdiff_t k); + + + +#define LC_COARSE_SETUP(D) \ + lc_control.coarse.min.v[D] = lc_control.thread.pos.v[D]; \ + lc_control.coarse.max.v[D] = \ + lc_min(lc_control.thread.max.v[D], \ + lc_control.coarse.min.v[D] + lc_control.thread.step.v[D]); \ + ptrdiff_t const lc_cmin##D = lc_control.coarse.min.v[D]; \ + ptrdiff_t const lc_cmax##D = lc_control.coarse.max.v[D]; \ + ptrdiff_t const lc_cstep##D = lc_control.coarse.step.v[D]; +#define LC_COARSE_LOOP(D) \ + for (ptrdiff_t lc_cpos##D = lc_cmin##D; \ + lc_cpos##D < lc_cmax##D; \ + lc_cpos##D += lc_cstep##D) + +#define LC_FINE_SETUP(D) \ + lc_control.fine.min.v[D] = lc_cpos##D; \ + lc_control.fine.max.v[D] = \ + lc_min(lc_control.coarse.max.v[D], \ + lc_control.fine.min.v[D] + lc_control.coarse.step.v[D]); \ + ptrdiff_t /*const*/ lc_fmin##D = lc_control.fine.min.v[D]; \ + ptrdiff_t /*const*/ lc_fmax##D = lc_control.fine.max.v[D]; \ + ptrdiff_t const lc_fstep##D = lc_control.fine.step.v[D]; \ + ptrdiff_t const lc_ftoff##D = \ + lc_control.fine_thread.idx.v[D] * lc_control.fine_thread.step.v[D]; +#define LC_FINE_LOOP(I, NI, D) \ + for (ptrdiff_t I = lc_fmin##D + lc_ftoff##D; \ + I < lc_fmax##D; \ + I += lc_fstep##D) \ + { \ + ptrdiff_t const NI CCTK_ATTRIBUTE_UNUSED = \ + CCTK_BUILTIN_EXPECT(lc_dir##D==0, 1) ? 0 : \ + lc_dir##D<0 ? I+1 : lc_control.loop.max.v[D]-I; + +#if VECTORISE_ALIGNED_ARRAYS + /* Arrays are aligned: fmin0 is the aligned loop boundary; keep it, + and set up imin to be the intended loop boundary */ +# define LC_ALIGN(i,j,k) \ + ptrdiff_t const lc_imin = lc_max(lc_control.loop.min.v[0], lc_fmin0); \ + ptrdiff_t const lc_imax = lc_fmax0; +#else + /* Arrays are not aligned: fine.min[0] and fine.max[0] are the + intended loop boundaries; override fmin0 and fmax0 to be aligned, + and set imin and imax; this may move the fine loop boundaries + except at outer boundaries to avoid partial stores */ +# define LC_ALIGN(i,j,k) \ + lc_fmin0 = lc_control.fine.min.v[0]; \ + lc_fmax0 = lc_control.fine.max.v[0]; \ + ptrdiff_t lc_imin = lc_fmin0; \ + ptrdiff_t lc_imax = lc_fmax0; \ + int const lc_fmin0_is_outer = lc_fmin0 == lc_control.loop.min.v[0]; \ + int const lc_fmax0_is_outer = lc_fmax0 == lc_control.loop.max.v[0]; \ + ptrdiff_t const lc_ipos = lc_fmin0 + lc_ash0 * (j + lc_ash1 * k); \ + ptrdiff_t const lc_ioffset = lc_ipos % lc_align0; \ + lc_fmin0 -= lc_ioffset; \ + if (!lc_fmax0_is_outer) lc_fmax0 -= lc_ioffset; \ + if (!lc_fmin0_is_outer) lc_imin = lc_fmin0; \ + if (!lc_fmax0_is_outer) lc_imax = lc_fmax0; +#endif + +#define LC_SELFTEST(i,j,k) \ + if (CCTK_BUILTIN_EXPECT(lc_control.selftest_array != NULL, 0)) { \ + lc_selftest_set(&lc_control, \ + lc_control.loop.min.v[0], lc_control.loop.max.v[0], \ + lc_imin, lc_imax, lc_align0, i, j, k); \ } - return 0; -} - -static inline -int -lc_state_equal (lc_state_t const * restrict const state1, - lc_state_t const * restrict const state2) -{ - return (state1->topology == state2->topology && - state1->tiling[0] == state2->tiling[0] && - state1->tiling[1] == state2->tiling[1] && - state1->tiling[2] == state2->tiling[2]); -} - - - -void -lc_stattime_init (lc_stattime_t * restrict const lt, - lc_statset_t * restrict const ls, - lc_state_t const * restrict const state); - -lc_stattime_t * -lc_stattime_find (lc_statset_t const * restrict const ls, - lc_state_t const * restrict const state) - CCTK_ATTRIBUTE_PURE; - -lc_stattime_t * -lc_stattime_find_create (lc_statset_t * restrict const ls, - lc_state_t const * restrict const state); - - - -/* TODO: introduce type for num_threads and npoints[3] */ -void -lc_statset_init (lc_statset_t * restrict const ls, - lc_statmap_t * restrict const lm, - int const num_threads, - int const npoints[3]); - -lc_statset_t * -lc_statset_find (lc_statmap_t const * restrict const lm, - int const num_threads, - int const npoints[3]) - CCTK_ATTRIBUTE_PURE; - -lc_statset_t * -lc_statset_find_create (lc_statmap_t * restrict const lm, - int const num_threads, - int const npoints[3]); - - - -typedef struct lc_control_t { - lc_statmap_t * restrict statmap; - lc_statset_t * restrict statset; - lc_stattime_t * restrict stattime; - - /* Copy of arguments (useful for debugging) */ - /* Full domain */ - int imin, jmin, kmin; - int imax, jmax, kmax; - int ilsh, jlsh, klsh; - int di; - - /* Control settings for thread parallelism (useful for debugging) */ - /* Outer thread decomposition of full domain */ - int iiimin, jjjmin, kkkmin; - int iiimax, jjjmax, kkkmax; - int iiistep, jjjstep, kkkstep; - - /* Control settings for current thread (useful for debugging) */ - int thread_num; - /* Location of this thread in full domain */ - int iii, jjj, kkk; - /* Index (not location!) of this thread in loop tile */ - int iiii, jjjj, kkkk; - - /* Control settings for tiling loop */ - /* Loop tiling decomposition in this thread's domain */ - int iimin, jjmin, kkmin; - int iimax, jjmax, kkmax; - int iistep, jjstep, kkstep; - - /* Control settings for inner thread parallelism */ - /* Inner thread decomposition, as offsets (!) to loop tiling */ - int iiiimin, jjjjmin, kkkkmin; - int iiiimax, jjjjmax, kkkkmax; - int iiiistep, jjjjstep, kkkkstep; - - /* Timing statistics */ - double time_setup_begin, time_calc_begin; - - /* Self check */ - char * restrict selftest_count; -} lc_control_t; - - - -static inline -int -lc_min (int const i, int const j) -{ - return i < j ? i : j; -} - -static inline -int -lc_max (int const i, int const j) -{ - return i > j ? i : j; -} - -/* Align by shifting to the right if necessary */ -static inline -int -lc_align (int const i, int const di) -{ - return (i + di - 1) / di * di; -} - - - -void -lc_statmap_init (int * restrict initialised, - lc_statmap_t * restrict ls, - char const * restrict name); - -void -lc_control_init (lc_control_t * restrict lc, - lc_statmap_t * restrict lm, - int imin, int jmin, int kmin, - int imax, int jmax, int kmax, - int ilsh, int jlsh, int klsh, - int di); - -void -lc_control_selftest (lc_control_t * restrict lc, - int imin, int imax, int j, int k); - -void -lc_control_finish (lc_control_t * restrict lc); - - - -#define LC_LOOP3(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) \ - LC_LOOP3VEC(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh, 1) + + + +#define LC_LOOP3STR_NORMAL(name, i,j,k, ni,nj,nk, \ + idir_, jdir_, kdir_, \ + imin_,jmin_,kmin_, \ + imax_,jmax_,kmax_, \ + iash_,jash_,kash_, \ + imin,imax, di_) \ + do { \ + typedef int lc_loop3vec_##name; \ + \ + ptrdiff_t const lc_dir0 CCTK_ATTRIBUTE_UNUSED = (idir_); \ + ptrdiff_t const lc_dir1 CCTK_ATTRIBUTE_UNUSED = (jdir_); \ + ptrdiff_t const lc_dir2 CCTK_ATTRIBUTE_UNUSED = (kdir_); \ + \ + ptrdiff_t const lc_ash0 CCTK_ATTRIBUTE_UNUSED = (iash_); \ + ptrdiff_t const lc_ash1 CCTK_ATTRIBUTE_UNUSED = (jash_); \ + ptrdiff_t const lc_ash2 CCTK_ATTRIBUTE_UNUSED = (kash_); \ + \ + ptrdiff_t const lc_align0 CCTK_ATTRIBUTE_UNUSED = (di_); \ + ptrdiff_t const lc_align1 CCTK_ATTRIBUTE_UNUSED = 1; \ + ptrdiff_t const lc_align2 CCTK_ATTRIBUTE_UNUSED = 1; \ + \ + static struct lc_stats_t* lc_stats = NULL; \ + lc_stats_init(&lc_stats, #name, __FILE__, __LINE__); \ + \ + lc_control_t lc_control; \ + lc_control_init(&lc_control, lc_stats, \ + (imin_), (jmin_), (kmin_), \ + (imax_), (jmax_), (kmax_), \ + lc_ash0, lc_ash1, lc_ash2, \ + lc_align0, lc_align1, lc_align2); \ + \ + /* Multithreading */ \ + for (lc_thread_init(&lc_control); \ + !lc_thread_done(&lc_control); \ + lc_thread_step(&lc_control)) \ + { \ + \ + /* Coarse loops */ \ + LC_COARSE_SETUP(2) \ + LC_COARSE_SETUP(1) \ + LC_COARSE_SETUP(0) \ + LC_COARSE_LOOP(2) { \ + LC_COARSE_LOOP(1) { \ + LC_COARSE_LOOP(0) { \ + \ + /* Fine loops */ \ + LC_FINE_SETUP(2) \ + LC_FINE_SETUP(1) \ + LC_FINE_SETUP(0) \ + LC_FINE_LOOP(k, nk, 2) { \ + LC_FINE_LOOP(j, nj, 1) { \ + LC_ALIGN(i,j,k) \ + LC_FINE_LOOP(i, ni, 0) { \ + LC_SELFTEST(i,j,k) \ + { + +#define LC_ENDLOOP3STR_NORMAL(name) \ + } /* body */ \ + }}}}}} /* fine */ \ + }}} /* coarse */ \ + } /* multithreading */ \ + lc_control_finish(&lc_control, lc_stats); \ + typedef lc_loop3vec_##name lc_ensure_proper_nesting; \ + } while(0) + + + +/* Definitions to ensure compatibility with earlier versions of + LoopControl */ +#define LC_LOOP3VEC(name, i,j,k, \ + imin_,jmin,kmin, imax_,jmax,kmax, \ + iash,jash,kash, imin,imax, di) \ + LC_LOOP3STR_NORMAL(name, i,j,k, lc_ni,lc_nj,lc_nk, \ + 0,0,0, \ + imin_,jmin,kmin, imax_,jmax,kmax, \ + iash,jash,kash, imin,imax, di) +#define LC_ENDLOOP3VEC(name) \ + LC_ENDLOOP3STR_NORMAL(name) + +#define LC_LOOP3(name, i,j,k, \ + imin,jmin,kmin, imax,jmax,kmax, \ + iash,jash,kash) \ + LC_LOOP3VEC(name, i,j,k, \ + imin,jmin,kmin, imax,jmax,kmax, \ + iash,jash,kash, lc_imin_,lc_imax_, 1) #define LC_ENDLOOP3(name) \ LC_ENDLOOP3VEC(name) - -#define LC_LOOP3VEC(name, i,j,k, imin_,jmin_,kmin_, imax_,jmax_,kmax_, ilsh_,jlsh_,klsh_, di_) \ - do { \ - typedef int lc_loop3vec_##name; \ - static int lc_initialised = 0; \ - static lc_statmap_t lc_lm; \ - if (! lc_initialised) { \ - lc_statmap_init (& lc_initialised, & lc_lm, #name); \ - } \ - int const lc_di = (di_); \ - lc_control_t lc_lc; \ - lc_control_init (& lc_lc, & lc_lm, \ - (imin_), (jmin_), (kmin_), \ - (imax_), (jmax_), (kmax_), \ - (ilsh_), (jlsh_), (klsh_), \ - lc_di); \ - int const lc_do_selftest = lc_lc.selftest_count != 0; \ - \ - /* Coarse loop */ \ - for (int lc_kk = lc_lc.kkmin; \ - lc_kk < lc_lc.kkmax; \ - lc_kk += lc_lc.kkstep) \ - { \ - int const lc_kmin = lc_kk + lc_lc.kkkkmin; \ - int const lc_kmax = \ - lc_min (lc_kk + lc_min (lc_lc.kkkkmax, lc_lc.kkstep), \ - lc_lc.kkmax); \ - \ - for (int lc_jj = lc_lc.jjmin; \ - lc_jj < lc_lc.jjmax; \ - lc_jj += lc_lc.jjstep) \ - { \ - int const lc_jmin = lc_jj + lc_lc.jjjjmin; \ - int const lc_jmax = \ - lc_min (lc_jj + lc_min (lc_lc.jjjjmax, lc_lc.jjstep), \ - lc_lc.jjmax); \ - \ - for (int lc_ii = lc_lc.iimin; \ - lc_ii < lc_lc.iimax; \ - lc_ii += lc_lc.iistep) \ - { \ - int const lc_imin = lc_ii + lc_lc.iiiimin; \ - int const lc_imax = \ - lc_min (lc_ii + lc_min (lc_lc.iiiimax, lc_lc.iistep), \ - lc_lc.iimax); \ - \ - /* Fine loop */ \ - for (int k = lc_kmin; k < lc_kmax; ++k) { \ - for (int j = lc_jmin; j < lc_jmax; ++j) { \ - LC_PRELOOP_STATEMENTS \ - { \ - if (CCTK_BUILTIN_EXPECT(lc_do_selftest, 0)) { \ - lc_control_selftest (& lc_lc, lc_imin, lc_imax, j, k); \ - } \ - int const lc_ipos = \ - lc_imin + lc_lc.ilsh * (j + lc_lc.jlsh * k); \ - int const lc_ioffset = (lc_ipos & - lc_di) - lc_ipos; \ - for (int i = lc_imin + lc_ioffset; i < lc_imax; i += lc_di) { - -#define LC_ENDLOOP3VEC(name) \ - } \ - } \ - LC_POSTLOOP_STATEMENTS \ - } \ - } \ - } \ - } \ - } \ - lc_control_finish (& lc_lc); \ - typedef lc_loop3vec_##name lc_ensure_proper_nesting; \ - } while (0) - -/* Pre- and post loop statements are inserted around the innermost - loop, which is executed serially. By default these are empty. */ -#define LC_PRELOOP_STATEMENTS -#define LC_POSTLOOP_STATEMENTS - - - + + + /* Replace CCTK_LOOP macros */ -#undef CCTK_LOOP3 -#undef CCTK_ENDLOOP3 -#define CCTK_LOOP3 LC_LOOP3 -#define CCTK_ENDLOOP3 LC_ENDLOOP3 - - - -#ifdef __cplusplus -} +#if !defined CCTK_LOOP3STR_NORMAL || !defined CCTK_ENDLOOP3STR_NORMAL +# error "internal error" #endif - +#undef CCTK_LOOP3STR_NORMAL +#undef CCTK_ENDLOOP3STR_NORMAL +#define CCTK_LOOP3STR_NORMAL(name, i,j,k, ni,nj,nk, \ + idir, jdir, kdir, \ + imin_,jmin,kmin, \ + imax_,jmax,kmax, \ + iash,jash,kash, \ + imin,imax, di) \ + LC_LOOP3STR_NORMAL(name, i,j,k, ni,nj,nk, \ + idir, jdir, kdir, \ + imin_,jmin,kmin, \ + imax_,jmax,kmax, \ + iash,jash,kash, \ + imin,imax, di) +#define CCTK_ENDLOOP3STR_NORMAL(name) \ + LC_ENDLOOP3STR_NORMAL(name) + + + +#ifdef __cplusplus + } #endif - +#endif /* #ifdef CCODE */ #ifdef FCODE # include "loopcontrol_fortran.h" #endif -#endif /* ifndef LC_LOOPCONTROL_H */ +#endif /* #ifndef LOOPCONTROL_H */ diff --git a/Carpet/LoopControl/src/loopcontrol_fortran.h b/Carpet/LoopControl/src/loopcontrol_fortran.h index dcde8e149..f0548b7c3 100644 --- a/Carpet/LoopControl/src/loopcontrol_fortran.h +++ b/Carpet/LoopControl/src/loopcontrol_fortran.h @@ -3,86 +3,141 @@ #ifndef LOOPCONTROL_FORTRAN_H #define LOOPCONTROL_FORTRAN_H +#include "cctk.h" + + + + +#define LC_COARSE_DECLARE(name, D) \ + CCTK_POINTER :: name/**/_cmin/**/D, name/**/_cmax/**/D, \ + name/**/_cstep/**/D, name/**/_cpos/**/D +#define LC_COARSE_OMP_PRIVATE(name, D) \ + name/**/_cmin/**/D, name/**/_cmax/**/D, \ + name/**/_cstep/**/D, name/**/_cpos/**/D +#define LC_COARSE_SETUP(name, D) \ + name/**/_control%coarse%min%v(D) = name/**/_control%thread%pos%v(D) && \ + name/**/_control%coarse%max%v(D) = \ + min(name/**/_control%thread%max%v(D), \ + name/**/_control%coarse%min%v(D) + \ + name/**/_control%thread%step%v(D)) && \ + name/**/_cmin/**/D = name/**/_control%coarse%min%v(D) && \ + name/**/_cmax/**/D = name/**/_control%coarse%max%v(D) && \ + name/**/_cstep/**/D = name/**/_control%coarse%step%v(D) +#define LC_COARSE_LOOP(name, D) \ + do name/**/_cpos/**/D = name/**/_cmin/**/D, name/**/_cmax/**/D, \ + name/**/_cstep/**/D + +#define LC_FINE_DECLARE(name, I, D) \ + CCTK_POINTER :: name/**/_fmin/**/D, name/**/_fmax/**/D, \ + name/**/_fstep/**/D, I +#define LC_FINE_OMP_PRIVATE(name, I, D) \ + name/**/_fmin/**/D, name/**/_fmax/**/D, \ + name/**/_fstep/**/D, I +#define LC_FINE_SETUP(name, D) \ + name/**/_control%fine%min%v(D) = name/**/_cpos/**/D && \ + name/**/_control%fine%max%v(D) = \ + min(name/**/_control%coarse%max%v(D), \ + name/**/_control%fine%min%v(D) + \ + name/**/_control%coarse%step%v(D)) && \ + name/**/_fmin/**/D = name/**/_control%fine%min%v(D) && \ + name/**/_fmax/**/D = name/**/_control%fine%max%v(D) && \ + name/**/_fstep/**/D = name/**/_control%fine%step%v(D) +#define LC_FINE_LOOP(name, I, D) \ + do I = name/**/_fmin/**/D, name/**/_fmax/**/D, name/**/_fstep/**/D + + + +#define LC_DECLARE3(name, i,j,k) \ + CCTK_POINTER :: name/**/_ash1, name/**/_ash2, name/**/_ash3 && \ + CCTK_POINTER :: name/**/_align1, name/**/_align2, name/**/_align3 && \ + CCTK_POINTER, save :: name/**/_stats = 0 && \ + type(lc_control_t) :: name/**/_control && \ + LC_COARSE_DECLARE(name, 1) && \ + LC_COARSE_DECLARE(name, 2) && \ + LC_COARSE_DECLARE(name, 3) && \ + LC_FINE_DECLARE(name, i, 1) && \ + LC_FINE_DECLARE(name, j, 2) && \ + LC_FINE_DECLARE(name, k, 3) + +#define LC_OMP_PRIVATE(name, i,j,k) \ + name/**/_ash1, name/**/_ash2, name/**/_ash3, \ + name/**/_align1, name/**/_align2, name/**/_align3, \ + name/**/_control, \ + LC_COARSE_OMP_PRIVATE(name, 1), \ + LC_COARSE_OMP_PRIVATE(name, 2), \ + LC_COARSE_OMP_PRIVATE(name, 3), \ + LC_FINE_OMP_PRIVATE(name, i, 1), \ + LC_FINE_OMP_PRIVATE(name, j, 2), \ + LC_FINE_OMP_PRIVATE(name, k, 3) + + + +#define LC_LOOP3STR(name, i,j,k, imin_,jmin_,kmin_, imax_,jmax_,kmax_, \ + iash_,jash_,kash_, di_) \ + name/**/_ash1 = (iash_) && \ + name/**/_ash2 = (jash_) && \ + name/**/_ash3 = (kash_) && \ + name/**/_aligh1 = (di_) && \ + name/**/_aligh2 = 1 && \ + name/**/_aligh3 = 1 && \ + && \ + call lc_stats_init(name/**/stats, #name) && \ + call lc_control_init(name/**/control, name/**/stats, \ + (imin_), (jmin_), (kmin_), \ + (imax_), (jmax_), (kmax_), \ + name/**/ash1, name/**/ash2, name/**/ash3, \ + name/**/align1, name/**/align2, name/**/align3) && \ + && \ + /* Multithreading */ && \ + call lc_thread_init(name/**/control) && \ + do while (.not. lc_thread_done(name/**/control)) && \ + && \ + /* Coarse loops */ && \ + LC_COARSE_SETUP(3) && \ + LC_COARSE_SETUP(2) && \ + LC_COARSE_SETUP(1) && \ + LC_COARSE_LOOP(3) && \ + LC_COARSE_LOOP(2) && \ + LC_COARSE_LOOP(1) && \ + && \ + /* Fine loops */ && \ + LC_FINE_SETUP(3) && \ + LC_FINE_SETUP(2) && \ + LC_FINE_SETUP(1) && \ + LC_FINE_LOOP(3) && \ + LC_FINE_LOOP(2) && \ + LC_FINE_LOOP(1) + +#define LC_ENDLOOP3STR(name) && \ + end do && \ + end do && \ + end do && \ + end do && \ + end do && \ + end do && \ + call lc_thread_step(name/**/control) && \ + end do && \ + call lc_control_finish(name/**/control, name/**/stats) + + + +#define LC_LOOP3(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, iash,jash,kash) \ + LC_LOOP3STR(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, iash,jash,kash, 1) +#define LC_ENDLOOP3(name) \ + LC_ENDLOOP3STR(name) -#define LC_DECLARE3(name, i,j,k) &&\ -type (lc_statmap_t), save :: name/**/_lm &&\ -integer, save :: name/**/_initialised = 0 &&\ -type (lc_control_t) :: name/**/_lc &&\ -integer :: name/**/_ii, name/**/_jj, name/**/_kk &&\ -integer :: name/**/_imin, name/**/_jmin, name/**/_kmin &&\ -integer :: name/**/_imax, name/**/_jmax, name/**/_kmax &&\ -integer :: name/**/_ipos, name/**/_ioffset, name/**/_di &&\ -integer :: i, j, k - - - -#define LC_PRIVATE3(name) \ -name/**/_lc, \ -name/**/_imin, name/**/_jmin, name/**/_kmin, \ -name/**/_imax, name/**/_jmax, name/**/_kmax, \ -name/**/_ipos, name/**/_ioffset, name/**/_di +/* Replace CCTK_LOOP macros */ +#undef CCTK_LOOP3_DECLARE +#undef CCTK_LOOP3_OMP_PRIVATE +#undef CCTK_LOOP3 +#undef CCTK_ENDLOOP3 +#define CCTK_LOOP3_DECLARE LC_LOOP3_DECLARE +#define CCTK_LOOP3_OMP_PRIVATE LC_LOOP3_OMP_PRIVATE +#define CCTK_LOOP3 LC_LOOP3 +#define CCTK_ENDLOOP3 LC_ENDLOOP3 -#define LC_LOOP3(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh) \ - LC_LOOP3VEC(name, i,j,k, imin,jmin,kmin, imax,jmax,kmax, ilsh,jlsh,klsh, 1) -#define LC_ENDLOOP3(name) \ - LC_ENDLOOP3VEC(name) - -#define LC_LOOP3VEC(name, i,j,k, imin_,jmin_,kmin_, imax_,jmax_,kmax_, ilsh_,jlsh_,klsh_, di_) &&\ - if (name/**/_initialised .eq. 0) then &&\ - call lc_statmap_init (name/**/_initialised, name/**/_lm, "name") &&\ - end if &&\ - name/**/_di = (di_) &&\ - call lc_control_init (name/**/_lc, name/**/_lm, \ - (imin_), (jmin_), (kmin_), \ - (imax_), (jmax_), (kmax_), \ - (ilsh_), (jlsh_), (klsh_), \ - name/**/_di) &&\ - &&\ - /* Coarse loop */ &&\ -name/**/_lc_loop3vec: \ - do name/**/_kk = name/**/_lc%kkmin + 1, name/**/_lc%kkmax, name/**/_lc%kkstep &&\ - name/**/_kmin = name/**/_kk - 1 + name/**/_lc%kkkkmin &&\ - name/**/_kmax = min (name/**/_kk - 1 + name/**/_lc%kkkkmax, name/**/_lc%kkmax) &&\ - &&\ - do name/**/_jj = name/**/_lc%jjmin + 1, name/**/_lc%jjmax, name/**/_lc%jjstep &&\ - name/**/_jmin = name/**/_jj - 1 + name/**/_lc%jjjjmin &&\ - name/**/_jmax = min (name/**/_jj - 1 + name/**/_lc%jjjjmax, name/**/_lc%jjmax) &&\ - &&\ - do name/**/_ii = name/**/_lc%iimin + 1, name/**/_lc%iimax, name/**/_lc%iistep &&\ - name/**/_imin = name/**/_ii - 1 + name/**/_lc%iiiimin &&\ - name/**/_imax = min (name/**/_ii - 1 + name/**/_lc%iiiimax, name/**/_lc%iimax) &&\ - &&\ - /* Fine loop */ &&\ - do k = name/**/_kmin + 1, name/**/_kmax &&\ - do j = name/**/_jmin + 1, name/**/_jmax &&\ - LC_PRELOOP_STATEMENTS &&\ - name/**/_ipos = \ - name/**/_imin + name/**/_lc%ilsh * (j - 1 + name/**/_lc%jlsh * (k-1)) &&\ - /* round down to the next multiple of lc_di, assuming that lc_di is a power */ &&\ - /* of 2, and integers are encoded as two's-complement */ &&\ - name/**/_ioffset = iand(name/**/_ipos, - name/**/_di) - name/**/_ipos &&\ - do i = name/**/_imin + name/**/_ioffset + 1, name/**/_imax, name/**/_di - - - -#define LC_ENDLOOP3VEC(name) &&\ - end do &&\ - end do &&\ - LC_POSTLOOP_STATEMENTS &&\ - end do &&\ - &&\ - end do &&\ - end do &&\ -end do name/**/_lc_loop3vec &&\ - &&\ -call lc_control_finish (name/**/_lc) - -/* Pre- and post loop statements are inserted around the innermost - loop, which is executed serially. By default these are empty. */ -#define LC_PRELOOP_STATEMENTS -#define LC_POSTLOOP_STATEMENTS #endif /* #ifndef LOOPCONTROL_FORTRAN_H */ diff --git a/Carpet/LoopControl/src/loopcontrol_types.F90 b/Carpet/LoopControl/src/loopcontrol_types.F90 index 159f49e92..f403aecca 100644 --- a/Carpet/LoopControl/src/loopcontrol_types.F90 +++ b/Carpet/LoopControl/src/loopcontrol_types.F90 @@ -1,60 +1,40 @@ -#include +#include "cctk.h" + +#include "loopcontrol.h" + + module loopcontrol_types - ! Note: this type must correspond to the corresponding C type - ! declared in loopcontrol.h - type lc_statmap_t - sequence - - CCTK_POINTER next - - ! Name - CCTK_POINTER_TO_CONST name - - CCTK_POINTER statset_list - end type lc_statmap_t + implicit none - ! Note: this type must correspond to the corresponding C type + ! Note: These types must correspond to the corresponding C types ! declared in loopcontrol.h - type lc_control_t - sequence - - CCTK_POINTER statmap - CCTK_POINTER statset - CCTK_POINTER stattime - - ! Copy of arguments (useful for debugging) - integer imin, jmin, kmin - integer imax, jmax, kmax - integer ilsh, jlsh, klsh - integer di - - ! Control settings for thread parallelism (useful for debugging) - integer iiimin, jjjmin, kkkmin - integer iiimax, jjjmax, kkkmax - integer iiistep, jjjstep, kkkstep - - ! Control settings for current thread (useful for debugging) - integer thread_num - integer iii, jjj, kkk - integer iiii, jjjj, kkkk - - ! Control settings for tiling loop - integer iimin, jjmin, kkmin - integer iimax, jjmax, kkmax - integer iistep, jjstep, kkstep - - ! Control settings for inner thread parallelism - integer iiiimin, jjjjmin, kkkkmin - integer iiiimax, jjjjmax, kkkkmax - integer iiiistep, jjjjstep, kkkkstep - - ! Timing statistics - double precision time_setup_begin, time_calc_begin - - ! Self check - CCTK_POINTER selftest_count + + type, bind(C) :: lc_vec_t + CCTK_POINTER :: v(LC_DIM) + end type lc_vec_t + + type, bind(C) :: lc_ivec_t + integer :: v(LC_DIM) + end type lc_ivec_t + + type, bind(C) :: lc_space_t + type(lc_vec_t) :: min, max, step, pos + type(lc_ivec_t) :: count, idx + end type lc_space_t + + type, bind(C) :: lc_control_t + type(lc_vec_t) :: ash + type(lc_space_t) :: loop + type(lc_space_t) :: thread + CCTK_POINTER :: thread_idx_ptr + integer :: thread_done + type(lc_space_t) :: coarse + type(lc_space_t) :: fine + type(lc_space_t) :: fine_thread + CCTK_POINTER :: fine_thread_info_ptr + CCTK_POINTER :: selftest_array end type lc_control_t end module loopcontrol_types diff --git a/Carpet/LoopControl/src/make.code.defn b/Carpet/LoopControl/src/make.code.defn index a0cd4b09b..e0a86fa85 100644 --- a/Carpet/LoopControl/src/make.code.defn +++ b/Carpet/LoopControl/src/make.code.defn @@ -1,7 +1,7 @@ # Main make.code.defn file for thorn LoopControl # Source files in this directory -SRCS = loopcontrol.c loopcontrol.F90 lc_get_type_sizes.F90 loopcontrol_types.F90 lc_auto.c lc_siman.c lc_hill.c +SRCS = loopcontrol.cc loopcontrol.F90 loopcontrol_types.F90 type_sizes.F90 # Subdirectories containing source files SUBDIRS = diff --git a/Carpet/LoopControl/src/type_sizes.F90 b/Carpet/LoopControl/src/type_sizes.F90 new file mode 100644 index 000000000..86e8304d8 --- /dev/null +++ b/Carpet/LoopControl/src/type_sizes.F90 @@ -0,0 +1,21 @@ +#include "cctk.h" +#include "cctk_Functions.h" + + + +subroutine lc_get_fortran_type_sizes(type_sizes) + use loopcontrol_types + implicit none + DECLARE_CCTK_FUNCTIONS + CCTK_POINTER :: type_sizes(4) + + type(lc_vec_t) :: vec(2) + type(lc_ivec_t) :: ivec(2) + type(lc_space_t) :: space(2) + type(lc_control_t) :: control(2) + + type_sizes(1) = CCTK_PointerTo(vec(2)) - CCTK_PointerTo(vec(1)) + type_sizes(2) = CCTK_PointerTo(ivec(2)) - CCTK_PointerTo(ivec(1)) + type_sizes(3) = CCTK_PointerTo(space(2)) - CCTK_PointerTo(space(1)) + type_sizes(4) = CCTK_PointerTo(control(2)) - CCTK_PointerTo(control(1)) +end subroutine lc_get_fortran_type_sizes diff --git a/Carpet/LoopControl/src/wavetoy-loopcontrol.c b/Carpet/LoopControl/src/wavetoy-loopcontrol.c deleted file mode 100644 index cdf61d1f2..000000000 --- a/Carpet/LoopControl/src/wavetoy-loopcontrol.c +++ /dev/null @@ -1,274 +0,0 @@ -// Evolve the scalar wave equation with Dirichlet boundaries -// (C) 2007-08-16 Erik Schnetter - - - -// Redshift: -// ~/gcc/bin/gcc -std=gnu99 -fopenmp -Wall -g -O3 -fomit-frame-pointer -march=pentium4 -malign-double -o wavetoy-loopcontrol loopcontrol.c wavetoy-loopcontrol.c -// icc -std=c99 -openmp -Wall -g -fast -march=pentium4 -align -o wavetoy-loopcontrol loopcontrol.c wavetoy-loopcontrol.c - -// Abe: -// icc -restrict -openmp -Wall -g -fast -fomit-frame-pointer -align -o wavetoy-loopcontrol loopcontrol.c wavetoy-loopcontrol.c - -// Ducky: -// xlc_r -q64 -qlanglvl=stdc99 -qsmp=omp -g -O3 -qmaxmem=-1 -qhot -qarch=pwr5 -qtune=pwr5 -o wavetoy wavetoy.c - -// Eric: -// icc -std=c99 -openmp -Wall -g -fast -fomit-frame-pointer -align -o wavetoy wavetoy.c -// LM_LICENSE_FILE=/usr/local/compilers/pgi/license.dat /usr/local/compilers/pgi/linux86/6.1/bin/pgcc -c9x -mp -g -fastsse -tp=piv -Mdalign -Mllalign -o wavetoy wavetoy.c - -// Prism: -// /opt/intel/8.1.023/bin/icc -std=c99 -openmp -Wall -g -fast -o wavetoy wavetoy.c - -// Santaka: -// icc -std=c99 -openmp -Wall -g -fast -fomit-frame-pointer -o wavetoy wavetoy.c - - - -#include -#include -#include -#include -#include -#include - -#ifdef _OPENMP -# include -#endif - -#include - -#ifdef HAVE_SYS_TIME_H -# include -#endif - -#include "loopcontrol.h" - - -/*** - define this if you want to build a standalone demo - (but beware of hidden LoopControl dependencies on Cactus) -#define BUILD_STANDALONE - ***/ - - - -#ifndef _OPENMP -/* Replacements for some OpenMP routines if OpenMP is not available */ - -static inline -int -omp_get_thread_num (void) -{ - return 0; -} - -static inline -int -omp_get_num_threads (void) -{ - return 1; -} - -static inline -double -omp_get_wtime (void) -{ - struct timeval tv; - gettimeofday (& tv, NULL); - return tv.tv_sec + 1.0e-6 * tv.tv_usec; -} - -#endif - - - -static int NI; -static int NJ; -static int NK; - -static int NSTEPS; - - - -#ifdef BUILD_STANDALONE -static -int -getint (char const * restrict const string) -{ - char * endptr; - errno = 0; - long const n = strtol (string, & endptr, 0); - if (* string == '\0' || * endptr != '\0' || errno != 0) { - fprintf (stderr, "Argument \"%s\" is not a legal number\n", string); - if (errno != 0) { - perror (NULL); - } - exit (2); - } - return n; -} -#else - -#include "cctk_Parameters.h" - -#endif - - - -static inline -int -ind3d (int const i, int const j, int const k) -{ - return i + NI * (j + NJ * k); -} - - - -#ifdef BUILD_STANDALONE -int main (int argc, char **argv) -{ -#else -int lc_demo (void) -{ - DECLARE_CCTK_PARAMETERS -#endif - printf ("WaveToy\n"); -#if 0 - if (argc != 5) { - fprintf (stderr, - "Synopsis:\n" - " %s \n" - " where : number of grid points\n" - " : number of time steps\n", - argv[0]); - exit (1); - } - NI = getint (argv[1]); - NJ = getint (argv[2]); - NK = getint (argv[3]); - NSTEPS = getint (argv[4]); -#else - NI = nx; - NJ = ny; - NK = nz; - NSTEPS = nsteps; -#endif - printf (" NI=%d NJ=%d NK=%d\n", NI, NJ, NK); - printf (" NSTEPS=%d\n", NSTEPS); - - // - // Statistics - // -#pragma omp parallel - { -#pragma omp single - { - int const num_threads = omp_get_num_threads(); - printf ("Running with %d threads\n", num_threads); - } - } - - // - // Setup - // - // double const time_setup_start = omp_get_wtime(); - double const alpha = 0.5; // CFL factor - double * restrict phi0, * restrict phi1, * restrict phi2; - phi0 = malloc (NI*NJ*NK * sizeof *phi0); - phi1 = malloc (NI*NJ*NK * sizeof *phi1); - phi2 = malloc (NI*NJ*NK * sizeof *phi2); - // double const time_setup_end = omp_get_wtime(); - // printf ("Setup time: %g\n", time_setup_end - time_setup_start); - - // - // Initialise - // - printf ("Initialisation\n"); - double const time_init_start = omp_get_wtime(); - double const kx = M_PI/(NI-1); - double const ky = M_PI/(NJ-1); - double const kz = M_PI/(NK-1); - double const kt = sqrt (pow(kx,2) + pow(ky,2) + pow(kz,2)); -#pragma omp parallel - { - LC_LOOP3 (initialisation, i,j,k, 0,0,0, NI,NJ,NK, NI,NJ,NK) { - int const ind = ind3d(i,j,k); - // phi0[ind] = 0.0; - // phi0[ind] = i==1 && j==1 && k==1 ? 1.0 : 0.0; - phi0[ind] = 0.0; - phi1[ind] = sin(kx*i) * sin(ky*j) * sin(kz*k) * sin(kt*(- alpha)); - phi2[ind] = sin(kx*i) * sin(ky*j) * sin(kz*k) * sin(kt*(-2*alpha)); - } LC_ENDLOOP3 (initialisation); - } - // phi0[ind3d(NI/2,NJ/2,NK/2)] = 1.0; - double const time_init_end = omp_get_wtime(); - printf ("Initialisation time: %g\n", time_init_end - time_init_start); - - // - // Evolve - // - printf ("Evolution\n"); - double const time_evol_start = omp_get_wtime(); - for (int step=1; step<=NSTEPS; ++step) { - // if (step % 10 == 0) { - // printf ("Step %d\n", step); - // } - - // - // Rotate - // - { - double * const tmp=phi2; phi2=phi1; phi1=phi0; phi0=tmp; - } - - // - // Step - // - // double const time_step_start = omp_get_wtime(); - double const alpha2 = pow(alpha,2); - double const factor = 2 * (1 - 3 * alpha2); - int const di = ind3d(1,0,0), dj = ind3d(0,1,0), dk=ind3d(0,0,1); -#pragma omp parallel - { - LC_LOOP3 (evolution, i,j,k, 1,1,1, NI-1,NJ-1,NK-1, NI,NJ,NK) { - int const ind = ind3d(i,j,k); - phi0[ind] = (+ factor*phi1[ind] - phi2[ind] - + alpha2 * (+ phi1[ind-di] + phi1[ind+di] - + phi1[ind-dj] + phi1[ind+dj] - + phi1[ind-dk] + phi1[ind+dk])); - // phi0[ind] = phi1[ind] + 1.0; - } LC_ENDLOOP3 (evolution); - } // omp parallel - // double const time_step_end = omp_get_wtime(); - // printf ("Step time: %g\n", time_step_end - time_step_start); - - } - double const time_evol_end = omp_get_wtime(); - printf ("Evolution time: %g\n", time_evol_end - time_evol_start); - - // - // Analyse - // - printf ("Analysis\n"); - double const time_analysis_start = omp_get_wtime(); - double sum = 0.0; -#pragma omp parallel reduction(+: sum) - { - CCTK_LOOP3 (analysis, i,j,k, 1,1,1, NI-1,NJ-1,NK-1, NI,NJ,NK) { - int const ind = ind3d(i,j,k); - sum += phi0[ind]; - } CCTK_ENDLOOP3 (analysis); - } - double const avg = sum / ((NI-2)*(NJ-2)*(NK-2)); - double const time_analysis_end = omp_get_wtime(); - printf ("Analysis time: %g\n", time_analysis_end - time_analysis_start); - printf ("Result: %.15g\n", avg); - - lc_printstats(0); - - // - // Shutdown - // - return 0; -} diff --git a/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test.par b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test.par new file mode 100644 index 000000000..25c092823 --- /dev/null +++ b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test.par @@ -0,0 +1,124 @@ +Cactus::cctk_run_title = "Benchmark of McLachlan using Carpet with one refinement level" +Cactus::cctk_timer_output = "full" + +Cactus::cctk_itlast = 10 + + + +ActiveThorns = "CycleClock Fortran hwloc MPI" + + + +ActiveThorns = "IOUtil" + +IO::out_dir = $parfile + + + +ActiveThorns = "InitBase" + + + +ActiveThorns = "LoopControl" + +LoopControl::verbose = no +LoopControl::veryverbose = no +LoopControl::selftest = yes + +LoopControl::initial_setup = "tiled" + +#Carpet::pad_to_cachelines = yes + + + +ActiveThorns = "Carpet CarpetLib CarpetReduce" + +Carpet::domain_from_coordbase = yes + +driver::ghost_size = 3 + +Carpet::init_fill_timelevels = yes + +#CarpetLib::print_timestats_every = 1 +CarpetLib::print_memstats_every = 10 + + + +ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" + +CoordBase::domainsize = "minmax" +CoordBase::xmin = 0.0 +CoordBase::ymin = 0.0 +CoordBase::zmin = 0.0 +CoordBase::xmax = 1.0 +CoordBase::ymax = 1.0 +CoordBase::zmax = 1.0 + +CoordBase::spacing = "numcells" +CoordBase::ncells_x = 30 +CoordBase::ncells_y = 30 +CoordBase::ncells_z = 30 + +CartGrid3D::type = "coordbase" +CartGrid3D::avoid_originx = no +CartGrid3D::avoid_originy = no +CartGrid3D::avoid_originz = no + +CoordBase::boundary_size_x_lower = 3 +CoordBase::boundary_size_y_lower = 3 +CoordBase::boundary_size_z_lower = 3 +CoordBase::boundary_size_x_upper = 3 +CoordBase::boundary_size_y_upper = 3 +CoordBase::boundary_size_z_upper = 3 + + + +ActiveThorns = "MoL NaNChecker Time" + +MoL::ODE_Method = "RK4" +MoL::MoL_Intermediate_Steps = 4 +MoL::MoL_Num_Scratch_Levels = 1 + +Time::dtfac = 0.4 + + + +ActiveThorns = "ADMBase ADMCoupling ADMMacros CoordGauge StaticConformal" + +ADMBase::initial_data = "Cartesian Minkowski" +ADMBase::initial_lapse = "one" +ADMBase::initial_shift = "zero" +ADMBase::initial_dtlapse = "zero" +ADMBase::initial_dtshift = "zero" + + + +ActiveThorns = "GenericFD ML_BSSN ML_BSSN_Helper TmunuBase" + +ADMBase::evolution_method = "ML_BSSN" +ADMBase::lapse_evolution_method = "ML_BSSN" +ADMBase::shift_evolution_method = "ML_BSSN" + +ML_BSSN::my_boundary_condition = "Minkowski" + +ML_BSSN::harmonicN = 1 # 1+log +ML_BSSN::harmonicF = 2.0 # 1+log +ML_BSSN::ShiftGammaCoeff = 0.75 +ML_BSSN::BetaDriver = 0.5 + + + +ActiveThorns = "CarpetIOBasic" + +IOBasic::outInfo_every = 10 +IOBasic::outInfo_vars = "ADMBase::alp" + + + +ActiveThorns = "CarpetIOASCII" + +IOASCII::out0D_every = 0 # 10 +IOASCII::out0D_vars = "Carpet::timing" + +IOASCII::out1D_every = 10 +IOASCII::out1D_vars = "ADMBase::alp" diff --git a/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.d.asc b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.d.asc new file mode 100644 index 000000000..71d8a4b5b --- /dev/null +++ b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.d.asc @@ -0,0 +1,168 @@ +# 1D ASCII output created by CarpetIOASCII +# created on Redshift.local by eschnett on Jan 09 2013 at 14:33:53-0500 +# parameter filename: "../../arrangements/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test.par" +# +# alp d (alp) +# +# iteration 0 time 0 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 0 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +0 0 0 0 0 0 0 0 0 -0.0666666666666667 -0.0666666666666667 -0.0666666666666667 1 +0 0 0 0 0 1 1 1 0 -0.0333333333333333 -0.0333333333333333 -0.0333333333333333 1 +0 0 0 0 0 2 2 2 0 0 0 0 1 +0 0 0 0 0 3 3 3 0 0.0333333333333333 0.0333333333333333 0.0333333333333333 1 +0 0 0 0 0 4 4 4 0 0.0666666666666667 0.0666666666666667 0.0666666666666667 1 +0 0 0 0 0 5 5 5 0 0.1 0.1 0.1 1 +0 0 0 0 0 6 6 6 0 0.133333333333333 0.133333333333333 0.133333333333333 1 +0 0 0 0 0 7 7 7 0 0.166666666666667 0.166666666666667 0.166666666666667 1 +0 0 0 0 0 8 8 8 0 0.2 0.2 0.2 1 +0 0 0 0 0 9 9 9 0 0.233333333333333 0.233333333333333 0.233333333333333 1 +0 0 0 0 0 10 10 10 0 0.266666666666667 0.266666666666667 0.266666666666667 1 +0 0 0 0 0 11 11 11 0 0.3 0.3 0.3 1 +0 0 0 0 0 12 12 12 0 0.333333333333333 0.333333333333333 0.333333333333333 1 +0 0 0 0 0 13 13 13 0 0.366666666666667 0.366666666666667 0.366666666666667 1 +0 0 0 0 0 14 14 14 0 0.4 0.4 0.4 1 +0 0 0 0 0 15 15 15 0 0.433333333333333 0.433333333333333 0.433333333333333 1 +0 0 0 0 0 16 16 16 0 0.466666666666667 0.466666666666667 0.466666666666667 1 +0 0 0 0 0 17 17 17 0 0.5 0.5 0.5 1 +0 0 0 0 0 18 18 18 0 0.533333333333333 0.533333333333333 0.533333333333333 1 +0 0 0 0 0 19 19 19 0 0.566666666666667 0.566666666666667 0.566666666666667 1 +0 0 0 0 0 20 20 20 0 0.6 0.6 0.6 1 +# +# +# +# +# +# +# +# +# +# +# +# +# +# + +# iteration 0 time 0 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 1 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +0 0 0 1 0 15 15 15 0 0.433333333333333 0.433333333333333 0.433333333333333 1 +0 0 0 1 0 16 16 16 0 0.466666666666667 0.466666666666667 0.466666666666667 1 +0 0 0 1 0 17 17 17 0 0.5 0.5 0.5 1 +0 0 0 1 0 18 18 18 0 0.533333333333333 0.533333333333333 0.533333333333333 1 +0 0 0 1 0 19 19 19 0 0.566666666666667 0.566666666666667 0.566666666666667 1 +0 0 0 1 0 20 20 20 0 0.6 0.6 0.6 1 +0 0 0 1 0 21 21 21 0 0.633333333333333 0.633333333333333 0.633333333333333 1 +0 0 0 1 0 22 22 22 0 0.666666666666667 0.666666666666667 0.666666666666667 1 +0 0 0 1 0 23 23 23 0 0.7 0.7 0.7 1 +0 0 0 1 0 24 24 24 0 0.733333333333333 0.733333333333333 0.733333333333333 1 +0 0 0 1 0 25 25 25 0 0.766666666666667 0.766666666666667 0.766666666666667 1 +0 0 0 1 0 26 26 26 0 0.8 0.8 0.8 1 +0 0 0 1 0 27 27 27 0 0.833333333333333 0.833333333333333 0.833333333333333 1 +0 0 0 1 0 28 28 28 0 0.866666666666667 0.866666666666667 0.866666666666667 1 +0 0 0 1 0 29 29 29 0 0.9 0.9 0.9 1 +0 0 0 1 0 30 30 30 0 0.933333333333333 0.933333333333333 0.933333333333333 1 +0 0 0 1 0 31 31 31 0 0.966666666666667 0.966666666666667 0.966666666666667 1 +0 0 0 1 0 32 32 32 0 1 1 1 1 +0 0 0 1 0 33 33 33 0 1.03333333333333 1.03333333333333 1.03333333333333 1 +0 0 0 1 0 34 34 34 0 1.06666666666667 1.06666666666667 1.06666666666667 1 + + +# iteration 10 time 0.133333333333333 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 0 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +10 0 0 0 0 0 0 0 0.133333333333333 -0.0666666666666667 -0.0666666666666667 -0.0666666666666667 1 +10 0 0 0 0 1 1 1 0.133333333333333 -0.0333333333333333 -0.0333333333333333 -0.0333333333333333 1 +10 0 0 0 0 2 2 2 0.133333333333333 0 0 0 1 +10 0 0 0 0 3 3 3 0.133333333333333 0.0333333333333333 0.0333333333333333 0.0333333333333333 1 +10 0 0 0 0 4 4 4 0.133333333333333 0.0666666666666667 0.0666666666666667 0.0666666666666667 1 +10 0 0 0 0 5 5 5 0.133333333333333 0.1 0.1 0.1 1 +10 0 0 0 0 6 6 6 0.133333333333333 0.133333333333333 0.133333333333333 0.133333333333333 1 +10 0 0 0 0 7 7 7 0.133333333333333 0.166666666666667 0.166666666666667 0.166666666666667 1 +10 0 0 0 0 8 8 8 0.133333333333333 0.2 0.2 0.2 1 +10 0 0 0 0 9 9 9 0.133333333333333 0.233333333333333 0.233333333333333 0.233333333333333 1 +10 0 0 0 0 10 10 10 0.133333333333333 0.266666666666667 0.266666666666667 0.266666666666667 1 +10 0 0 0 0 11 11 11 0.133333333333333 0.3 0.3 0.3 1 +10 0 0 0 0 12 12 12 0.133333333333333 0.333333333333333 0.333333333333333 0.333333333333333 1 +10 0 0 0 0 13 13 13 0.133333333333333 0.366666666666667 0.366666666666667 0.366666666666667 1 +10 0 0 0 0 14 14 14 0.133333333333333 0.4 0.4 0.4 1 +10 0 0 0 0 15 15 15 0.133333333333333 0.433333333333333 0.433333333333333 0.433333333333333 1 +10 0 0 0 0 16 16 16 0.133333333333333 0.466666666666667 0.466666666666667 0.466666666666667 1 +10 0 0 0 0 17 17 17 0.133333333333333 0.5 0.5 0.5 1 +10 0 0 0 0 18 18 18 0.133333333333333 0.533333333333333 0.533333333333333 0.533333333333333 1 +10 0 0 0 0 19 19 19 0.133333333333333 0.566666666666667 0.566666666666667 0.566666666666667 1 +10 0 0 0 0 20 20 20 0.133333333333333 0.6 0.6 0.6 1 +# +# +# +# +# +# +# +# +# +# +# +# +# +# + +# iteration 10 time 0.133333333333333 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 1 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +10 0 0 1 0 15 15 15 0.133333333333333 0.433333333333333 0.433333333333333 0.433333333333333 1 +10 0 0 1 0 16 16 16 0.133333333333333 0.466666666666667 0.466666666666667 0.466666666666667 1 +10 0 0 1 0 17 17 17 0.133333333333333 0.5 0.5 0.5 1 +10 0 0 1 0 18 18 18 0.133333333333333 0.533333333333333 0.533333333333333 0.533333333333333 1 +10 0 0 1 0 19 19 19 0.133333333333333 0.566666666666667 0.566666666666667 0.566666666666667 1 +10 0 0 1 0 20 20 20 0.133333333333333 0.6 0.6 0.6 1 +10 0 0 1 0 21 21 21 0.133333333333333 0.633333333333333 0.633333333333333 0.633333333333333 1 +10 0 0 1 0 22 22 22 0.133333333333333 0.666666666666667 0.666666666666667 0.666666666666667 1 +10 0 0 1 0 23 23 23 0.133333333333333 0.7 0.7 0.7 1 +10 0 0 1 0 24 24 24 0.133333333333333 0.733333333333333 0.733333333333333 0.733333333333333 1 +10 0 0 1 0 25 25 25 0.133333333333333 0.766666666666667 0.766666666666667 0.766666666666667 1 +10 0 0 1 0 26 26 26 0.133333333333333 0.8 0.8 0.8 1 +10 0 0 1 0 27 27 27 0.133333333333333 0.833333333333333 0.833333333333333 0.833333333333333 1 +10 0 0 1 0 28 28 28 0.133333333333333 0.866666666666667 0.866666666666667 0.866666666666667 1 +10 0 0 1 0 29 29 29 0.133333333333333 0.9 0.9 0.9 1 +10 0 0 1 0 30 30 30 0.133333333333333 0.933333333333333 0.933333333333333 0.933333333333333 1 +10 0 0 1 0 31 31 31 0.133333333333333 0.966666666666667 0.966666666666667 0.966666666666667 1 +10 0 0 1 0 32 32 32 0.133333333333333 1 1 1 1 +10 0 0 1 0 33 33 33 0.133333333333333 1.03333333333333 1.03333333333333 1.03333333333333 1 +10 0 0 1 0 34 34 34 0.133333333333333 1.06666666666667 1.06666666666667 1.06666666666667 1 + + diff --git a/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.x.asc b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.x.asc new file mode 100644 index 000000000..6bd8d6c7b --- /dev/null +++ b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.x.asc @@ -0,0 +1,98 @@ +# 1D ASCII output created by CarpetIOASCII +# created on Redshift.local by eschnett on Jan 09 2013 at 14:33:53-0500 +# parameter filename: "../../arrangements/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test.par" +# +# alp x (alp) +# +# iteration 0 time 0 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 0 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +0 0 0 0 0 0 2 2 0 -0.0666666666666667 0 0 1 +0 0 0 0 0 1 2 2 0 -0.0333333333333333 0 0 1 +0 0 0 0 0 2 2 2 0 0 0 0 1 +0 0 0 0 0 3 2 2 0 0.0333333333333333 0 0 1 +0 0 0 0 0 4 2 2 0 0.0666666666666667 0 0 1 +0 0 0 0 0 5 2 2 0 0.1 0 0 1 +0 0 0 0 0 6 2 2 0 0.133333333333333 0 0 1 +0 0 0 0 0 7 2 2 0 0.166666666666667 0 0 1 +0 0 0 0 0 8 2 2 0 0.2 0 0 1 +0 0 0 0 0 9 2 2 0 0.233333333333333 0 0 1 +0 0 0 0 0 10 2 2 0 0.266666666666667 0 0 1 +0 0 0 0 0 11 2 2 0 0.3 0 0 1 +0 0 0 0 0 12 2 2 0 0.333333333333333 0 0 1 +0 0 0 0 0 13 2 2 0 0.366666666666667 0 0 1 +0 0 0 0 0 14 2 2 0 0.4 0 0 1 +0 0 0 0 0 15 2 2 0 0.433333333333333 0 0 1 +0 0 0 0 0 16 2 2 0 0.466666666666667 0 0 1 +0 0 0 0 0 17 2 2 0 0.5 0 0 1 +0 0 0 0 0 18 2 2 0 0.533333333333333 0 0 1 +0 0 0 0 0 19 2 2 0 0.566666666666667 0 0 1 +0 0 0 0 0 20 2 2 0 0.6 0 0 1 +0 0 0 0 0 21 2 2 0 0.633333333333333 0 0 1 +0 0 0 0 0 22 2 2 0 0.666666666666667 0 0 1 +0 0 0 0 0 23 2 2 0 0.7 0 0 1 +0 0 0 0 0 24 2 2 0 0.733333333333333 0 0 1 +0 0 0 0 0 25 2 2 0 0.766666666666667 0 0 1 +0 0 0 0 0 26 2 2 0 0.8 0 0 1 +0 0 0 0 0 27 2 2 0 0.833333333333333 0 0 1 +0 0 0 0 0 28 2 2 0 0.866666666666667 0 0 1 +0 0 0 0 0 29 2 2 0 0.9 0 0 1 +0 0 0 0 0 30 2 2 0 0.933333333333333 0 0 1 +0 0 0 0 0 31 2 2 0 0.966666666666667 0 0 1 +0 0 0 0 0 32 2 2 0 1 0 0 1 +0 0 0 0 0 33 2 2 0 1.03333333333333 0 0 1 +0 0 0 0 0 34 2 2 0 1.06666666666667 0 0 1 + +# iteration 0 time 0 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 1 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +# + +# iteration 10 time 0.133333333333333 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 0 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +10 0 0 0 0 0 2 2 0.133333333333333 -0.0666666666666667 0 0 1 +10 0 0 0 0 1 2 2 0.133333333333333 -0.0333333333333333 0 0 1 +10 0 0 0 0 2 2 2 0.133333333333333 0 0 0 1 +10 0 0 0 0 3 2 2 0.133333333333333 0.0333333333333333 0 0 1 +10 0 0 0 0 4 2 2 0.133333333333333 0.0666666666666667 0 0 1 +10 0 0 0 0 5 2 2 0.133333333333333 0.1 0 0 1 +10 0 0 0 0 6 2 2 0.133333333333333 0.133333333333333 0 0 1 +10 0 0 0 0 7 2 2 0.133333333333333 0.166666666666667 0 0 1 +10 0 0 0 0 8 2 2 0.133333333333333 0.2 0 0 1 +10 0 0 0 0 9 2 2 0.133333333333333 0.233333333333333 0 0 1 +10 0 0 0 0 10 2 2 0.133333333333333 0.266666666666667 0 0 1 +10 0 0 0 0 11 2 2 0.133333333333333 0.3 0 0 1 +10 0 0 0 0 12 2 2 0.133333333333333 0.333333333333333 0 0 1 +10 0 0 0 0 13 2 2 0.133333333333333 0.366666666666667 0 0 1 +10 0 0 0 0 14 2 2 0.133333333333333 0.4 0 0 1 +10 0 0 0 0 15 2 2 0.133333333333333 0.433333333333333 0 0 1 +10 0 0 0 0 16 2 2 0.133333333333333 0.466666666666667 0 0 1 +10 0 0 0 0 17 2 2 0.133333333333333 0.5 0 0 1 +10 0 0 0 0 18 2 2 0.133333333333333 0.533333333333333 0 0 1 +10 0 0 0 0 19 2 2 0.133333333333333 0.566666666666667 0 0 1 +10 0 0 0 0 20 2 2 0.133333333333333 0.6 0 0 1 +10 0 0 0 0 21 2 2 0.133333333333333 0.633333333333333 0 0 1 +10 0 0 0 0 22 2 2 0.133333333333333 0.666666666666667 0 0 1 +10 0 0 0 0 23 2 2 0.133333333333333 0.7 0 0 1 +10 0 0 0 0 24 2 2 0.133333333333333 0.733333333333333 0 0 1 +10 0 0 0 0 25 2 2 0.133333333333333 0.766666666666667 0 0 1 +10 0 0 0 0 26 2 2 0.133333333333333 0.8 0 0 1 +10 0 0 0 0 27 2 2 0.133333333333333 0.833333333333333 0 0 1 +10 0 0 0 0 28 2 2 0.133333333333333 0.866666666666667 0 0 1 +10 0 0 0 0 29 2 2 0.133333333333333 0.9 0 0 1 +10 0 0 0 0 30 2 2 0.133333333333333 0.933333333333333 0 0 1 +10 0 0 0 0 31 2 2 0.133333333333333 0.966666666666667 0 0 1 +10 0 0 0 0 32 2 2 0.133333333333333 1 0 0 1 +10 0 0 0 0 33 2 2 0.133333333333333 1.03333333333333 0 0 1 +10 0 0 0 0 34 2 2 0.133333333333333 1.06666666666667 0 0 1 + +# iteration 10 time 0.133333333333333 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 1 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +# + diff --git a/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.y.asc b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.y.asc new file mode 100644 index 000000000..baaa9ab1d --- /dev/null +++ b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.y.asc @@ -0,0 +1,98 @@ +# 1D ASCII output created by CarpetIOASCII +# created on Redshift.local by eschnett on Jan 09 2013 at 14:33:53-0500 +# parameter filename: "../../arrangements/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test.par" +# +# alp y (alp) +# +# iteration 0 time 0 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 0 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +0 0 0 0 0 2 0 2 0 0 -0.0666666666666667 0 1 +0 0 0 0 0 2 1 2 0 0 -0.0333333333333333 0 1 +0 0 0 0 0 2 2 2 0 0 0 0 1 +0 0 0 0 0 2 3 2 0 0 0.0333333333333333 0 1 +0 0 0 0 0 2 4 2 0 0 0.0666666666666667 0 1 +0 0 0 0 0 2 5 2 0 0 0.1 0 1 +0 0 0 0 0 2 6 2 0 0 0.133333333333333 0 1 +0 0 0 0 0 2 7 2 0 0 0.166666666666667 0 1 +0 0 0 0 0 2 8 2 0 0 0.2 0 1 +0 0 0 0 0 2 9 2 0 0 0.233333333333333 0 1 +0 0 0 0 0 2 10 2 0 0 0.266666666666667 0 1 +0 0 0 0 0 2 11 2 0 0 0.3 0 1 +0 0 0 0 0 2 12 2 0 0 0.333333333333333 0 1 +0 0 0 0 0 2 13 2 0 0 0.366666666666667 0 1 +0 0 0 0 0 2 14 2 0 0 0.4 0 1 +0 0 0 0 0 2 15 2 0 0 0.433333333333333 0 1 +0 0 0 0 0 2 16 2 0 0 0.466666666666667 0 1 +0 0 0 0 0 2 17 2 0 0 0.5 0 1 +0 0 0 0 0 2 18 2 0 0 0.533333333333333 0 1 +0 0 0 0 0 2 19 2 0 0 0.566666666666667 0 1 +0 0 0 0 0 2 20 2 0 0 0.6 0 1 +0 0 0 0 0 2 21 2 0 0 0.633333333333333 0 1 +0 0 0 0 0 2 22 2 0 0 0.666666666666667 0 1 +0 0 0 0 0 2 23 2 0 0 0.7 0 1 +0 0 0 0 0 2 24 2 0 0 0.733333333333333 0 1 +0 0 0 0 0 2 25 2 0 0 0.766666666666667 0 1 +0 0 0 0 0 2 26 2 0 0 0.8 0 1 +0 0 0 0 0 2 27 2 0 0 0.833333333333333 0 1 +0 0 0 0 0 2 28 2 0 0 0.866666666666667 0 1 +0 0 0 0 0 2 29 2 0 0 0.9 0 1 +0 0 0 0 0 2 30 2 0 0 0.933333333333333 0 1 +0 0 0 0 0 2 31 2 0 0 0.966666666666667 0 1 +0 0 0 0 0 2 32 2 0 0 1 0 1 +0 0 0 0 0 2 33 2 0 0 1.03333333333333 0 1 +0 0 0 0 0 2 34 2 0 0 1.06666666666667 0 1 + +# iteration 0 time 0 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 1 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +# + +# iteration 10 time 0.133333333333333 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 0 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +10 0 0 0 0 2 0 2 0.133333333333333 0 -0.0666666666666667 0 1 +10 0 0 0 0 2 1 2 0.133333333333333 0 -0.0333333333333333 0 1 +10 0 0 0 0 2 2 2 0.133333333333333 0 0 0 1 +10 0 0 0 0 2 3 2 0.133333333333333 0 0.0333333333333333 0 1 +10 0 0 0 0 2 4 2 0.133333333333333 0 0.0666666666666667 0 1 +10 0 0 0 0 2 5 2 0.133333333333333 0 0.1 0 1 +10 0 0 0 0 2 6 2 0.133333333333333 0 0.133333333333333 0 1 +10 0 0 0 0 2 7 2 0.133333333333333 0 0.166666666666667 0 1 +10 0 0 0 0 2 8 2 0.133333333333333 0 0.2 0 1 +10 0 0 0 0 2 9 2 0.133333333333333 0 0.233333333333333 0 1 +10 0 0 0 0 2 10 2 0.133333333333333 0 0.266666666666667 0 1 +10 0 0 0 0 2 11 2 0.133333333333333 0 0.3 0 1 +10 0 0 0 0 2 12 2 0.133333333333333 0 0.333333333333333 0 1 +10 0 0 0 0 2 13 2 0.133333333333333 0 0.366666666666667 0 1 +10 0 0 0 0 2 14 2 0.133333333333333 0 0.4 0 1 +10 0 0 0 0 2 15 2 0.133333333333333 0 0.433333333333333 0 1 +10 0 0 0 0 2 16 2 0.133333333333333 0 0.466666666666667 0 1 +10 0 0 0 0 2 17 2 0.133333333333333 0 0.5 0 1 +10 0 0 0 0 2 18 2 0.133333333333333 0 0.533333333333333 0 1 +10 0 0 0 0 2 19 2 0.133333333333333 0 0.566666666666667 0 1 +10 0 0 0 0 2 20 2 0.133333333333333 0 0.6 0 1 +10 0 0 0 0 2 21 2 0.133333333333333 0 0.633333333333333 0 1 +10 0 0 0 0 2 22 2 0.133333333333333 0 0.666666666666667 0 1 +10 0 0 0 0 2 23 2 0.133333333333333 0 0.7 0 1 +10 0 0 0 0 2 24 2 0.133333333333333 0 0.733333333333333 0 1 +10 0 0 0 0 2 25 2 0.133333333333333 0 0.766666666666667 0 1 +10 0 0 0 0 2 26 2 0.133333333333333 0 0.8 0 1 +10 0 0 0 0 2 27 2 0.133333333333333 0 0.833333333333333 0 1 +10 0 0 0 0 2 28 2 0.133333333333333 0 0.866666666666667 0 1 +10 0 0 0 0 2 29 2 0.133333333333333 0 0.9 0 1 +10 0 0 0 0 2 30 2 0.133333333333333 0 0.933333333333333 0 1 +10 0 0 0 0 2 31 2 0.133333333333333 0 0.966666666666667 0 1 +10 0 0 0 0 2 32 2 0.133333333333333 0 1 0 1 +10 0 0 0 0 2 33 2 0.133333333333333 0 1.03333333333333 0 1 +10 0 0 0 0 2 34 2 0.133333333333333 0 1.06666666666667 0 1 + +# iteration 10 time 0.133333333333333 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 1 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +# + diff --git a/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.z.asc b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.z.asc new file mode 100644 index 000000000..d4d03a3d0 --- /dev/null +++ b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/alp.z.asc @@ -0,0 +1,110 @@ +# 1D ASCII output created by CarpetIOASCII +# created on Redshift.local by eschnett on Jan 09 2013 at 14:33:53-0500 +# parameter filename: "../../arrangements/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test.par" +# +# alp z (alp) +# +# iteration 0 time 0 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 0 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +0 0 0 0 0 2 2 0 0 0 0 -0.0666666666666667 1 +0 0 0 0 0 2 2 1 0 0 0 -0.0333333333333333 1 +0 0 0 0 0 2 2 2 0 0 0 0 1 +0 0 0 0 0 2 2 3 0 0 0 0.0333333333333333 1 +0 0 0 0 0 2 2 4 0 0 0 0.0666666666666667 1 +0 0 0 0 0 2 2 5 0 0 0 0.1 1 +0 0 0 0 0 2 2 6 0 0 0 0.133333333333333 1 +0 0 0 0 0 2 2 7 0 0 0 0.166666666666667 1 +0 0 0 0 0 2 2 8 0 0 0 0.2 1 +0 0 0 0 0 2 2 9 0 0 0 0.233333333333333 1 +0 0 0 0 0 2 2 10 0 0 0 0.266666666666667 1 +0 0 0 0 0 2 2 11 0 0 0 0.3 1 +0 0 0 0 0 2 2 12 0 0 0 0.333333333333333 1 +0 0 0 0 0 2 2 13 0 0 0 0.366666666666667 1 +0 0 0 0 0 2 2 14 0 0 0 0.4 1 +0 0 0 0 0 2 2 15 0 0 0 0.433333333333333 1 +0 0 0 0 0 2 2 16 0 0 0 0.466666666666667 1 +0 0 0 0 0 2 2 17 0 0 0 0.5 1 +0 0 0 0 0 2 2 18 0 0 0 0.533333333333333 1 +0 0 0 0 0 2 2 19 0 0 0 0.566666666666667 1 +0 0 0 0 0 2 2 20 0 0 0 0.6 1 + +# iteration 0 time 0 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 1 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +0 0 0 1 0 2 2 15 0 0 0 0.433333333333333 1 +0 0 0 1 0 2 2 16 0 0 0 0.466666666666667 1 +0 0 0 1 0 2 2 17 0 0 0 0.5 1 +0 0 0 1 0 2 2 18 0 0 0 0.533333333333333 1 +0 0 0 1 0 2 2 19 0 0 0 0.566666666666667 1 +0 0 0 1 0 2 2 20 0 0 0 0.6 1 +0 0 0 1 0 2 2 21 0 0 0 0.633333333333333 1 +0 0 0 1 0 2 2 22 0 0 0 0.666666666666667 1 +0 0 0 1 0 2 2 23 0 0 0 0.7 1 +0 0 0 1 0 2 2 24 0 0 0 0.733333333333333 1 +0 0 0 1 0 2 2 25 0 0 0 0.766666666666667 1 +0 0 0 1 0 2 2 26 0 0 0 0.8 1 +0 0 0 1 0 2 2 27 0 0 0 0.833333333333333 1 +0 0 0 1 0 2 2 28 0 0 0 0.866666666666667 1 +0 0 0 1 0 2 2 29 0 0 0 0.9 1 +0 0 0 1 0 2 2 30 0 0 0 0.933333333333333 1 +0 0 0 1 0 2 2 31 0 0 0 0.966666666666667 1 +0 0 0 1 0 2 2 32 0 0 0 1 1 +0 0 0 1 0 2 2 33 0 0 0 1.03333333333333 1 +0 0 0 1 0 2 2 34 0 0 0 1.06666666666667 1 + + +# iteration 10 time 0.133333333333333 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 0 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +10 0 0 0 0 2 2 0 0.133333333333333 0 0 -0.0666666666666667 1 +10 0 0 0 0 2 2 1 0.133333333333333 0 0 -0.0333333333333333 1 +10 0 0 0 0 2 2 2 0.133333333333333 0 0 0 1 +10 0 0 0 0 2 2 3 0.133333333333333 0 0 0.0333333333333333 1 +10 0 0 0 0 2 2 4 0.133333333333333 0 0 0.0666666666666667 1 +10 0 0 0 0 2 2 5 0.133333333333333 0 0 0.1 1 +10 0 0 0 0 2 2 6 0.133333333333333 0 0 0.133333333333333 1 +10 0 0 0 0 2 2 7 0.133333333333333 0 0 0.166666666666667 1 +10 0 0 0 0 2 2 8 0.133333333333333 0 0 0.2 1 +10 0 0 0 0 2 2 9 0.133333333333333 0 0 0.233333333333333 1 +10 0 0 0 0 2 2 10 0.133333333333333 0 0 0.266666666666667 1 +10 0 0 0 0 2 2 11 0.133333333333333 0 0 0.3 1 +10 0 0 0 0 2 2 12 0.133333333333333 0 0 0.333333333333333 1 +10 0 0 0 0 2 2 13 0.133333333333333 0 0 0.366666666666667 1 +10 0 0 0 0 2 2 14 0.133333333333333 0 0 0.4 1 +10 0 0 0 0 2 2 15 0.133333333333333 0 0 0.433333333333333 1 +10 0 0 0 0 2 2 16 0.133333333333333 0 0 0.466666666666667 1 +10 0 0 0 0 2 2 17 0.133333333333333 0 0 0.5 1 +10 0 0 0 0 2 2 18 0.133333333333333 0 0 0.533333333333333 1 +10 0 0 0 0 2 2 19 0.133333333333333 0 0 0.566666666666667 1 +10 0 0 0 0 2 2 20 0.133333333333333 0 0 0.6 1 + +# iteration 10 time 0.133333333333333 +# time level 0 +# refinement level 0 multigrid level 0 map 0 component 1 +# column format: 1:it 2:tl 3:rl 4:c 5:ml 6:ix 7:iy 8:iz 9:time 10:x 11:y 12:z 13:data +10 0 0 1 0 2 2 15 0.133333333333333 0 0 0.433333333333333 1 +10 0 0 1 0 2 2 16 0.133333333333333 0 0 0.466666666666667 1 +10 0 0 1 0 2 2 17 0.133333333333333 0 0 0.5 1 +10 0 0 1 0 2 2 18 0.133333333333333 0 0 0.533333333333333 1 +10 0 0 1 0 2 2 19 0.133333333333333 0 0 0.566666666666667 1 +10 0 0 1 0 2 2 20 0.133333333333333 0 0 0.6 1 +10 0 0 1 0 2 2 21 0.133333333333333 0 0 0.633333333333333 1 +10 0 0 1 0 2 2 22 0.133333333333333 0 0 0.666666666666667 1 +10 0 0 1 0 2 2 23 0.133333333333333 0 0 0.7 1 +10 0 0 1 0 2 2 24 0.133333333333333 0 0 0.733333333333333 1 +10 0 0 1 0 2 2 25 0.133333333333333 0 0 0.766666666666667 1 +10 0 0 1 0 2 2 26 0.133333333333333 0 0 0.8 1 +10 0 0 1 0 2 2 27 0.133333333333333 0 0 0.833333333333333 1 +10 0 0 1 0 2 2 28 0.133333333333333 0 0 0.866666666666667 1 +10 0 0 1 0 2 2 29 0.133333333333333 0 0 0.9 1 +10 0 0 1 0 2 2 30 0.133333333333333 0 0 0.933333333333333 1 +10 0 0 1 0 2 2 31 0.133333333333333 0 0 0.966666666666667 1 +10 0 0 1 0 2 2 32 0.133333333333333 0 0 1 1 +10 0 0 1 0 2 2 33 0.133333333333333 0 0 1.03333333333333 1 +10 0 0 1 0 2 2 34 0.133333333333333 0 0 1.06666666666667 1 + + diff --git a/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/bench-minkowski-carpet-1lev-test.par b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/bench-minkowski-carpet-1lev-test.par new file mode 100644 index 000000000..25c092823 --- /dev/null +++ b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/bench-minkowski-carpet-1lev-test.par @@ -0,0 +1,124 @@ +Cactus::cctk_run_title = "Benchmark of McLachlan using Carpet with one refinement level" +Cactus::cctk_timer_output = "full" + +Cactus::cctk_itlast = 10 + + + +ActiveThorns = "CycleClock Fortran hwloc MPI" + + + +ActiveThorns = "IOUtil" + +IO::out_dir = $parfile + + + +ActiveThorns = "InitBase" + + + +ActiveThorns = "LoopControl" + +LoopControl::verbose = no +LoopControl::veryverbose = no +LoopControl::selftest = yes + +LoopControl::initial_setup = "tiled" + +#Carpet::pad_to_cachelines = yes + + + +ActiveThorns = "Carpet CarpetLib CarpetReduce" + +Carpet::domain_from_coordbase = yes + +driver::ghost_size = 3 + +Carpet::init_fill_timelevels = yes + +#CarpetLib::print_timestats_every = 1 +CarpetLib::print_memstats_every = 10 + + + +ActiveThorns = "Boundary CartGrid3D CoordBase SymBase" + +CoordBase::domainsize = "minmax" +CoordBase::xmin = 0.0 +CoordBase::ymin = 0.0 +CoordBase::zmin = 0.0 +CoordBase::xmax = 1.0 +CoordBase::ymax = 1.0 +CoordBase::zmax = 1.0 + +CoordBase::spacing = "numcells" +CoordBase::ncells_x = 30 +CoordBase::ncells_y = 30 +CoordBase::ncells_z = 30 + +CartGrid3D::type = "coordbase" +CartGrid3D::avoid_originx = no +CartGrid3D::avoid_originy = no +CartGrid3D::avoid_originz = no + +CoordBase::boundary_size_x_lower = 3 +CoordBase::boundary_size_y_lower = 3 +CoordBase::boundary_size_z_lower = 3 +CoordBase::boundary_size_x_upper = 3 +CoordBase::boundary_size_y_upper = 3 +CoordBase::boundary_size_z_upper = 3 + + + +ActiveThorns = "MoL NaNChecker Time" + +MoL::ODE_Method = "RK4" +MoL::MoL_Intermediate_Steps = 4 +MoL::MoL_Num_Scratch_Levels = 1 + +Time::dtfac = 0.4 + + + +ActiveThorns = "ADMBase ADMCoupling ADMMacros CoordGauge StaticConformal" + +ADMBase::initial_data = "Cartesian Minkowski" +ADMBase::initial_lapse = "one" +ADMBase::initial_shift = "zero" +ADMBase::initial_dtlapse = "zero" +ADMBase::initial_dtshift = "zero" + + + +ActiveThorns = "GenericFD ML_BSSN ML_BSSN_Helper TmunuBase" + +ADMBase::evolution_method = "ML_BSSN" +ADMBase::lapse_evolution_method = "ML_BSSN" +ADMBase::shift_evolution_method = "ML_BSSN" + +ML_BSSN::my_boundary_condition = "Minkowski" + +ML_BSSN::harmonicN = 1 # 1+log +ML_BSSN::harmonicF = 2.0 # 1+log +ML_BSSN::ShiftGammaCoeff = 0.75 +ML_BSSN::BetaDriver = 0.5 + + + +ActiveThorns = "CarpetIOBasic" + +IOBasic::outInfo_every = 10 +IOBasic::outInfo_vars = "ADMBase::alp" + + + +ActiveThorns = "CarpetIOASCII" + +IOASCII::out0D_every = 0 # 10 +IOASCII::out0D_vars = "Carpet::timing" + +IOASCII::out1D_every = 10 +IOASCII::out1D_vars = "ADMBase::alp" diff --git a/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/carpetlib-memory-statistics b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/carpetlib-memory-statistics new file mode 100644 index 000000000..d9550af2f --- /dev/null +++ b/Carpet/LoopControl/test/bench-minkowski-carpet-1lev-test/carpetlib-memory-statistics @@ -0,0 +1,5 @@ +# Running on 2 processes +# +# iteration maxtotalbytes avgtotalbytes maxmaxbytes avgm avgfreebytes +0 4.09547e+07 3.99796e+07 4.12634e+07 4.02809e+07 188960 188960 0 0 0 0 +10 4.09547e+07 3.99796e+07 4.12634e+07 4.02809e+07 188960 188960 0 0 0 0 -- cgit v1.2.3