diff options
author | jthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5> | 2003-02-26 17:57:20 +0000 |
---|---|---|
committer | jthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5> | 2003-02-26 17:57:20 +0000 |
commit | 07995eaa14b03487dc1333b91aa7f6c998eca45a (patch) | |
tree | 65afd5c42fb5231d0bf253a397ae6b78a4ee1d8e | |
parent | 2d11cab54e2c0815481fb23c4fdcd1a74dcc34d1 (diff) |
broadcast an "have I found my current horizon?"
flag from each active processor to processor #0,
and use this to print a message each time a horizon is found
git-svn-id: http://svn.einsteintoolkit.org/cactus/EinsteinAnalysis/AHFinderDirect/trunk@951 f88db872-0e4f-0410-b76b-b9085cfa78c5
-rw-r--r-- | src/driver/Newton.cc | 175 |
1 files changed, 114 insertions, 61 deletions
diff --git a/src/driver/Newton.cc b/src/driver/Newton.cc index f6984ad..27ec3b0 100644 --- a/src/driver/Newton.cc +++ b/src/driver/Newton.cc @@ -3,7 +3,7 @@ // // Newton_solve - driver to solve Theta(h) = 0 for all our horizons /// reduce_and_broadcast - reduce/broadcast status to other processors -/// print_Theta_norms - print the Theta norms +/// print_iteration_status - print Newton-iteration status on each active proc /// Newton_step - take the Newton step, scaling it down if it's too large // @@ -51,12 +51,14 @@ namespace { bool reduce_and_broadcast(cGH* GH, int N_procs, int N_active_procs, int my_proc, bool my_active_flag, - bool we_need_more_iterations, + bool we_need_more_iterations, bool found_this_horizon, int hn, int iteration, fp rms_norm, fp infinity_norm, + bool found_horizon_buffer[], int hn_buffer[], int iteration_buffer[], fp rms_norm_buffer[], fp infinity_norm_buffer[]); -void print_Theta_norms +void print_iteration_status (int N_active_procs, + const bool found_horizon_buffer[], const int hn_buffer[], const int iteration_buffer[], const fp rms_norm_buffer[], const fp infinity_norm_buffer[]); void Newton_step(patch_system& ps, @@ -101,25 +103,28 @@ const bool my_active_flag = hs.has_genuine_horizons(); const int N_horizons = hs.N_horizons(); // buffers for reduce_and_broadcast() -int* hn_buffer = NULL; -int* iteration_buffer = NULL; -fp * rms_norm_buffer = NULL; -fp * infinity_norm_buffer = NULL; +bool* found_horizon_buffer = NULL; +int* hn_buffer = NULL; +int* iteration_buffer = NULL; +fp * rms_norm_buffer = NULL; +fp * infinity_norm_buffer = NULL; if (hn_buffer == NULL) then { // allocate on first call - hn_buffer = new int[N_active_procs]; - iteration_buffer = new int[N_active_procs]; - rms_norm_buffer = new fp [N_active_procs]; - infinity_norm_buffer = new fp [N_active_procs]; + found_horizon_buffer = new bool[N_active_procs]; + hn_buffer = new int [N_active_procs]; + iteration_buffer = new int [N_active_procs]; + rms_norm_buffer = new fp [N_active_procs]; + infinity_norm_buffer = new fp [N_active_procs]; } // print out which horizons we're finding on this processor if (hs.has_genuine_horizons()) then CCTK_VInfo(CCTK_THORNSTRING, - "proc %d: searching for horizon%s %s", + "proc %d: searching for horizon%s %s/%d", my_proc, - (N_horizons > 1 ? "s" : ""), hs.sequence_string(",")); + (N_horizons > 1 ? "s" : ""), + hs.sequence_string(","), N_horizons); else CCTK_VInfo(CCTK_THORNSTRING, "proc %d: dummy horizon(s) only", my_proc); @@ -252,17 +257,19 @@ if (hs.has_genuine_horizons()) int(we_need_more_iterations)); } - // does any processor need more iterations - // on its current or a following horizon? + // reduce/broadcast status flags and norms to all processors + // ==> determine if any processor needs more iterations + // on its current or a following horizon? const bool any_proc_needs_more_iterations = reduce_and_broadcast(GH, N_procs, N_active_procs, my_proc, my_active_flag, - we_need_more_iterations, + we_need_more_iterations, found_this_horizon, hn, iteration, ((horizon_is_genuine && Theta_is_ok) ? Theta_norms.rms_norm() : 0.0), ((horizon_is_genuine && Theta_is_ok) ? Theta_norms.infinity_norm() : 0.0), + found_horizon_buffer, hn_buffer, iteration_buffer, rms_norm_buffer, infinity_norm_buffer); @@ -273,9 +280,10 @@ if (hs.has_genuine_horizons()) int(any_proc_needs_more_iterations)); } if (my_active_flag && verbose_info.print_algorithm_highlights) - then print_Theta_norms(N_active_procs, - hn_buffer, iteration_buffer, - rms_norm_buffer, infinity_norm_buffer); + then print_iteration_status(N_active_procs, + found_horizon_buffer, + hn_buffer, iteration_buffer, + rms_norm_buffer, infinity_norm_buffer); if (found_this_horizon) then { compute_BH_diagnostics(*ps_ptr, BH_diagnostics_info, @@ -421,8 +429,8 @@ assert( false ); // This function does an inclusive-or--reduction of the "we need more // iterations (either on this or on another genuine horizon" flags across // all active processors. It also broadcasts which horizon we're working -// on, the current iteration number, and the current error norms, to -// processor #0. +// on, the current iteration number, whether we've found this horizon, +// and the current error norms, to processor #0. // // The reduction and broadcast are combined in this function to allow // doing them all with a single reduction operation (and thus only a @@ -436,14 +444,16 @@ assert( false ); // we_need_more_iterations = "we need more iterations (either on this // or on another genuine horizon" flag to be // inclusive-or--reduced +// found_this_horizon = "have we found this horizon?" flag to be broadcast // hn,iteration,{rms,infinity}_norm = the values to be broadcast: // if this is an active processor these // are broadcast to processor #0, // otherwise these are ignored -// {hn,iteration,{rms,infinity}_norm}_buffer[N_active_procs] -// = buffers for horizon numbers and norms: if this is processor #0 -// these are set to the horizon numbers and norms for all active -// processors, otherwise these are set to undefined (garbage) values +// {found_horizon,hn,iteration,{rms,infinity}_norm}_buffer[N_active_procs] +// = buffers for flags, horizon numbers, and norms: if this is processor +// #0 these are set to the found-this-horizon-flags, horizon numbers, +// and norms for all active processors, otherwise these are set to +// undefined (garbage) values // // Results: // This function returns the inclusive-or--reduction of the @@ -453,8 +463,9 @@ namespace { bool reduce_and_broadcast(cGH* GH, int N_procs, int N_active_procs, int my_proc, bool my_active_flag, - bool we_need_more_iterations, + bool we_need_more_iterations, bool found_this_horizon, int hn, int iteration, fp rms_norm, fp infinity_norm, + bool found_horizon_buffer[], int hn_buffer[], int iteration_buffer[], fp rms_norm_buffer[], fp infinity_norm_buffer[]) { @@ -462,38 +473,54 @@ assert( my_proc >= 0 ); assert( my_proc < N_procs ); // // We do the combined reduce/broadcast in a single reduction operation -// by setting the buffers to all 0s, except that on each processor the -// [my_proc] entries have the values we want to reduce/broadcast, then -// doing a sum-reduction of the buffers. +// by setting up a 2-D buffer whose entries are all 0s, except that on each +// processor the [my_proc] row hase the values we want to reduce/broadcast, +// then doing a sum-reduction of the buffers across processors. // // Alas, to do everything in a single operation, all the values have to // be of a single datatype in a single array, so we convert hn and -// iteration to CCTK_REAL, and encode the "we need more iterations" in -// hn's sign. -// -const int N_vars = 4; -const int N_reduce_buffer = N_vars*N_active_procs; -static CCTK_REAL* reduce_buffer = NULL; -if (reduce_buffer == NULL) +// iteration to CCTK_REAL, and encode the Boolean flags in other +// variables' signs. Also, for purposes of the reduction we treat the +// buffer as a 1-D array. +// + +// the buffer is actually a 2-D array; these are the column numbers +// ... n.b. input and output buffers must be distinct! +enum { + buffer_var__hn = 0, + buffer_var__iteration, + buffer_var__rms_norm, + buffer_var__infinity_norm, + N_buffer_vars + }; + +static jtutil::array2d<CCTK_REAL> *reduce_buffer_in_ptr = NULL; +static jtutil::array2d<CCTK_REAL> *reduce_buffer_out_ptr = NULL; +if (reduce_buffer_in_ptr == NULL) then { - // allocate on first call - reduce_buffer = new CCTK_REAL[N_reduce_buffer]; + // allocate on first call (constructor automagically zero-initializes) + reduce_buffer_in_ptr = new jtutil::array2d<CCTK_REAL> + (0, N_procs-1, + 0, N_buffer_vars-1); + reduce_buffer_out_ptr = new jtutil::array2d<CCTK_REAL> + (0, N_procs-1, + 0, N_buffer_vars-1); } +jtutil::array2d<CCTK_REAL>& reduce_buffer_in = *reduce_buffer_in_ptr; +jtutil::array2d<CCTK_REAL>& reduce_buffer_out = *reduce_buffer_out_ptr; // -// pack the values into the reduction buffer +// pack this processor's values into the reduction buffer // - for (int i = 0 ; i < N_reduce_buffer ; ++i) - { - reduce_buffer[i] = 0.0; - } if (my_active_flag) then { - assert( my_proc < N_active_procs ); - reduce_buffer[N_vars*my_proc + 0] = we_need_more_iterations ? hn : -hn; - reduce_buffer[N_vars*my_proc + 1] = iteration; - reduce_buffer[N_vars*my_proc + 2] = rms_norm; - reduce_buffer[N_vars*my_proc + 3] = infinity_norm; + assert( reduce_buffer_in.is_valid_i(my_proc) ); + reduce_buffer_in(my_proc, buffer_var__hn) + = we_need_more_iterations ? +hn : -hn; + reduce_buffer_in(my_proc, buffer_var__iteration) + = found_this_horizon ? +iteration : -iteration; + reduce_buffer_in(my_proc, buffer_var__rms_norm) = rms_norm; + reduce_buffer_in(my_proc, buffer_var__infinity_norm) = infinity_norm; } // @@ -507,13 +534,19 @@ if (reduction_handle < 0) "reduce_and_broadcast(): can't get sum-reduction handle!"); /*NOTREACHED*/ +// FIXME: CCTK_ReduceLocArrayToArray1D() prototypes the in_data argument +// as void* , not const void* const int status = CCTK_ReduceLocArrayToArray1D(GH, -1, // all processors get result reduction_handle, - (/*const*/ void*) reduce_buffer, - ( void*) reduce_buffer, - N_reduce_buffer, + static_cast</*const*/ void*>( + reduce_buffer_in.data_array() + ), + static_cast< void*>( + reduce_buffer_out.data_array() + ), + reduce_buffer_in.N_array(), CCTK_VARIABLE_REAL); if (status < 0) then CCTK_VWarn(FATAL_ERROR, __LINE__, __FILE__, CCTK_THORNSTRING, @@ -526,13 +559,21 @@ if (status < 0) bool any_proc_needs_more_iterations = false; for (int proc = 0 ; proc < N_active_procs ; ++proc) { - const int i_proc = N_vars*proc; - const bool proc_needs_more_iterations = reduce_buffer[i_proc + 0] > 0.0; + const bool proc_needs_more_iterations + = reduce_buffer_out(proc,buffer_var__hn) > 0.0; + found_horizon_buffer[proc] + = reduce_buffer_out(proc,buffer_var__iteration) > 0.0; + any_proc_needs_more_iterations |= proc_needs_more_iterations; - hn_buffer[proc] = (int) fabs(reduce_buffer[i_proc + 0]); - iteration_buffer[proc] = (int) reduce_buffer[i_proc + 1]; - rms_norm_buffer[proc] = reduce_buffer[i_proc + 2]; - infinity_norm_buffer[proc] = reduce_buffer[i_proc + 3]; + + hn_buffer[proc] + = int(fabs(reduce_buffer_out(proc, buffer_var__hn))); + iteration_buffer[proc] + = int(fabs(reduce_buffer_out(proc, buffer_var__iteration))); + rms_norm_buffer[proc] + = reduce_buffer_out(proc, buffer_var__rms_norm); + infinity_norm_buffer[proc] + = reduce_buffer_out(proc, buffer_var__infinity_norm); } return any_proc_needs_more_iterations; @@ -542,10 +583,14 @@ return any_proc_needs_more_iterations; //****************************************************************************** // -// This function is called on processor #0 to print the Theta norms. +// This function is called on processor #0 to print the status of the +// Newton iteration on each active processor. // // Arguments: // N_active_procs = The number of active processors. +// found_horizon_buffer[N_active_procs] = Tells whether each active processor +// has (just) successfully found its +// horizon. // hn_buffer[N_active_procs] = Gives the horizon each active processor // is currently working on. // iteration_buffer[N_active_procs] = Gives the iteration number of each @@ -554,8 +599,9 @@ return any_proc_needs_more_iterations; // each active processor. // namespace { -void print_Theta_norms +void print_iteration_status (int N_active_procs, + const bool found_horizon_buffer[], const int hn_buffer[], const int iteration_buffer[], const fp rms_norm_buffer[], const fp infinity_norm_buffer[]) { @@ -565,11 +611,18 @@ void print_Theta_norms then CCTK_VInfo(CCTK_THORNSTRING, " proc %d/dummy horizon", proc); - else CCTK_VInfo(CCTK_THORNSTRING, - " proc %d/horizon %d/it %d |Theta| rms=%.1e inf=%.1e", + else { + CCTK_VInfo(CCTK_THORNSTRING, + " proc %d/horizon %d:it %d |Theta| rms=%.1e inf=%.1e", proc, hn_buffer[proc], iteration_buffer[proc], double(rms_norm_buffer[proc]), double(infinity_norm_buffer[proc])); + if (found_horizon_buffer[proc]) + then CCTK_VInfo(CCTK_THORNSTRING, + " proc %d/horizon %d:found at iteration %d", + proc, hn_buffer[proc], + iteration_buffer[proc]); + } } } } |