aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5>2003-02-26 17:57:20 +0000
committerjthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5>2003-02-26 17:57:20 +0000
commit07995eaa14b03487dc1333b91aa7f6c998eca45a (patch)
tree65afd5c42fb5231d0bf253a397ae6b78a4ee1d8e
parent2d11cab54e2c0815481fb23c4fdcd1a74dcc34d1 (diff)
broadcast an "have I found my current horizon?"
flag from each active processor to processor #0, and use this to print a message each time a horizon is found git-svn-id: http://svn.einsteintoolkit.org/cactus/EinsteinAnalysis/AHFinderDirect/trunk@951 f88db872-0e4f-0410-b76b-b9085cfa78c5
-rw-r--r--src/driver/Newton.cc175
1 files changed, 114 insertions, 61 deletions
diff --git a/src/driver/Newton.cc b/src/driver/Newton.cc
index f6984ad..27ec3b0 100644
--- a/src/driver/Newton.cc
+++ b/src/driver/Newton.cc
@@ -3,7 +3,7 @@
//
// Newton_solve - driver to solve Theta(h) = 0 for all our horizons
/// reduce_and_broadcast - reduce/broadcast status to other processors
-/// print_Theta_norms - print the Theta norms
+/// print_iteration_status - print Newton-iteration status on each active proc
/// Newton_step - take the Newton step, scaling it down if it's too large
//
@@ -51,12 +51,14 @@ namespace {
bool reduce_and_broadcast(cGH* GH,
int N_procs, int N_active_procs,
int my_proc, bool my_active_flag,
- bool we_need_more_iterations,
+ bool we_need_more_iterations, bool found_this_horizon,
int hn, int iteration, fp rms_norm, fp infinity_norm,
+ bool found_horizon_buffer[],
int hn_buffer[], int iteration_buffer[],
fp rms_norm_buffer[], fp infinity_norm_buffer[]);
-void print_Theta_norms
+void print_iteration_status
(int N_active_procs,
+ const bool found_horizon_buffer[],
const int hn_buffer[], const int iteration_buffer[],
const fp rms_norm_buffer[], const fp infinity_norm_buffer[]);
void Newton_step(patch_system& ps,
@@ -101,25 +103,28 @@ const bool my_active_flag = hs.has_genuine_horizons();
const int N_horizons = hs.N_horizons();
// buffers for reduce_and_broadcast()
-int* hn_buffer = NULL;
-int* iteration_buffer = NULL;
-fp * rms_norm_buffer = NULL;
-fp * infinity_norm_buffer = NULL;
+bool* found_horizon_buffer = NULL;
+int* hn_buffer = NULL;
+int* iteration_buffer = NULL;
+fp * rms_norm_buffer = NULL;
+fp * infinity_norm_buffer = NULL;
if (hn_buffer == NULL)
then {
// allocate on first call
- hn_buffer = new int[N_active_procs];
- iteration_buffer = new int[N_active_procs];
- rms_norm_buffer = new fp [N_active_procs];
- infinity_norm_buffer = new fp [N_active_procs];
+ found_horizon_buffer = new bool[N_active_procs];
+ hn_buffer = new int [N_active_procs];
+ iteration_buffer = new int [N_active_procs];
+ rms_norm_buffer = new fp [N_active_procs];
+ infinity_norm_buffer = new fp [N_active_procs];
}
// print out which horizons we're finding on this processor
if (hs.has_genuine_horizons())
then CCTK_VInfo(CCTK_THORNSTRING,
- "proc %d: searching for horizon%s %s",
+ "proc %d: searching for horizon%s %s/%d",
my_proc,
- (N_horizons > 1 ? "s" : ""), hs.sequence_string(","));
+ (N_horizons > 1 ? "s" : ""),
+ hs.sequence_string(","), N_horizons);
else CCTK_VInfo(CCTK_THORNSTRING,
"proc %d: dummy horizon(s) only",
my_proc);
@@ -252,17 +257,19 @@ if (hs.has_genuine_horizons())
int(we_need_more_iterations));
}
- // does any processor need more iterations
- // on its current or a following horizon?
+ // reduce/broadcast status flags and norms to all processors
+ // ==> determine if any processor needs more iterations
+ // on its current or a following horizon?
const bool any_proc_needs_more_iterations
= reduce_and_broadcast(GH, N_procs, N_active_procs,
my_proc, my_active_flag,
- we_need_more_iterations,
+ we_need_more_iterations, found_this_horizon,
hn, iteration,
((horizon_is_genuine && Theta_is_ok)
? Theta_norms.rms_norm() : 0.0),
((horizon_is_genuine && Theta_is_ok)
? Theta_norms.infinity_norm() : 0.0),
+ found_horizon_buffer,
hn_buffer, iteration_buffer,
rms_norm_buffer, infinity_norm_buffer);
@@ -273,9 +280,10 @@ if (hs.has_genuine_horizons())
int(any_proc_needs_more_iterations));
}
if (my_active_flag && verbose_info.print_algorithm_highlights)
- then print_Theta_norms(N_active_procs,
- hn_buffer, iteration_buffer,
- rms_norm_buffer, infinity_norm_buffer);
+ then print_iteration_status(N_active_procs,
+ found_horizon_buffer,
+ hn_buffer, iteration_buffer,
+ rms_norm_buffer, infinity_norm_buffer);
if (found_this_horizon)
then {
compute_BH_diagnostics(*ps_ptr, BH_diagnostics_info,
@@ -421,8 +429,8 @@ assert( false );
// This function does an inclusive-or--reduction of the "we need more
// iterations (either on this or on another genuine horizon" flags across
// all active processors. It also broadcasts which horizon we're working
-// on, the current iteration number, and the current error norms, to
-// processor #0.
+// on, the current iteration number, whether we've found this horizon,
+// and the current error norms, to processor #0.
//
// The reduction and broadcast are combined in this function to allow
// doing them all with a single reduction operation (and thus only a
@@ -436,14 +444,16 @@ assert( false );
// we_need_more_iterations = "we need more iterations (either on this
// or on another genuine horizon" flag to be
// inclusive-or--reduced
+// found_this_horizon = "have we found this horizon?" flag to be broadcast
// hn,iteration,{rms,infinity}_norm = the values to be broadcast:
// if this is an active processor these
// are broadcast to processor #0,
// otherwise these are ignored
-// {hn,iteration,{rms,infinity}_norm}_buffer[N_active_procs]
-// = buffers for horizon numbers and norms: if this is processor #0
-// these are set to the horizon numbers and norms for all active
-// processors, otherwise these are set to undefined (garbage) values
+// {found_horizon,hn,iteration,{rms,infinity}_norm}_buffer[N_active_procs]
+// = buffers for flags, horizon numbers, and norms: if this is processor
+// #0 these are set to the found-this-horizon-flags, horizon numbers,
+// and norms for all active processors, otherwise these are set to
+// undefined (garbage) values
//
// Results:
// This function returns the inclusive-or--reduction of the
@@ -453,8 +463,9 @@ namespace {
bool reduce_and_broadcast(cGH* GH,
int N_procs, int N_active_procs,
int my_proc, bool my_active_flag,
- bool we_need_more_iterations,
+ bool we_need_more_iterations, bool found_this_horizon,
int hn, int iteration, fp rms_norm, fp infinity_norm,
+ bool found_horizon_buffer[],
int hn_buffer[], int iteration_buffer[],
fp rms_norm_buffer[], fp infinity_norm_buffer[])
{
@@ -462,38 +473,54 @@ assert( my_proc >= 0 );
assert( my_proc < N_procs );
//
// We do the combined reduce/broadcast in a single reduction operation
-// by setting the buffers to all 0s, except that on each processor the
-// [my_proc] entries have the values we want to reduce/broadcast, then
-// doing a sum-reduction of the buffers.
+// by setting up a 2-D buffer whose entries are all 0s, except that on each
+// processor the [my_proc] row hase the values we want to reduce/broadcast,
+// then doing a sum-reduction of the buffers across processors.
//
// Alas, to do everything in a single operation, all the values have to
// be of a single datatype in a single array, so we convert hn and
-// iteration to CCTK_REAL, and encode the "we need more iterations" in
-// hn's sign.
-//
-const int N_vars = 4;
-const int N_reduce_buffer = N_vars*N_active_procs;
-static CCTK_REAL* reduce_buffer = NULL;
-if (reduce_buffer == NULL)
+// iteration to CCTK_REAL, and encode the Boolean flags in other
+// variables' signs. Also, for purposes of the reduction we treat the
+// buffer as a 1-D array.
+//
+
+// the buffer is actually a 2-D array; these are the column numbers
+// ... n.b. input and output buffers must be distinct!
+enum {
+ buffer_var__hn = 0,
+ buffer_var__iteration,
+ buffer_var__rms_norm,
+ buffer_var__infinity_norm,
+ N_buffer_vars
+ };
+
+static jtutil::array2d<CCTK_REAL> *reduce_buffer_in_ptr = NULL;
+static jtutil::array2d<CCTK_REAL> *reduce_buffer_out_ptr = NULL;
+if (reduce_buffer_in_ptr == NULL)
then {
- // allocate on first call
- reduce_buffer = new CCTK_REAL[N_reduce_buffer];
+ // allocate on first call (constructor automagically zero-initializes)
+ reduce_buffer_in_ptr = new jtutil::array2d<CCTK_REAL>
+ (0, N_procs-1,
+ 0, N_buffer_vars-1);
+ reduce_buffer_out_ptr = new jtutil::array2d<CCTK_REAL>
+ (0, N_procs-1,
+ 0, N_buffer_vars-1);
}
+jtutil::array2d<CCTK_REAL>& reduce_buffer_in = *reduce_buffer_in_ptr;
+jtutil::array2d<CCTK_REAL>& reduce_buffer_out = *reduce_buffer_out_ptr;
//
-// pack the values into the reduction buffer
+// pack this processor's values into the reduction buffer
//
- for (int i = 0 ; i < N_reduce_buffer ; ++i)
- {
- reduce_buffer[i] = 0.0;
- }
if (my_active_flag)
then {
- assert( my_proc < N_active_procs );
- reduce_buffer[N_vars*my_proc + 0] = we_need_more_iterations ? hn : -hn;
- reduce_buffer[N_vars*my_proc + 1] = iteration;
- reduce_buffer[N_vars*my_proc + 2] = rms_norm;
- reduce_buffer[N_vars*my_proc + 3] = infinity_norm;
+ assert( reduce_buffer_in.is_valid_i(my_proc) );
+ reduce_buffer_in(my_proc, buffer_var__hn)
+ = we_need_more_iterations ? +hn : -hn;
+ reduce_buffer_in(my_proc, buffer_var__iteration)
+ = found_this_horizon ? +iteration : -iteration;
+ reduce_buffer_in(my_proc, buffer_var__rms_norm) = rms_norm;
+ reduce_buffer_in(my_proc, buffer_var__infinity_norm) = infinity_norm;
}
//
@@ -507,13 +534,19 @@ if (reduction_handle < 0)
"reduce_and_broadcast(): can't get sum-reduction handle!");
/*NOTREACHED*/
+// FIXME: CCTK_ReduceLocArrayToArray1D() prototypes the in_data argument
+// as void* , not const void*
const int status
= CCTK_ReduceLocArrayToArray1D(GH,
-1, // all processors get result
reduction_handle,
- (/*const*/ void*) reduce_buffer,
- ( void*) reduce_buffer,
- N_reduce_buffer,
+ static_cast</*const*/ void*>(
+ reduce_buffer_in.data_array()
+ ),
+ static_cast< void*>(
+ reduce_buffer_out.data_array()
+ ),
+ reduce_buffer_in.N_array(),
CCTK_VARIABLE_REAL);
if (status < 0)
then CCTK_VWarn(FATAL_ERROR, __LINE__, __FILE__, CCTK_THORNSTRING,
@@ -526,13 +559,21 @@ if (status < 0)
bool any_proc_needs_more_iterations = false;
for (int proc = 0 ; proc < N_active_procs ; ++proc)
{
- const int i_proc = N_vars*proc;
- const bool proc_needs_more_iterations = reduce_buffer[i_proc + 0] > 0.0;
+ const bool proc_needs_more_iterations
+ = reduce_buffer_out(proc,buffer_var__hn) > 0.0;
+ found_horizon_buffer[proc]
+ = reduce_buffer_out(proc,buffer_var__iteration) > 0.0;
+
any_proc_needs_more_iterations |= proc_needs_more_iterations;
- hn_buffer[proc] = (int) fabs(reduce_buffer[i_proc + 0]);
- iteration_buffer[proc] = (int) reduce_buffer[i_proc + 1];
- rms_norm_buffer[proc] = reduce_buffer[i_proc + 2];
- infinity_norm_buffer[proc] = reduce_buffer[i_proc + 3];
+
+ hn_buffer[proc]
+ = int(fabs(reduce_buffer_out(proc, buffer_var__hn)));
+ iteration_buffer[proc]
+ = int(fabs(reduce_buffer_out(proc, buffer_var__iteration)));
+ rms_norm_buffer[proc]
+ = reduce_buffer_out(proc, buffer_var__rms_norm);
+ infinity_norm_buffer[proc]
+ = reduce_buffer_out(proc, buffer_var__infinity_norm);
}
return any_proc_needs_more_iterations;
@@ -542,10 +583,14 @@ return any_proc_needs_more_iterations;
//******************************************************************************
//
-// This function is called on processor #0 to print the Theta norms.
+// This function is called on processor #0 to print the status of the
+// Newton iteration on each active processor.
//
// Arguments:
// N_active_procs = The number of active processors.
+// found_horizon_buffer[N_active_procs] = Tells whether each active processor
+// has (just) successfully found its
+// horizon.
// hn_buffer[N_active_procs] = Gives the horizon each active processor
// is currently working on.
// iteration_buffer[N_active_procs] = Gives the iteration number of each
@@ -554,8 +599,9 @@ return any_proc_needs_more_iterations;
// each active processor.
//
namespace {
-void print_Theta_norms
+void print_iteration_status
(int N_active_procs,
+ const bool found_horizon_buffer[],
const int hn_buffer[], const int iteration_buffer[],
const fp rms_norm_buffer[], const fp infinity_norm_buffer[])
{
@@ -565,11 +611,18 @@ void print_Theta_norms
then CCTK_VInfo(CCTK_THORNSTRING,
" proc %d/dummy horizon",
proc);
- else CCTK_VInfo(CCTK_THORNSTRING,
- " proc %d/horizon %d/it %d |Theta| rms=%.1e inf=%.1e",
+ else {
+ CCTK_VInfo(CCTK_THORNSTRING,
+ " proc %d/horizon %d:it %d |Theta| rms=%.1e inf=%.1e",
proc, hn_buffer[proc], iteration_buffer[proc],
double(rms_norm_buffer[proc]),
double(infinity_norm_buffer[proc]));
+ if (found_horizon_buffer[proc])
+ then CCTK_VInfo(CCTK_THORNSTRING,
+ " proc %d/horizon %d:found at iteration %d",
+ proc, hn_buffer[proc],
+ iteration_buffer[proc]);
+ }
}
}
}