broadcast an "have I found my current horizon?"

flag from each active processor to processor #0, and use this to print a message each time a horizon is found git-svn-id: http://svn.einsteintoolkit.org/cactus/EinsteinAnalysis/AHFinderDirect/trunk@951 f88db872-0e4f-0410-b76b-b9085cfa78c5
author: jthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5> 2003-02-26 17:57:20 +0000
committer: jthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5> 2003-02-26 17:57:20 +0000
commit: 07995eaa14b03487dc1333b91aa7f6c998eca45a (patch)
tree: 65afd5c42fb5231d0bf253a397ae6b78a4ee1d8e
parent: 2d11cab54e2c0815481fb23c4fdcd1a74dcc34d1 (diff)
1 files changed, 114 insertions, 61 deletions
diff --git a/src/driver/Newton.cc b/src/driver/Newton.cc
index f6984ad..27ec3b0 100644
--- a/src/driver/Newton.cc
+++ b/src/driver/Newton.cc
@@ -3,7 +3,7 @@
 //
 // Newton_solve - driver to solve Theta(h) = 0 for all our horizons
 /// reduce_and_broadcast - reduce/broadcast status to other processors
-/// print_Theta_norms - print the Theta norms
+/// print_iteration_status - print Newton-iteration status on each active proc
 /// Newton_step - take the Newton step, scaling it down if it's too large
 //
 
@@ -51,12 +51,14 @@ namespace {
 bool reduce_and_broadcast(cGH* GH,
 			  int N_procs, int N_active_procs,
 			  int my_proc, bool my_active_flag,
-			  bool we_need_more_iterations,
+			  bool we_need_more_iterations, bool found_this_horizon,
 			  int hn, int iteration, fp rms_norm, fp infinity_norm,
+			  bool found_horizon_buffer[],
 			  int hn_buffer[], int iteration_buffer[],
 			  fp rms_norm_buffer[], fp infinity_norm_buffer[]);
-void print_Theta_norms
+void print_iteration_status
 	(int N_active_procs,
+	 const bool found_horizon_buffer[],
 	 const int hn_buffer[], const int iteration_buffer[],
 	 const fp rms_norm_buffer[], const fp infinity_norm_buffer[]);
 void Newton_step(patch_system& ps,
@@ -101,25 +103,28 @@ const bool my_active_flag = hs.has_genuine_horizons();
 const int N_horizons = hs.N_horizons();
 
 // buffers for  reduce_and_broadcast()
-int*            hn_buffer = NULL;
-int*     iteration_buffer = NULL;
-fp *      rms_norm_buffer = NULL;
-fp * infinity_norm_buffer = NULL;
+bool* found_horizon_buffer = NULL;
+int*             hn_buffer = NULL;
+int*      iteration_buffer = NULL;
+fp *       rms_norm_buffer = NULL;
+fp *  infinity_norm_buffer = NULL;
 if (hn_buffer == NULL)
    then {
 	// allocate on first call
-	           hn_buffer = new int[N_active_procs];
-	    iteration_buffer = new int[N_active_procs];
-	     rms_norm_buffer = new fp [N_active_procs];
-	infinity_norm_buffer = new fp [N_active_procs];
+	found_horizon_buffer = new bool[N_active_procs];
+	           hn_buffer = new int [N_active_procs];
+	    iteration_buffer = new int [N_active_procs];
+	     rms_norm_buffer = new fp  [N_active_procs];
+	infinity_norm_buffer = new fp  [N_active_procs];
 	}
 
 // print out which horizons we're finding on this processor
 if (hs.has_genuine_horizons())
    then CCTK_VInfo(CCTK_THORNSTRING,
-		   "proc %d: searching for horizon%s %s",
+		   "proc %d: searching for horizon%s %s/%d",
 		   my_proc,
-		   (N_horizons > 1 ? "s" : ""), hs.sequence_string(","));
+		   (N_horizons > 1 ? "s" : ""),
+		   hs.sequence_string(","), N_horizons);
    else CCTK_VInfo(CCTK_THORNSTRING,
 		   "proc %d: dummy horizon(s) only",
 		   my_proc);
@@ -252,17 +257,19 @@ if (hs.has_genuine_horizons())
 			   int(we_need_more_iterations));
 		}
 
-	// does any processor need more iterations
-	// on its current or a following horizon?
+	// reduce/broadcast status flags and norms to all processors
+	// ==> determine if any processor needs more iterations
+	//     on its current or a following horizon?
 	const bool any_proc_needs_more_iterations
 	   = reduce_and_broadcast(GH, N_procs, N_active_procs,
 				  my_proc, my_active_flag,
-				  we_need_more_iterations,
+				  we_need_more_iterations, found_this_horizon,
 				  hn, iteration,
 				  ((horizon_is_genuine && Theta_is_ok)
 				   ? Theta_norms.rms_norm() : 0.0),
 		 		  ((horizon_is_genuine && Theta_is_ok)
 				   ? Theta_norms.infinity_norm() : 0.0),
+				  found_horizon_buffer,
 				  hn_buffer, iteration_buffer,
 				  rms_norm_buffer, infinity_norm_buffer);
 
@@ -273,9 +280,10 @@ if (hs.has_genuine_horizons())
 			   int(any_proc_needs_more_iterations));
 		}
 	if (my_active_flag && verbose_info.print_algorithm_highlights)
-	   then print_Theta_norms(N_active_procs,
-				  hn_buffer, iteration_buffer,
-				  rms_norm_buffer, infinity_norm_buffer);
+	   then print_iteration_status(N_active_procs,
+				       found_horizon_buffer,
+				       hn_buffer, iteration_buffer,
+				       rms_norm_buffer, infinity_norm_buffer);
 	if (found_this_horizon)
 	   then {
 		compute_BH_diagnostics(*ps_ptr, BH_diagnostics_info,
@@ -421,8 +429,8 @@ assert( false );
 // This function does an inclusive-or--reduction of the "we need more
 // iterations (either on this or on another genuine horizon" flags across
 // all active processors.  It also broadcasts which horizon we're working
-// on, the current iteration number, and the current error norms, to
-// processor #0.
+// on, the current iteration number, whether we've found this horizon,
+// and the current error norms, to processor #0.
 //
 // The reduction and broadcast are combined in this function to allow
 // doing them all with a single reduction operation (and thus only a
@@ -436,14 +444,16 @@ assert( false );
 // we_need_more_iterations = "we need more iterations (either on this
 //			      or on another genuine horizon" flag to be
 //			     inclusive-or--reduced
+// found_this_horizon = "have we found this horizon?" flag to be broadcast
 // hn,iteration,{rms,infinity}_norm = the values to be broadcast:
 //				      if this is an active processor these
 //				      are broadcast to processor #0,
 //				      otherwise these are ignored
-// {hn,iteration,{rms,infinity}_norm}_buffer[N_active_procs]
-//	= buffers for horizon numbers and norms: if this is processor #0
-//	  these are set to the horizon numbers and norms for all active
-//	  processors, otherwise these are set to undefined (garbage) values
+// {found_horizon,hn,iteration,{rms,infinity}_norm}_buffer[N_active_procs]
+//	= buffers for flags, horizon numbers, and norms: if this is processor
+//	  #0 these are set to the found-this-horizon-flags, horizon numbers,
+//	  and norms for all active processors, otherwise these are set to
+//	  undefined (garbage) values
 //
 // Results:
 // This function returns the inclusive-or--reduction of the
@@ -453,8 +463,9 @@ namespace {
 bool reduce_and_broadcast(cGH* GH,
 			  int N_procs, int N_active_procs,
 			  int my_proc, bool my_active_flag,
-			  bool we_need_more_iterations,
+			  bool we_need_more_iterations, bool found_this_horizon,
 			  int hn, int iteration, fp rms_norm, fp infinity_norm,
+			  bool found_horizon_buffer[],
 			  int hn_buffer[], int iteration_buffer[],
 			  fp rms_norm_buffer[], fp infinity_norm_buffer[])
 {
@@ -462,38 +473,54 @@ assert( my_proc >= 0 );
 assert( my_proc < N_procs );
 //
 // We do the combined reduce/broadcast in a single reduction operation
-// by setting the buffers to all 0s, except that on each processor the
-// [my_proc] entries have the values we want to reduce/broadcast, then
-// doing a sum-reduction of the buffers.
+// by setting up a 2-D buffer whose entries are all 0s, except that on each
+// processor the [my_proc] row hase the values we want to reduce/broadcast,
+// then doing a sum-reduction of the buffers across processors.
 //
 // Alas, to do everything in a single operation, all the values have to
 // be of a single datatype in a single array, so we convert  hn  and
-//  iteration  to CCTK_REAL, and encode the "we need more iterations" in
-// hn's sign.
-//
-const int N_vars = 4;
-const int N_reduce_buffer = N_vars*N_active_procs;
-static CCTK_REAL* reduce_buffer = NULL;
-if (reduce_buffer == NULL)
+//  iteration  to CCTK_REAL, and encode the Boolean flags in other
+// variables' signs.  Also, for purposes of the reduction we treat the
+// buffer as a 1-D array.
+//
+
+// the buffer is actually a 2-D array; these are the column numbers
+// ... n.b. input and output buffers must be distinct!
+enum	{
+	buffer_var__hn = 0,
+	buffer_var__iteration,
+	buffer_var__rms_norm,
+	buffer_var__infinity_norm,
+	N_buffer_vars
+	};
+
+static jtutil::array2d<CCTK_REAL> *reduce_buffer_in_ptr  = NULL;
+static jtutil::array2d<CCTK_REAL> *reduce_buffer_out_ptr = NULL;
+if (reduce_buffer_in_ptr == NULL)
    then {
-	// allocate on first call
-	reduce_buffer = new CCTK_REAL[N_reduce_buffer];
+	// allocate on first call (constructor automagically zero-initializes)
+	reduce_buffer_in_ptr  = new jtutil::array2d<CCTK_REAL>
+						   (0, N_procs-1,
+						    0, N_buffer_vars-1);
+	reduce_buffer_out_ptr = new jtutil::array2d<CCTK_REAL>
+						   (0, N_procs-1,
+						    0, N_buffer_vars-1);
 	}
+jtutil::array2d<CCTK_REAL>& reduce_buffer_in  = *reduce_buffer_in_ptr;
+jtutil::array2d<CCTK_REAL>& reduce_buffer_out = *reduce_buffer_out_ptr;
 
 //
-// pack the values into the reduction buffer
+// pack this processor's values into the reduction buffer
 //
-	for (int i = 0 ; i < N_reduce_buffer ; ++i)
-	{
-	reduce_buffer[i] = 0.0;
-	}
 if (my_active_flag)
    then {
-	assert( my_proc < N_active_procs );
-	reduce_buffer[N_vars*my_proc + 0] = we_need_more_iterations ? hn : -hn;
-	reduce_buffer[N_vars*my_proc + 1] = iteration;
-	reduce_buffer[N_vars*my_proc + 2] = rms_norm;
-	reduce_buffer[N_vars*my_proc + 3] = infinity_norm;
+	assert( reduce_buffer_in.is_valid_i(my_proc) );
+	reduce_buffer_in(my_proc, buffer_var__hn)
+		= we_need_more_iterations ? +hn : -hn;
+	reduce_buffer_in(my_proc, buffer_var__iteration)
+		= found_this_horizon ? +iteration : -iteration;
+	reduce_buffer_in(my_proc, buffer_var__rms_norm)      = rms_norm;
+	reduce_buffer_in(my_proc, buffer_var__infinity_norm) = infinity_norm;
 	}
 
 //
@@ -507,13 +534,19 @@ if (reduction_handle < 0)
 		   "reduce_and_broadcast(): can't get sum-reduction handle!");
 								/*NOTREACHED*/
 
+// FIXME: CCTK_ReduceLocArrayToArray1D() prototypes the in_data argument
+//        as  void* , not  const void*
 const int status
    = CCTK_ReduceLocArrayToArray1D(GH,
 				  -1,		// all processors get result
 				  reduction_handle,
-				  (/*const*/ void*) reduce_buffer,
-				  (          void*) reduce_buffer,
-				  N_reduce_buffer,
+				  static_cast</*const*/ void*>(
+					reduce_buffer_in.data_array()
+							      ),
+				  static_cast<      void*>(
+					reduce_buffer_out.data_array()
+							  ),
+				  reduce_buffer_in.N_array(),
 				  CCTK_VARIABLE_REAL);
 if (status < 0)
    then CCTK_VWarn(FATAL_ERROR, __LINE__, __FILE__, CCTK_THORNSTRING,
@@ -526,13 +559,21 @@ if (status < 0)
 bool any_proc_needs_more_iterations = false;
 	for (int proc = 0 ; proc < N_active_procs ; ++proc)
 	{
-	const int i_proc = N_vars*proc;
-	const bool proc_needs_more_iterations = reduce_buffer[i_proc + 0] > 0.0;
+	const bool proc_needs_more_iterations
+		= reduce_buffer_out(proc,buffer_var__hn) > 0.0;
+	found_horizon_buffer[proc]
+		= reduce_buffer_out(proc,buffer_var__iteration) > 0.0;
+
 	any_proc_needs_more_iterations |= proc_needs_more_iterations;
-	           hn_buffer[proc] = (int) fabs(reduce_buffer[i_proc + 0]);
-	    iteration_buffer[proc] = (int)      reduce_buffer[i_proc + 1];
-	     rms_norm_buffer[proc] =            reduce_buffer[i_proc + 2];
-	infinity_norm_buffer[proc] =            reduce_buffer[i_proc + 3];
+
+	hn_buffer[proc]
+		= int(fabs(reduce_buffer_out(proc, buffer_var__hn)));
+	iteration_buffer[proc]
+		= int(fabs(reduce_buffer_out(proc, buffer_var__iteration)));
+	rms_norm_buffer[proc]
+		=          reduce_buffer_out(proc, buffer_var__rms_norm);
+	infinity_norm_buffer[proc]
+		=          reduce_buffer_out(proc, buffer_var__infinity_norm);
 	}
 
 return any_proc_needs_more_iterations;
@@ -542,10 +583,14 @@ return any_proc_needs_more_iterations;
 //******************************************************************************
 
 //
-// This function is called on processor #0 to print the Theta norms.
+// This function is called on processor #0 to print the status of the
+// Newton iteration on each active processor.
 //
 // Arguments:
 // N_active_procs = The number of active processors.
+// found_horizon_buffer[N_active_procs] = Tells whether each active processor
+//					  has (just) successfully found its
+//					  horizon.
 // hn_buffer[N_active_procs] = Gives the horizon each active processor
 //			       is currently working on.
 // iteration_buffer[N_active_procs] = Gives the iteration number of each
@@ -554,8 +599,9 @@ return any_proc_needs_more_iterations;
 //						each active processor.
 //
 namespace {
-void print_Theta_norms
+void print_iteration_status
 	(int N_active_procs,
+	 const bool found_horizon_buffer[],
 	 const int hn_buffer[], const int iteration_buffer[],
 	 const fp rms_norm_buffer[], const fp infinity_norm_buffer[])
 {
@@ -565,11 +611,18 @@ void print_Theta_norms
 	   then CCTK_VInfo(CCTK_THORNSTRING,
 			   "   proc %d/dummy horizon",
 			   proc);
-	   else CCTK_VInfo(CCTK_THORNSTRING,
-		   "   proc %d/horizon %d/it %d |Theta| rms=%.1e inf=%.1e",
+	   else {
+		CCTK_VInfo(CCTK_THORNSTRING,
+		   "   proc %d/horizon %d:it %d |Theta| rms=%.1e inf=%.1e",
 			   proc, hn_buffer[proc], iteration_buffer[proc],
 			   double(rms_norm_buffer[proc]),
 			   double(infinity_norm_buffer[proc]));
+		if (found_horizon_buffer[proc])
+		   then CCTK_VInfo(CCTK_THORNSTRING,
+			   "   proc %d/horizon %d:found at iteration %d",
+				   proc, hn_buffer[proc],
+				   iteration_buffer[proc]);
+		}
 	}
 }
 	  }
author	jthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5>	2003-02-26 17:57:20 +0000
committer	jthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5>	2003-02-26 17:57:20 +0000
commit	07995eaa14b03487dc1333b91aa7f6c998eca45a (patch)
tree	65afd5c42fb5231d0bf253a397ae6b78a4ee1d8e
parent	2d11cab54e2c0815481fb23c4fdcd1a74dcc34d1 (diff)