From 6d98ca55d5298bbb472df1daa959e6f17b36d5f7 Mon Sep 17 00:00:00 2001 From: jthorn Date: Wed, 12 Mar 2003 20:08:36 +0000 Subject: major rewrite of multiprocessor synchronization logic to fix assorted bugs biggest change is that we now group all the per-Newton-iteration interprocessor communication logic into a single call per iteration; this eats more bandwidth, but saves on latency git-svn-id: http://svn.einsteintoolkit.org/cactus/EinsteinAnalysis/AHFinderDirect/trunk@971 f88db872-0e4f-0410-b76b-b9085cfa78c5 --- src/driver/BH_diagnostics.cc | 50 +++ src/driver/BH_diagnostics.hh | 17 + src/driver/Newton.cc | 646 +++++++++++++++++++++++------------- src/driver/README.parallel | 112 +++++-- src/driver/driver.hh | 80 ++++- src/driver/find_horizons.cc | 43 +-- src/driver/horizon_sequence.cc | 18 + src/driver/horizon_sequence.hh | 3 + src/driver/setup.cc | 152 +++++---- src/driver/test_horizon_sequence.cc | 70 ++++ 10 files changed, 824 insertions(+), 367 deletions(-) diff --git a/src/driver/BH_diagnostics.cc b/src/driver/BH_diagnostics.cc index 06f8423..c71b3af 100644 --- a/src/driver/BH_diagnostics.cc +++ b/src/driver/BH_diagnostics.cc @@ -3,6 +3,9 @@ // // BH_diagnostics::BH_diagnostics - initialize a struct BH_diagnostics // +// BH_diagnostics::copy_to_buffer - copy diagnostics to buffer +// BH_diagnostics::copy_from_buffer - copy buffer to diagnostics +// // BH_diagnostics::compute - compute BH diagnostics after an AH has been found /// BH_diagnostics::surface_integral - integrate gridfn over the 2-sphere // @@ -65,6 +68,53 @@ BH_diagnostics::BH_diagnostics() //****************************************************************************** //****************************************************************************** +// +// This function copies the diagnostics to a user-supplied buffer. +// +void BH_diagnostics::copy_to_buffer(CCTK_REAL buffer[N_buffer]) + const +{ +buffer[posn__centroid_x] = centroid_x; +buffer[posn__centroid_y] = centroid_y; +buffer[posn__centroid_z] = centroid_z; + +buffer[posn__min_radius] = min_radius; +buffer[posn__max_radius] = max_radius; +buffer[posn__mean_radius] = mean_radius; + +buffer[posn__circumference_xy] = circumference_xy; +buffer[posn__circumference_xz] = circumference_xz; +buffer[posn__circumference_yz] = circumference_yz; +buffer[posn__area] = area; +buffer[posn__m_irreducible] = m_irreducible; +} + +//****************************************************************************** + +// +// This function copies a user-supplied buffer to the diagnostics. +// +void BH_diagnostics::copy_from_buffer(const CCTK_REAL buffer[N_buffer]) +{ +centroid_x = buffer[posn__centroid_x]; +centroid_y = buffer[posn__centroid_y]; +centroid_z = buffer[posn__centroid_z]; + + min_radius = buffer[posn__min_radius]; + max_radius = buffer[posn__max_radius]; +mean_radius = buffer[posn__mean_radius]; + +circumference_xy = buffer[posn__circumference_xy]; +circumference_xz = buffer[posn__circumference_xz]; +circumference_yz = buffer[posn__circumference_yz]; + area = buffer[posn__area]; + m_irreducible = buffer[posn__m_irreducible]; +} + +//****************************************************************************** +//****************************************************************************** +//****************************************************************************** + // // Given that an apparent horizon has been found, this function computes // various black hole diagnostics. diff --git a/src/driver/BH_diagnostics.hh b/src/driver/BH_diagnostics.hh index 40bd6c7..d450ec3 100644 --- a/src/driver/BH_diagnostics.hh +++ b/src/driver/BH_diagnostics.hh @@ -33,6 +33,23 @@ public: fp area; fp m_irreducible; +public: + // position of diagnostics in buffer and number of diagnostics + enum { + posn__centroid_x = 0, posn__centroid_y, posn__centroid_z, + posn__min_radius, posn__max_radius, posn__mean_radius, + + posn__circumference_xy, posn__circumference_xz, + posn__circumference_yz, + posn__area, + posn__m_irreducible, + N_buffer // no comma // size of buffer + }; + + // copy diagnostics to/from buffer + void copy_to_buffer ( CCTK_REAL buffer[N_buffer]) const; + void copy_from_buffer(const CCTK_REAL buffer[N_buffer]); + public: // compute diagnostics (assuming that apparent horizon has been found) void compute(const patch_system& ps, diff --git a/src/driver/Newton.cc b/src/driver/Newton.cc index 6486ec6..1daed56 100644 --- a/src/driver/Newton.cc +++ b/src/driver/Newton.cc @@ -2,8 +2,11 @@ // $Header$ // // Newton_solve - driver to solve Theta(h) = 0 for all our horizons -/// reduce_and_broadcast - reduce/broadcast status to other processors -/// print_iteration_status - print Newton-iteration status on each active proc +// +/// broadcast_status - broadcast status from active processors to all processors +/// broadcast_horizon_data - broadcast AH data to all processors +/// +/// print_status - print Newton-iteration status on each active proc /// Newton_step - take the Newton step, scaling it down if it's too large // @@ -49,19 +52,22 @@ using jtutil::error_exit; // namespace { -bool reduce_and_broadcast(cGH* GH, - int N_procs, int N_active_procs, - int my_proc, bool my_active_flag, - bool we_need_more_iterations, bool found_this_horizon, - int hn, int iteration, fp rms_norm, fp infinity_norm, - bool found_horizon_buffer[], - int hn_buffer[], int iteration_buffer[], - fp rms_norm_buffer[], fp infinity_norm_buffer[]); -void print_iteration_status - (int N_active_procs, - const bool found_horizon_buffer[], - const int hn_buffer[], const int iteration_buffer[], - const fp rms_norm_buffer[], const fp infinity_norm_buffer[]); +bool broadcast_status(const cGH* GH, + int N_procs, int N_active_procs, + int my_proc, bool my_active_flag, + int hn, int iteration, + enum expansion_status expansion_status, + fp rms_norm, fp infinity_norm, + bool found_this_horizon, bool I_need_more_iterations, + struct iteration_status_buffers& isb); +void broadcast_horizon_data(const cGH* GH, + bool broadcast_flag, + struct BH_diagnostics& BH_diagnostics, + patch_system& ps, + struct horizon_buffers& horizon_buffers); + +void print_status(int N_active_procs, + const struct iteration_status_buffers& isb); void Newton_step(patch_system& ps, fp max_allowable_Delta_h_over_h, const struct verbose_info& verbose_info); @@ -82,15 +88,11 @@ void Newton_step(patch_system& ps, // of the parallel/multiprocessor issues and algorithm. // // If this is a dummy processor (i.e. hs has no genuine horizons), then -// my_AH_info will be a NULL pointer. -// -// FIXME: Since the BH diagnostics printing is done with CCTK_VInfo(), -// in a multiprocessor Cactus run the output will typically be -// lost for processors other than processor #0. +// my_AH_data will be a NULL pointer. // -void Newton(cGH* GH, +void Newton(const cGH* GH, int N_procs, int N_active_procs, int my_proc, - horizon_sequence& hs, struct AH_info* const my_AH_info[], + horizon_sequence& hs, struct AH_data* const my_AH_data[], const struct cactus_grid_info& cgi, const struct geometry_info& gi, const struct Jacobian_info& Jacobian_info, @@ -98,33 +100,18 @@ void Newton(cGH* GH, const struct IO_info& IO_info, const struct BH_diagnostics_info& BH_diagnostics_info, const struct error_info& error_info, - const struct verbose_info& verbose_info) + const struct verbose_info& verbose_info, + struct iteration_status_buffers& isb) { const bool my_active_flag = hs.has_genuine_horizons(); const int N_horizons = hs.N_horizons(); -// buffers for reduce_and_broadcast() -bool* found_horizon_buffer = NULL; -int* hn_buffer = NULL; -int* iteration_buffer = NULL; -fp * rms_norm_buffer = NULL; -fp * infinity_norm_buffer = NULL; -if (hn_buffer == NULL) - then { - // allocate on first call - found_horizon_buffer = new bool[N_active_procs]; - hn_buffer = new int [N_active_procs]; - iteration_buffer = new int [N_active_procs]; - rms_norm_buffer = new fp [N_active_procs]; - infinity_norm_buffer = new fp [N_active_procs]; - } - // print out which horizons we're finding on this processor if (hs.has_genuine_horizons()) then CCTK_VInfo(CCTK_THORNSTRING, "proc %d: searching for horizon%s %s/%d", my_proc, - (N_horizons > 1 ? "s" : ""), + (hs.my_N_horizons() > 1 ? "s" : ""), hs.sequence_string(","), N_horizons); else CCTK_VInfo(CCTK_THORNSTRING, "proc %d: dummy horizon(s) only", @@ -157,14 +144,14 @@ if (hs.has_genuine_horizons()) int(there_is_another_genuine_horizon)); } - struct AH_info* AH_info_ptr - = horizon_is_genuine ? my_AH_info[hn] : NULL; - patch_system* ps_ptr = horizon_is_genuine ? AH_info_ptr->ps_ptr : NULL; - Jacobian* Jac_ptr = horizon_is_genuine ? AH_info_ptr->Jac_ptr: NULL; + struct AH_data* AH_data_ptr + = horizon_is_genuine ? my_AH_data[hn] : NULL; + patch_system* ps_ptr = horizon_is_genuine ? AH_data_ptr->ps_ptr : NULL; + Jacobian* Jac_ptr = horizon_is_genuine ? AH_data_ptr->Jac_ptr: NULL; const int max_iterations = horizon_is_genuine - ? (AH_info_ptr->initial_find_flag + ? (AH_data_ptr->initial_find_flag ? solver_info.max_Newton_iterations__initial : solver_info.max_Newton_iterations__subsequent) : INT_MAX; @@ -175,8 +162,6 @@ if (hs.has_genuine_horizons()) // for (int iteration = 1 ; ; ++iteration) { - enum expansion_status status; - if (verbose_info.print_algorithm_debug) then CCTK_VInfo(CCTK_THORNSTRING, "beginning iteration %d (horizon_is_genuine=%d)", @@ -195,19 +180,21 @@ if (hs.has_genuine_horizons()) iteration); jtutil::norm Theta_norms; - status = expansion(ps_ptr, - cgi, gi, - error_info, (iteration == 1), - true, // yes, we want the Jacobian coeffs - verbose_info.print_algorithm_details, - &Theta_norms); - const bool Theta_is_ok = (status == expansion_success); + const enum expansion_status + expansion_status = expansion(ps_ptr, + cgi, gi, + error_info, (iteration == 1), + true, // yes, we want Jacobian coeffs + verbose_info.print_algorithm_details, + &Theta_norms); + const bool Theta_is_ok = (expansion_status == expansion_success); + const bool norms_are_ok = horizon_is_genuine && Theta_is_ok; if (verbose_info.print_algorithm_debug) then { CCTK_VInfo(CCTK_THORNSTRING, " Newton_solve(): Theta_is_ok=%d", Theta_is_ok); - if (horizon_is_genuine && Theta_is_ok) + if (norms_are_ok) then CCTK_VInfo(CCTK_THORNSTRING, " Theta rms-norm %.1e, infinity-norm %.1e", double(Theta_norms.rms_norm()), @@ -221,18 +208,38 @@ if (hs.has_genuine_horizons()) iteration); + // - // compute and reduce-across-processors the are-we-done flags - // also broadcast and print error norms + // have we found this horizon? + // if so, compute and output BH diagnostics // - - // have we found *this* horizon? const bool found_this_horizon - = horizon_is_genuine && Theta_is_ok - && (Theta_norms.infinity_norm() <= solver_info - .Theta_norm_for_convergence); + = norms_are_ok && (Theta_norms.infinity_norm() + <= solver_info.Theta_norm_for_convergence); if (horizon_is_genuine) - then AH_info_ptr->found_flag = found_this_horizon; + then AH_data_ptr->found_flag = found_this_horizon; + + if (found_this_horizon) + then { + struct BH_diagnostics& BH_diagnostics + = AH_data_ptr->BH_diagnostics; + BH_diagnostics.compute(*ps_ptr, BH_diagnostics_info); + if (IO_info.output_BH_diagnostics) + then { + if (AH_data_ptr->BH_diagnostics_fileptr == NULL) + then AH_data_ptr->BH_diagnostics_fileptr + = BH_diagnostics.setup_output_file + (IO_info, N_horizons, hn); + BH_diagnostics.output(AH_data_ptr + ->BH_diagnostics_fileptr, + IO_info); + } + } + + + // + // see if we need more iterations (either on this or another horizon) + // // does *this* horizon need more iterations? // i.e. has this horizon's Newton iteration not yet converged? @@ -241,9 +248,9 @@ if (hs.has_genuine_horizons()) && !found_this_horizon && (iteration < max_iterations); - // do we (this processor) need to do more iterations + // do I (this processor) need to do more iterations // on this or a following horizon? - const bool we_need_more_iterations + const bool I_need_more_iterations = this_horizon_needs_more_iterations || there_is_another_genuine_horizon; @@ -256,58 +263,76 @@ if (hs.has_genuine_horizons()) " this_horizon_needs_more_iterations=%d", int(this_horizon_needs_more_iterations)); CCTK_VInfo(CCTK_THORNSTRING, - " we_need_more_iterations=%d", - int(we_need_more_iterations)); + " I_need_more_iterations=%d", + int(I_need_more_iterations)); } - // reduce/broadcast status flags and norms to all processors - // ==> determine if any processor needs more iterations - // on its current or a following horizon? - const bool any_proc_needs_more_iterations - = reduce_and_broadcast(GH, N_procs, N_active_procs, - my_proc, my_active_flag, - we_need_more_iterations, found_this_horizon, - hn, iteration, - ((horizon_is_genuine && Theta_is_ok) - ? Theta_norms.rms_norm() : 0.0), - ((horizon_is_genuine && Theta_is_ok) - ? Theta_norms.infinity_norm() : 0.0), - found_horizon_buffer, - hn_buffer, iteration_buffer, - rms_norm_buffer, infinity_norm_buffer); + // + // broadcast iteration status from each active processor + // to all processors, and inclusive-or the "we need more iterations" + // flags to see if *any* (active) processor needs more iterations + // + const bool any_proc_needs_more_iterations + = broadcast_status(GH, + N_procs, N_active_procs, + my_proc, my_active_flag, + hn, iteration, expansion_status, + (norms_are_ok ? Theta_norms.rms_norm() : 0.0), + (norms_are_ok ? Theta_norms.infinity_norm() : 0.0), + found_this_horizon, I_need_more_iterations, + isb); if (verbose_info.print_algorithm_details) then { CCTK_VInfo(CCTK_THORNSTRING, " ==> any_proc_needs_more_iterations=%d", int(any_proc_needs_more_iterations)); } - if (my_active_flag && verbose_info.print_algorithm_highlights) - then print_iteration_status(N_active_procs, - found_horizon_buffer, - hn_buffer, iteration_buffer, - rms_norm_buffer, infinity_norm_buffer); + + + // + // print the iteration status info + // + if ((my_proc == 0) && verbose_info.print_algorithm_highlights) + then print_status(N_active_procs, isb); + + + // + // for each processor which found a horizon, + // broadcast its horizon info to all processors + // and print the BH diagnostics on processor 0 + // + for (int found_proc = 0 ; + found_proc < N_active_procs ; + ++found_proc) + { + if (! isb.found_horizon_buffer[found_proc]) + then continue; + + const int found_hn = isb.hn_buffer[found_proc]; + if (verbose_info.print_algorithm_details) + then CCTK_VInfo(CCTK_THORNSTRING, + " broadcasting proc %d/horizon %d data", + found_proc, found_hn); + + struct AH_data& found_AH_data = *my_AH_data[found_hn]; + broadcast_horizon_data(GH, + my_proc == found_proc, + found_AH_data.BH_diagnostics, + *found_AH_data.ps_ptr, + found_AH_data.horizon_buffers); + + if ((my_proc == 0) && verbose_info.print_physics_details) + then found_AH_data.BH_diagnostics + .print(N_horizons, found_hn); + } + + + // + // if we found our horizon, maybe output the horizon shape? + // if (found_this_horizon) then { - struct BH_diagnostics& BH_diagnostics - = AH_info_ptr->BH_diagnostics; - BH_diagnostics.compute(*ps_ptr, BH_diagnostics_info); - if (verbose_info.print_physics_details) - then { - // FIXME: see header comment -- user probably won't see - // this for my_proc != 0 in a multiprocessor run - BH_diagnostics.print(N_horizons, hn); - } - if (IO_info.output_BH_diagnostics) - then { - if (AH_info_ptr->BH_diagnostics_fileptr == NULL) - then AH_info_ptr->BH_diagnostics_fileptr - = BH_diagnostics.setup_output_file - (IO_info, N_horizons, hn); - BH_diagnostics.output(AH_info_ptr->BH_diagnostics_fileptr, - IO_info); - } - if (IO_info.output_h) then output_gridfn(*ps_ptr, gfns::gfn__h, IO_info, IO_info.h_base_file_name, @@ -349,14 +374,15 @@ if (hs.has_genuine_horizons()) then CCTK_VInfo(CCTK_THORNSTRING, " computing Jacobian: genuine/dummy flag %d", int(this_horizon_needs_more_iterations)); - status = expansion_Jacobian(this_horizon_needs_more_iterations ? ps_ptr - : NULL, - this_horizon_needs_more_iterations ? Jac_ptr - : NULL, - cgi, gi, Jacobian_info, - error_info, (iteration == 1), - verbose_info.print_algorithm_details); - const bool Jacobian_is_ok = (status == expansion_success); + const enum expansion_status + Jacobian_status + = expansion_Jacobian + (this_horizon_needs_more_iterations ? ps_ptr : NULL, + this_horizon_needs_more_iterations ? Jac_ptr : NULL, + cgi, gi, Jacobian_info, + error_info, (iteration == 1), + verbose_info.print_algorithm_details); + const bool Jacobian_is_ok = (Jacobian_status == expansion_success); // @@ -424,105 +450,120 @@ if (hs.has_genuine_horizons()) assert( false ); } +//****************************************************************************** +//****************************************************************************** //****************************************************************************** // -// This function does an inclusive-or--reduction of the "we need more -// iterations (either on this or on another genuine horizon" flags across -// all active processors. It also broadcasts which horizon we're working -// on, the current iteration number, whether we've found this horizon, -// and the current error norms, to processor #0. -// -// The reduction and broadcast are combined in this function to allow -// doing them all with a single reduction operation (and thus only a -// single set of MPI overheads). +// This function (which must be called on *every* processor) broadcasts +// the per-iteration status information from each active processor to +// all processors. // // Arguments: // GH --> The Cactus grid hierarchy. // N_procs = The total number of processors. // N_active_procs = The number of active processors. // my_active_flag = Is this processor an active processor? -// we_need_more_iterations = "we need more iterations (either on this -// or on another genuine horizon" flag to be -// inclusive-or--reduced -// found_this_horizon = "have we found this horizon?" flag to be broadcast -// hn,iteration,{rms,infinity}_norm = the values to be broadcast: -// if this is an active processor these -// are broadcast to processor #0, -// otherwise these are ignored -// {found_horizon,hn,iteration,{rms,infinity}_norm}_buffer[N_active_procs] -// = buffers for flags, horizon numbers, and norms: if this is processor -// #0 these are set to the found-this-horizon-flags, horizon numbers, -// and norms for all active processors, otherwise these are set to -// undefined (garbage) values +// hn,iteration,status, +// {rms,infinity}_norm, +// found_this_horizon,I_need_more_iterations +// = On an active processors, these are the values to be broadcast. +// On a dummy processors, these are ignored. +// isb = (out) Holds both user buffers (set to the broadcast results) +// and low-level buffers (used within this function). If the +// buffer pointers are NULL then the buffers are allocated. // // Results: -// This function returns the inclusive-or--reduction of the -// we_need_more_iterations flags across all active processors. +// This function returns the inclusive-or over all active processors, +// of the broadcast I_need_more_iterations flags. // namespace { -bool reduce_and_broadcast(cGH* GH, - int N_procs, int N_active_procs, - int my_proc, bool my_active_flag, - bool we_need_more_iterations, bool found_this_horizon, - int hn, int iteration, fp rms_norm, fp infinity_norm, - bool found_horizon_buffer[], - int hn_buffer[], int iteration_buffer[], - fp rms_norm_buffer[], fp infinity_norm_buffer[]) +bool broadcast_status(const cGH* GH, + int N_procs, int N_active_procs, + int my_proc, bool my_active_flag, + int hn, int iteration, + enum expansion_status expansion_status, + fp rms_norm, fp infinity_norm, + bool found_this_horizon, bool I_need_more_iterations, + struct iteration_status_buffers& isb) { assert( my_proc >= 0 ); assert( my_proc < N_procs ); + +// +// We do the broadcast via a Cactus reduction operation (this is a KLUDGE, +// but until Cactus gets a generic interprocessor communications mechanism +// it's the best we can do without mucking with MPI ourself). To do the +// gather via a reduction, we set up a 2-D buffer whose entries are all +// 0s, except that on each processor the [my_proc] row has the values we +// want to gather. Then we do a sum-reduction of the buffers across +// processors. For the actual reduction we treat the buffers as 1-D +// arrays on each processor (this slightly simplifies the code). // -// We do the combined reduce/broadcast in a single reduction operation -// by setting up a 2-D buffer whose entries are all 0s, except that on each -// processor the [my_proc] row has the values we want to reduce/broadcast, -// then doing a sum-reduction of the buffers across processors. Alas, -// the input and output buffers must be distinct, so for the broadcast -// we need two copies of the buffer on each processor. +// To reduce overheads, we do the entire operation with a single (CCTK_REAL) +// Cactus reduce, casting values to CCTK_REAL as necessary (and encoding +// Boolean flags into signs of known-to-be-positive values. // -// To do everything in a single reduction operation, all the values have -// to be of a single datatype in a single array, so we convert hn and -// iteration to CCTK_REAL, and encode the Boolean flags in some of the -// variables' signs. Also, for simplicity we treat the buffer as a 1-D -// array for the reduction. +// Alas MPI (and thus Cactus) requires the input and output reduce buffers +// to be distinct, so we need two copies of the buffers on each processor. // -// the buffer is actually a 2-D array; these are the column numbers +// the buffers are actually 2-D arrays; these are the column numbers +// ... if we wanted to, we could pack hn, iteration, and expansion_status +// all into a single (64-bit) floating-point value, but it's not +// worth the trouble... enum { - buffer_var__hn = 0, - buffer_var__iteration, + // CCTK_INT buffer + buffer_var__hn = 0, // also encodes found_this_horizon flag + // in sign: +=true, -=false + buffer_var__iteration, // also encodes I_need_more_iterations flag + // in sign: +=true, -=false + buffer_var__expansion_status, buffer_var__rms_norm, buffer_var__infinity_norm, - N_buffer_vars + N_buffer_vars // no comma }; -static jtutil::array2d *reduce_buffer_in_ptr = NULL; -static jtutil::array2d *reduce_buffer_out_ptr = NULL; -if (reduce_buffer_in_ptr == NULL) +// +// allocate buffers if this is the first use +// +if (isb.hn_buffer == NULL) then { - // allocate on first call (constructor automagically zero-initializes) - reduce_buffer_in_ptr = new jtutil::array2d - (0, N_procs-1, - 0, N_buffer_vars-1); - reduce_buffer_out_ptr = new jtutil::array2d - (0, N_procs-1, - 0, N_buffer_vars-1); + isb.hn_buffer = new int [N_active_procs]; + isb.iteration_buffer = new int [N_active_procs]; + isb.expansion_status_buffer = new enum expansion_status + [N_active_procs]; + isb.rms_norm_buffer = new fp [N_active_procs]; + isb.infinity_norm_buffer = new fp [N_active_procs]; + isb.found_horizon_buffer = new bool[N_active_procs]; + + isb.send_buffer_ptr = new jtutil::array2d + (0, N_active_procs-1, + 0, N_buffer_vars-1); + isb.receive_buffer_ptr = new jtutil::array2d + (0, N_active_procs-1, + 0, N_buffer_vars-1); } -jtutil::array2d& reduce_buffer_in = *reduce_buffer_in_ptr; -jtutil::array2d& reduce_buffer_out = *reduce_buffer_out_ptr; +jtutil::array2d& send_buffer = *isb.send_buffer_ptr; +jtutil::array2d& receive_buffer = *isb.receive_buffer_ptr; // // pack this processor's values into the reduction buffer // +jtutil::zero_C_array(send_buffer.N_array(), send_buffer.data_array()); if (my_active_flag) then { - assert( reduce_buffer_in.is_valid_i(my_proc) ); - reduce_buffer_in(my_proc, buffer_var__hn) - = we_need_more_iterations ? +hn : -hn; - reduce_buffer_in(my_proc, buffer_var__iteration) - = found_this_horizon ? +iteration : -iteration; - reduce_buffer_in(my_proc, buffer_var__rms_norm) = rms_norm; - reduce_buffer_in(my_proc, buffer_var__infinity_norm) = infinity_norm; + assert( send_buffer.is_valid_i(my_proc) ); + assert( hn >= 0 ); // encoding scheme assumes this + assert( iteration > 0 ); // encoding scheme assumes this + send_buffer(my_proc, buffer_var__hn) + = found_this_horizon ? +hn : -hn; + send_buffer(my_proc, buffer_var__iteration) + = I_need_more_iterations ? +iteration : -iteration; + send_buffer(my_proc, buffer_var__expansion_status) + = int(expansion_status); + send_buffer(my_proc, buffer_var__rms_norm) = rms_norm; + send_buffer(my_proc, buffer_var__infinity_norm) = infinity_norm; } // @@ -533,49 +574,54 @@ if (my_active_flag) const int reduction_handle = CCTK_ReductionArrayHandle("sum"); if (reduction_handle < 0) then CCTK_VWarn(FATAL_ERROR, __LINE__, __FILE__, CCTK_THORNSTRING, - "reduce_and_broadcast(): can't get sum-reduction handle!"); + "broadcast_status(): can't get sum-reduction handle!"); /*NOTREACHED*/ -// FIXME: CCTK_ReduceLocArrayToArray1D() prototypes the in_data argument -// as void* , not const void* -const int status +CCTK_Barrier(GH); /* PICKLE */ +const int reduction_status = CCTK_ReduceLocArrayToArray1D(GH, - -1, // all processors get result + -1, // broadcast results to all processors reduction_handle, - static_cast( - reduce_buffer_in.data_array() - ), - static_cast< void*>( - reduce_buffer_out.data_array() - ), - reduce_buffer_in.N_array(), + static_cast + (send_buffer .data_array()), + static_cast< void*> + (receive_buffer.data_array()), + send_buffer.N_array(), CCTK_VARIABLE_REAL); -if (status < 0) +if (reduction_status < 0) then CCTK_VWarn(FATAL_ERROR, __LINE__, __FILE__, CCTK_THORNSTRING, - "reduce_and_broadcast(): error status %d from reduction!", - status); /*NOTREACHED*/ + "broadcast_status(): error status %d from reduction!", + reduction_status); /*NOTREACHED*/ // -// unpack the reduction buffer back to our results +// unpack the reduction buffer back to the high-level result buffers and +// compute the inclusive-or of the broadcast I_need_more_iterations flags // bool any_proc_needs_more_iterations = false; for (int proc = 0 ; proc < N_active_procs ; ++proc) { - const bool proc_needs_more_iterations - = reduce_buffer_out(proc,buffer_var__hn) > 0.0; - found_horizon_buffer[proc] - = reduce_buffer_out(proc,buffer_var__iteration) > 0.0; - + const int hn_temp = static_cast( + receive_buffer(proc, buffer_var__hn) + ); + isb.hn_buffer[proc] = jtutil::abs(hn_temp); + isb.found_horizon_buffer[proc] = (hn_temp > 0); + + const int iteration_temp = static_cast( + receive_buffer(proc, buffer_var__iteration) + ); + isb.iteration_buffer[proc] = jtutil::abs(iteration_temp); + const bool proc_needs_more_iterations = (iteration_temp > 0); any_proc_needs_more_iterations |= proc_needs_more_iterations; - hn_buffer[proc] - = int(fabs(reduce_buffer_out(proc, buffer_var__hn))); - iteration_buffer[proc] - = int(fabs(reduce_buffer_out(proc, buffer_var__iteration))); - rms_norm_buffer[proc] - = reduce_buffer_out(proc, buffer_var__rms_norm); - infinity_norm_buffer[proc] - = reduce_buffer_out(proc, buffer_var__infinity_norm); + isb.expansion_status_buffer[proc] + = static_cast( + receive_buffer(proc, buffer_var__expansion_status) + ); + + isb.rms_norm_buffer[proc] + = receive_buffer(proc, buffer_var__rms_norm); + isb.infinity_norm_buffer[proc] + = receive_buffer(proc, buffer_var__infinity_norm); } return any_proc_needs_more_iterations; @@ -584,47 +630,173 @@ return any_proc_needs_more_iterations; //****************************************************************************** +// +// This function (which must be called on *every* processor) broadcasts +// the BH diagnostics and (ghosted) horizon shape from a specified processor +// to all processors. +// +// Arguments: +// GH --> The Cactus grid hierarchy. +// broadcast_flag = true on the broadcasting processor, +// false on all other processors +// BH_diagnostics = On the broadcasting processor, this is the BH diagnostics +// to broadcast; on all other processors, it's set to the +// broadcast BH diagnostics. +// ps = On the broadcasting processor, gfn__h is broadcast; +// on all other processors, gfn__h is set to the broadcast values. +// horizon_buffers = Internal buffers for use in the broadcast; +// if N_buffer == 0 then we set N_buffer and allocate +// the buffers. +// +namespace { +void broadcast_horizon_data(const cGH* GH, + bool broadcast_flag, + struct BH_diagnostics& BH_diagnostics, + patch_system& ps, + struct horizon_buffers& horizon_buffers) +{ +// +// Implementation notes: +// +// We do the send via a Cactus sum-reduce where the data are 0 on +// all processors except the sending one. +// +// To reduce the interprocessor-communications cost, we actually only +// broadcast the nominal-grid horizon shape; we then do a synchronize +// operation on the patch system to recover the full ghosted-grid shape. +// + +if (horizon_buffers.N_buffer == 0) + then { + // allocate the buffers + horizon_buffers.N_buffer + = BH_diagnostics::N_buffer + ps.N_grid_points(); + horizon_buffers.send_buffer + = new CCTK_REAL[horizon_buffers.N_buffer]; + horizon_buffers.receive_buffer + = new CCTK_REAL[horizon_buffers.N_buffer]; + } + +if (broadcast_flag) + then { + // pack the data to be broadcast into the send buffer + BH_diagnostics.copy_to_buffer(horizon_buffers.send_buffer); + int posn = BH_diagnostics::N_buffer; +#ifdef NOT_YET + for (int pn = 0 ; pn < ps.N_patches() ; ++pn) + { + const patch& p = ps.ith_patch(pn); + for (int irho = p.min_irho() ; + irho <= p.max_irho() ; + ++irho) + { + for (int isigma = p.min_isigma() ; + isigma <= p.max_isigma() ; + ++isigma) + { + assert( posn < horizon_buffers.N_buffer ); + horizon_buffers.send_buffer[posn++] + = p.ghosted_gridfn(gfns::gfn__h, irho,isigma); + } + } + } + assert( posn == horizon_buffers.N_buffer ); +#endif // NOT_YET + } + else jtutil::zero_C_array(horizon_buffers.N_buffer, + horizon_buffers.send_buffer); + +// this name is appropriate for PUGHReduce, caveat user for other drivers :) +const int reduction_handle = CCTK_ReductionArrayHandle("sum"); +if (reduction_handle < 0) + then CCTK_VWarn(FATAL_ERROR, __LINE__, __FILE__, CCTK_THORNSTRING, + "broadcast_horizon_data(): can't get sum-reduction handle!"); + /*NOTREACHED*/ + +CCTK_Barrier(GH); /* PICKLE */ +const int status + = CCTK_ReduceLocArrayToArray1D(GH, + -1, // result broadcast to all processors + reduction_handle, + static_cast + (horizon_buffers.send_buffer), + static_cast< void*> + (horizon_buffers.receive_buffer), + horizon_buffers.N_buffer, + CCTK_VARIABLE_REAL); +if (status < 0) + then CCTK_VWarn(FATAL_ERROR, __LINE__, __FILE__, CCTK_THORNSTRING, + "broadcast_horizon_data(): error status %d from reduction!", + status); /*NOTREACHED*/ + +if (!broadcast_flag) + then { + // unpack the data from the receive buffer + BH_diagnostics.copy_from_buffer(horizon_buffers.receive_buffer); +#ifdef NOT_YET + int posn = BH_diagnostics::N_buffer; + for (int pn = 0 ; pn < ps.N_patches() ; ++pn) + { + patch& p = ps.ith_patch(pn); + for (int irho = p.min_irho() ; + irho <= p.max_irho() ; + ++irho) + { + for (int isigma = p.min_isigma() ; + isigma <= p.max_isigma() ; + ++isigma) + { + assert( posn < horizon_buffers.N_buffer ); + p.ghosted_gridfn(gfns::gfn__h, irho,isigma) + = horizon_buffers.receive_buffer[posn++]; + } + } + } + assert( posn == horizon_buffers.N_buffer ); + + // recover the full ghosted-grid horizon shape + // (we only broadcast the nominal-grid shape) + ps.synchronize(); +#endif // NOT_YET + } +} + } + +//****************************************************************************** +//****************************************************************************** +//****************************************************************************** + // // This function is called on processor #0 to print the status of the // Newton iteration on each active processor. // // Arguments: // N_active_procs = The number of active processors. -// found_horizon_buffer[N_active_procs] = Tells whether each active processor -// has (just) successfully found its -// horizon. -// hn_buffer[N_active_procs] = Gives the horizon each active processor -// is currently working on. -// iteration_buffer[N_active_procs] = Gives the iteration number of each -// active processor on its current horizon. -// {rms,infinity}_norm_buffer[N_active_procs] = Gives the Theta norms for -// each active processor. +// isb = The high-level buffers here give the information to be printed. // namespace { -void print_iteration_status - (int N_active_procs, - const bool found_horizon_buffer[], - const int hn_buffer[], const int iteration_buffer[], - const fp rms_norm_buffer[], const fp infinity_norm_buffer[]) +void print_status(int N_active_procs, + const struct iteration_status_buffers& isb) { for (int proc = 0 ; proc < N_active_procs ; ++proc) { - if (hn_buffer[proc] == 0) + // don't print anything for processors doing dummy evaluations + if (isb.hn_buffer[proc] == 0) + then continue; + + if (isb.expansion_status_buffer[proc] == expansion_success) then CCTK_VInfo(CCTK_THORNSTRING, - " proc %d/dummy horizon", - proc); - else { - CCTK_VInfo(CCTK_THORNSTRING, " proc %d/horizon %d:it %d |Theta| rms=%.1e inf=%.1e", - proc, hn_buffer[proc], iteration_buffer[proc], - double(rms_norm_buffer[proc]), - double(infinity_norm_buffer[proc])); - if (found_horizon_buffer[proc]) - then CCTK_VInfo(CCTK_THORNSTRING, - " proc %d/horizon %d:found at iteration %d", - proc, hn_buffer[proc], - iteration_buffer[proc]); - } + proc, isb.hn_buffer[proc], + isb.iteration_buffer[proc], + double(isb.rms_norm_buffer[proc]), + double(isb.infinity_norm_buffer[proc])); + else CCTK_VInfo(CCTK_THORNSTRING, + " proc %d/horizon %d: %s", + proc, isb.hn_buffer[proc], + expansion_status_string( + isb.expansion_status_buffer[proc] + )); } } } diff --git a/src/driver/README.parallel b/src/driver/README.parallel index 3d553e0..136c026 100644 --- a/src/driver/README.parallel +++ b/src/driver/README.parallel @@ -62,12 +62,14 @@ whole Cactus run. from one processor to processor.] All the processors do their Newton iteration synchronously, working on -genuine horizons if they have any, or dummy horizons if not. +genuine horizons if they have any, or dummy horizons if not. If any +error occurs when computing Theta or the Jacobian, or in solving the +linear equations, we treat this as failing to find this horizon. For example, suppose we have 3 horizons, which are found (the Newton iteration converges) after respectively 3, 5, and 4 iterations. Then with 2 processors the iterations would look like this (where h1/2/3 -means horizon 1/2/3, h- means the dummy horizon, and a * after Theta +means horizon 1/2/3, -- means the dummy horizon, and a * after Theta means convergence): processor #0 processor #1 @@ -77,14 +79,14 @@ means convergence): 3 h1 Theta h2 Theta 4 h1 Jacobian h2 Jacobian 5 h1 Theta* h2 Theta -6 h- Jacobian h2 Jacobian +6 -- Jacobian h2 Jacobian 7 h3 Theta h2 Theta 8 h3 Jacobian h2 Jacobian 9 h3 Theta h2 Theta* -10 h3 Jacobian h- Jacobian -11 h3 Theta h- Theta -12 h3 Jacobian h- Jacobian -13 h3 Theta* h- Theta +10 h3 Jacobian -- Jacobian +11 h3 Theta -- Theta +12 h3 Jacobian -- Jacobian +13 h3 Theta* -- Theta (Notice that at line 6, processor #0 does a dummy-horizon Jacobian computation before starting its next genuine horizon. This is to keep @@ -101,45 +103,95 @@ With 3 processors this same example would look like this: 3 h1 Theta h2 Theta h3 Theta 4 h1 Jacobian h2 Jacobian h3 Jacobian 5 h1 Theta* h2 Theta h3 Theta -6 h- Jacobian h2 Jacobian h3 Jacobian -7 h- Theta h2 Theta h3 Theta* -8 h- Jacobian h2 Jacobian h- Jacobian -9 h- Theta h2 Theta* h- Theta +6 -- Jacobian h2 Jacobian h3 Jacobian +7 -- Theta h2 Theta h3 Theta* +8 -- Jacobian h2 Jacobian -- Jacobian +9 -- Theta h2 Theta* -- Theta With 4 processors it would look like this: processor #0 processor #1 processor #2 processor #3 ------------ ------------ ------------ ------------ -1 h1 Theta h2 Theta h3 Theta h- Theta -2 h1 Jacobian h2 Jacobian h3 Jacobian h- Jacobian -3 h1 Theta h2 Theta h3 Theta h- Theta -4 h1 Jacobian h2 Jacobian h3 Jacobian h- Jacobian -5 h1 Theta* h2 Theta h3 Theta h- Theta -6 h- Jacobian h2 Jacobian h3 Jacobian h- Jacobian -7 h- Theta h2 Theta h3 Theta* h- Theta -8 h- Jacobian h2 Jacobian h- Jacobian h- Jacobian -9 h- Theta h2 Theta* h- Theta h- Theta +1 h1 Theta h2 Theta h3 Theta -- Theta +2 h1 Jacobian h2 Jacobian h3 Jacobian -- Jacobian +3 h1 Theta h2 Theta h3 Theta -- Theta +4 h1 Jacobian h2 Jacobian h3 Jacobian -- Jacobian +5 h1 Theta* h2 Theta h3 Theta -- Theta +6 -- Jacobian h2 Jacobian h3 Jacobian -- Jacobian +7 -- Theta h2 Theta h3 Theta* -- Theta +8 -- Jacobian h2 Jacobian -- Jacobian -- Jacobian +9 -- Theta h2 Theta* -- Theta -- Theta Any additional processors would similarly do all Theta and Jacobian computations on the dummy horizon. -How and What to Synchronize ---------------------------- +Interprocessor Synchronization +------------------------------ + To implement this algorithm, after each Theta evaluation each active -processor computes a "we need more iterations (either on this or another -genuine horizon)" flag, and all the processors do an inclusive-or-reduction -of these flags, with the results broadcast to all processors. +processor computes a "I need more iterations (either on this or another +genuine horizon)" flag, and all the processors do an inclusive-or--reduction +of these flags, with the result broadcast to all processors. Each +processor then uses the inclusive-or-reduction result to determine +whether or not to continue iterating. -(If any error occurs when computing Theta or the Jacobian, or in solving -the linear equations, we treat this as failing to find this horizon.) -Each processor then uses the inclusive-or-reduction result to determine -whether or not to continue iterating. +Broadcasting the Iteration Status +--------------------------------- -After each Theta evaluation each active processor also broadcasts +After each Theta evaluation each active processor also sends * which horizon it's working on * the iteration number +* the expansion() status * its error norms to processor 0, which uses this information to print a status report of how the iterations are converging. + +If an (active) processor finds a horizon, it computes the BH diagnostics +and broadcasts them, and optionally the horizon shape, to all processors. +Processor 0 uses this to print the BH diagnostics, and (optionally) all +processors use the diagnostics and the horizon shape to set the drift +correction information and/or the BH excision mask. + + [All the processors actually need the ghosted horizon + shape in order to be able to do angular interpolations + to test if a given point is inside/outside the horizon. + But since interprocessor communication is probably + expensive, we only broadcast the horizon shape on the + nominal grid, then recover the ghost-zone values on + each processor.] + +Since all processors must participate in a broadcast, all processors +must somehow know that that horizon was just found. Thus, after each +Theta iteration, we broadcast a "which horizon I've just found, or none" +integer from each active processor to all processors. + + +Optimizing the Interprocessor Communication +------------------------------------------- + +In practice interprocessor communication is expensive. Moreover, in +practice, given that we're doing synchronous operations across all +processors (i.e. the inclusive-or--reduction with result broadcast to +all processors), latency is probably more important than bandwidth. + +Thus for implementation, we group the different interprocessor +communications described above, into two operations: + +After each Theta evaluation each active processor broadcasts +* which horizon it's working on +* the iteration number +* the expansion() status +* its error norms +* a Boolean "I have just found a horizon" flag +* a Boolean "I need more iterations" flag +to all processors. All processors do the inclusive-or--reduce of +the "I need more iterations" flags locally, on the values received +from the broadcast. + +All processors then go through the "I have just found a horizon" flags +received from the broadcast, and for each flag which is true, all +processors then know to participate in the broadcast of the BH diagnostics +and (optionally) the horizon shape from the just-found-it processor +to all processors. diff --git a/src/driver/driver.hh b/src/driver/driver.hh index 0995f50..3640102 100644 --- a/src/driver/driver.hh +++ b/src/driver/driver.hh @@ -155,14 +155,67 @@ struct verbose_info bool print_algorithm_debug; }; +// +// This struct holds buffers for broadcasting status information from +// each active processor to all processors. +// +struct iteration_status_buffers + { + // --> high-level buffers for broadcast-level semantics in Newton() + int* hn_buffer; + int* iteration_buffer; + enum expansion_status* expansion_status_buffer; + fp* rms_norm_buffer; + fp* infinity_norm_buffer; + bool* found_horizon_buffer; + + // --> low-level buffers for CCTK_Reduce() + jtutil::array2d *send_buffer_ptr; + jtutil::array2d *receive_buffer_ptr; + + iteration_status_buffers() + : hn_buffer(NULL), iteration_buffer(NULL), + expansion_status_buffer(NULL), + rms_norm_buffer(NULL), infinity_norm_buffer(NULL), + found_horizon_buffer(NULL), + send_buffer_ptr(NULL), receive_buffer_ptr(NULL) + { } + }; + +// +// This struct holds interprocessor-communication buffers for +// broadcasting the BH diagnostics and horizon shape from the +// processor which finds a given horizon, to all processors. +// +struct horizon_buffers + { + int N_buffer; // number of CCTK_REAL values in each buffer + CCTK_REAL* send_buffer; + CCTK_REAL* receive_buffer; + + horizon_buffers() + : N_buffer(0), + send_buffer(NULL), + receive_buffer(NULL) + { } + }; + //****************************************************************************** // // (A single copy of) this struct holds all of our information about // a single apparent horizon. // -struct AH_info +struct AH_data { + // + // Any given horizon is allocated to a single processor. + // On that processor (where we actually find the horizon) + // we keep a "full-fledged" patch system and a Jacobian. + // On other processors (where we only use the horizon shape + // to (optionally) set the BH excision mask), we keep only a + // "skeletal" patch system, and no Jacobian. + // patch_system* ps_ptr; Jacobian* Jac_ptr; @@ -172,13 +225,19 @@ struct AH_info // last time // false if we've tried before and succeeded the last time // (so we have that position as a very good initial guess) + // ... also false if we're not finding this horizon on this processor bool initial_find_flag; + // used only if we're finding this horizon on this processor struct initial_guess_info initial_guess_info; bool found_flag; // did we find this horizon (successfully) struct BH_diagnostics BH_diagnostics; FILE *BH_diagnostics_fileptr; + + // interprocessor-communication buffers + // for this horizon's BH diagnostics and (optionally) horizon shape + struct horizon_buffers horizon_buffers; }; // @@ -210,16 +269,18 @@ struct state struct BH_diagnostics_info BH_diagnostics_info; + // interprocessor-communication buffers for broadcasting + // Newton-iteration status from active processors to all processors + struct iteration_status_buffers isb; + horizon_sequence *my_hs; // --> new-allocated object describing - // the sequence of horizons + // the sequence of genuine horizons // assigned to this processor // horizon numbers ("hn") run from 1 to N_horizons inclusive - struct AH_info** my_AH_info; // --> new[]-allocated array of size + struct AH_data** my_AH_data; // --> new[]-allocated array of size // N_horizons+1, subscripted by hn, - // of --> info or NULL for horizons - // not assigned to this - // processor + // of --> info }; //****************************************************************************** @@ -249,9 +310,9 @@ enum initial_guess_method // Newton.cc // returns true for success, false for failure to converge -void Newton(cGH* GH, +void Newton(const cGH* GH, int N_procs, int N_active_procs, int my_proc, - horizon_sequence& hs, struct AH_info* const my_AH_info[], + horizon_sequence& hs, struct AH_data* const my_AH_data[], const struct cactus_grid_info& cgi, const struct geometry_info& gi, const struct Jacobian_info& Jacobian_info, @@ -259,7 +320,8 @@ void Newton(cGH* GH, const struct IO_info& IO_info, const struct BH_diagnostics_info& BH_diagnostics_info, const struct error_info& error_info, - const struct verbose_info& verbose_info); + const struct verbose_info& verbose_info, + struct iteration_status_buffers& isb); // io.cc void input_gridfn(patch_system& ps, int unknown_gfn, diff --git a/src/driver/find_horizons.cc b/src/driver/find_horizons.cc index 869c5d3..2c3290e 100644 --- a/src/driver/find_horizons.cc +++ b/src/driver/find_horizons.cc @@ -68,7 +68,7 @@ const CCTK_REAL* Cactus_gridfn_data_ptr(const cGH *GH, int varindex, void do_evaluate_expansions(int my_proc, int N_horizons, horizon_sequence& hs, - struct AH_info* const my_AH_info[], + struct AH_data* const my_AH_data[], const struct cactus_grid_info& cgi, const struct geometry_info& gi, const struct IO_info& IO_info, @@ -76,7 +76,7 @@ void do_evaluate_expansions(int my_proc, int N_horizons, const struct verbose_info& verbose_info, int timer_handle); void do_test_expansion_Jacobians(int my_proc, int N_horizons, - struct AH_info* const my_AH_info[], + struct AH_data* const my_AH_data[], const struct cactus_grid_info& cgi, const struct geometry_info& gi, struct Jacobian_info& Jac_info, @@ -143,14 +143,14 @@ if (gi.geometry_method == geometry__local_interp_from_Cactus_grid) // i.e. for any (genuine) horizons where we didn't find the horizon previously for (int hn = hs.init_hn() ; hs.is_genuine() ; hn = hs.next_hn()) { - assert( state.my_AH_info[hn] != NULL ); - struct AH_info& AH_info = *state.my_AH_info[hn]; - if (AH_info.found_flag) - then AH_info.initial_find_flag = false; + assert( state.my_AH_data[hn] != NULL ); + struct AH_data& AH_data = *state.my_AH_data[hn]; + if (AH_data.found_flag) + then AH_data.initial_find_flag = false; else { - patch_system& ps = *AH_info.ps_ptr; + patch_system& ps = *AH_data.ps_ptr; setup_initial_guess(ps, - AH_info.initial_guess_info, + AH_data.initial_guess_info, IO_info, hn, state.N_horizons, verbose_info); if (active_flag && IO_info.output_initial_guess) @@ -158,7 +158,7 @@ if (gi.geometry_method == geometry__local_interp_from_Cactus_grid) IO_info, IO_info.h_base_file_name, hn, verbose_info .print_algorithm_highlights); - AH_info.initial_find_flag = true; + AH_data.initial_find_flag = true; } } @@ -169,7 +169,7 @@ switch (state.method) { case method__evaluate_expansions: do_evaluate_expansions(my_proc, state.N_horizons, - *state.my_hs, state.my_AH_info, + *state.my_hs, state.my_AH_data, cgi, gi, IO_info, error_info, verbose_info, state.timer_handle); @@ -177,7 +177,7 @@ case method__evaluate_expansions: case method__test_expansion_Jacobians: do_test_expansion_Jacobians(my_proc, state.N_horizons, - state.my_AH_info, + state.my_AH_data, cgi, gi, Jac_info, (test_all_Jacobian_compute_methods != 0), IO_info, error_info, verbose_info, @@ -189,10 +189,11 @@ case method__find_horizons: then CCTK_TimerStartI(state.timer_handle); Newton(cctkGH, state.N_procs, state.N_active_procs, my_proc, - *state.my_hs, state.my_AH_info, + *state.my_hs, state.my_AH_data, cgi, gi, Jac_info, state.solver_info, IO_info, state.BH_diagnostics_info, - error_info, verbose_info); + error_info, verbose_info, + state.isb); if (state.timer_handle >= 0) then CCTK_TimerStopI(state.timer_handle); break; @@ -309,7 +310,7 @@ return data_ptr; namespace { void do_evaluate_expansions(int my_proc, int N_horizons, horizon_sequence& hs, - struct AH_info* const my_AH_info[], + struct AH_data* const my_AH_data[], const struct cactus_grid_info& cgi, const struct geometry_info& gi, const struct IO_info& IO_info, @@ -328,9 +329,9 @@ if (active_flag) hs.is_genuine() ; hn = hs.next_hn()) { - assert( my_AH_info[hn] != NULL ); - struct AH_info& AH_info = *my_AH_info[hn]; - patch_system& ps = *AH_info.ps_ptr; + assert( my_AH_data[hn] != NULL ); + struct AH_data& AH_data = *my_AH_data[hn]; + patch_system& ps = *AH_data.ps_ptr; if (timer_handle >= 0) then CCTK_TimerStartI(timer_handle); @@ -402,7 +403,7 @@ if (active_flag) // namespace { void do_test_expansion_Jacobians(int my_proc, int N_horizons, - struct AH_info* const my_AH_info[], + struct AH_data* const my_AH_data[], const struct cactus_grid_info& cgi, const struct geometry_info& gi, struct Jacobian_info& Jac_info, @@ -418,13 +419,13 @@ assert(N_horizons >= 1); const bool print_msg_flag = true; const int hn = 1; -struct AH_info* AH_info_ptr = active_flag ? my_AH_info[hn] : NULL; -patch_system* ps_ptr = active_flag ? AH_info_ptr->ps_ptr : NULL; +struct AH_data* AH_data_ptr = active_flag ? my_AH_data[hn] : NULL; +patch_system* ps_ptr = active_flag ? AH_data_ptr->ps_ptr : NULL; // // numerical-perturbation Jacobian // -Jacobian* Jac_NP_ptr = active_flag ? AH_info_ptr->Jac_ptr : NULL; +Jacobian* Jac_NP_ptr = active_flag ? AH_data_ptr->Jac_ptr : NULL; expansion(ps_ptr, cgi, gi, error_info, true); // initial evaluation diff --git a/src/driver/horizon_sequence.cc b/src/driver/horizon_sequence.cc index fa88423..0dde7bc 100644 --- a/src/driver/horizon_sequence.cc +++ b/src/driver/horizon_sequence.cc @@ -7,6 +7,7 @@ // horizon_sequence::sequence_string // horizon_sequence::append_hn // horizon_sequence::next_posn +// horizon_sequence::is_hn_genuine // #include @@ -103,3 +104,20 @@ return (pos < 0) ? pos-1 : (pos+1 < my_N_horizons_) ? pos+1 : -1; } + +//****************************************************************************** + +// +// This function determines whether or not a given hn is genuine. +// +bool horizon_sequence::is_hn_genuine(int hn) + const +{ + for (int pos = 0 ; pos < my_N_horizons_ ; ++pos) + { + if (my_hn_[pos] == hn) + then return true; + } + +return false; +} diff --git a/src/driver/horizon_sequence.hh b/src/driver/horizon_sequence.hh index ca363a1..aea4e06 100644 --- a/src/driver/horizon_sequence.hh +++ b/src/driver/horizon_sequence.hh @@ -51,6 +51,9 @@ public: int get_hn() const { return posn_is_genuine(posn_) ? my_hn_[posn_] : 0; } + // is a given hn genuine? + bool is_hn_genuine(int hn) const; + // // ***** traverse the sequence ***** // diff --git a/src/driver/setup.cc b/src/driver/setup.cc index 77cdd10..d4d6e11 100644 --- a/src/driver/setup.cc +++ b/src/driver/setup.cc @@ -297,8 +297,8 @@ horizon_sequence& hs = *state.my_hs; // // if we're going to actually find horizons // we spread the horizons over multiple processors for maximum efficiency, -// otherwise (we're just doing testing/debugging computations, so) we -// allocate all the horizons to processor #0 for simplicity +// otherwise (we're just doing testing/debugging computations, so) +// we allocate all the horizons to processor #0 for simplicity // const bool multiproc_flag = (state.method == method__find_horizons); state.N_active_procs @@ -308,11 +308,12 @@ state.N_active_procs verbose_info); // horizon numbers run from 1 to N_horizons inclusive -state.my_AH_info = (N_horizons == 0) ? NULL : new AH_info*[N_horizons+1]; +// so the array size is N_horizons+1 +state.my_AH_data = (N_horizons == 0) ? NULL : new AH_data*[N_horizons+1]; { - for (int hn = 1 ; hn <= N_horizons ; ++hn) + for (int hn = 0 ; hn <= N_horizons ; ++hn) { - state.my_AH_info[hn] = NULL; + state.my_AH_data[hn] = NULL; } } @@ -336,16 +337,20 @@ if (interp_param_table_handle < 0) "AHFinderDirect_setup(): bad interpatch-interpolator parameter(s) \"%s\"!", interpatch_interpolator_pars); /*NOTREACHED*/ -// setup all the genuine horizons on this processor +// setup all horizons on this processor, +// with full-fledged patch systems for genuine horizons +// and skeletal patch systems for others { - for (int hn = hs.init_hn() ; hs.is_genuine() ; hn = hs.next_hn()) + for (int hn = 1 ; hn <= hs.N_horizons() ; ++hn) { - state.my_AH_info[hn] = new AH_info; - struct AH_info& AH_info = *state.my_AH_info[hn]; + const bool genuine_flag = hs.is_hn_genuine(hn); + state.my_AH_data[hn] = new AH_data; + struct AH_data& AH_data = *state.my_AH_data[hn]; if (verbose_info.print_algorithm_highlights) then CCTK_VInfo(CCTK_THORNSTRING, - " setting up data structures for horizon %d", + " setting up %s data structures for horizon %d", + (genuine_flag ? "full-fledged" : "skeletal"), hn); // decide what type of patch system this one should be @@ -366,77 +371,84 @@ if (interp_param_table_handle < 0) : patch_system::type_of_name(patch_system_type[hn]); // create the patch system - AH_info.ps_ptr + AH_data.ps_ptr = new patch_system(origin_x[hn], origin_y[hn], origin_z[hn], ps_type, ghost_zone_width, patch_overlap_width, N_zones_per_right_angle[hn], - gfns::nominal_min_gfn, gfns::nominal_max_gfn, + gfns::nominal_min_gfn, + (genuine_flag ? gfns::nominal_max_gfn + : gfns::skeletal_nominal_max_gfn), gfns::ghosted_min_gfn, gfns::ghosted_max_gfn, interp_handle, interp_param_table_handle, true, verbose_info.print_algorithm_details); - patch_system& ps = *AH_info.ps_ptr; - ps.set_gridfn_to_constant(1.0, gfns::gfn__one); + patch_system& ps = *AH_data.ps_ptr; + if (genuine_flag) + then ps.set_gridfn_to_constant(1.0, gfns::gfn__one); - // Jacobian matrix will be created later - AH_info.Jac_ptr = new_Jacobian(Jac_info.Jacobian_store_solve_method, - ps, - verbose_info.print_algorithm_details); + AH_data.Jac_ptr = genuine_flag + ? new_Jacobian(Jac_info.Jacobian_store_solve_method, + ps, + verbose_info.print_algorithm_details) + : NULL; - AH_info.initial_find_flag = true; + AH_data.initial_find_flag = genuine_flag; - if (verbose_info.print_algorithm_details) - then CCTK_VInfo(CCTK_THORNSTRING, + if (genuine_flag) + then { + if (verbose_info.print_algorithm_details) + then CCTK_VInfo(CCTK_THORNSTRING, " setting initial guess parameters etc"); - AH_info.initial_guess_info.method - = decode_initial_guess_method(initial_guess_method[hn]); - // ... Kerr/Kerr - AH_info.initial_guess_info.Kerr_Kerr_info.x_posn - = initial_guess__Kerr_Kerr__x_posn[hn]; - AH_info.initial_guess_info.Kerr_Kerr_info.y_posn - = initial_guess__Kerr_Kerr__y_posn[hn]; - AH_info.initial_guess_info.Kerr_Kerr_info.z_posn - = initial_guess__Kerr_Kerr__z_posn[hn]; - AH_info.initial_guess_info.Kerr_Kerr_info.mass - = initial_guess__Kerr_Kerr__mass[hn]; - AH_info.initial_guess_info.Kerr_Kerr_info.spin - = initial_guess__Kerr_Kerr__spin[hn]; - // ... Kerr/Kerr-Schild - AH_info.initial_guess_info.Kerr_KerrSchild_info.x_posn - = initial_guess__Kerr_KerrSchild__x_posn[hn]; - AH_info.initial_guess_info.Kerr_KerrSchild_info.y_posn - = initial_guess__Kerr_KerrSchild__y_posn[hn]; - AH_info.initial_guess_info.Kerr_KerrSchild_info.z_posn - = initial_guess__Kerr_KerrSchild__z_posn[hn]; - AH_info.initial_guess_info.Kerr_KerrSchild_info.mass - = initial_guess__Kerr_KerrSchild__mass[hn]; - AH_info.initial_guess_info.Kerr_KerrSchild_info.spin - = initial_guess__Kerr_KerrSchild__spin[hn]; - // ... coordinate sphere - AH_info.initial_guess_info.coord_sphere_info.x_center - = initial_guess__coord_sphere__x_center[hn]; - AH_info.initial_guess_info.coord_sphere_info.y_center - = initial_guess__coord_sphere__y_center[hn]; - AH_info.initial_guess_info.coord_sphere_info.z_center - = initial_guess__coord_sphere__z_center[hn]; - AH_info.initial_guess_info.coord_sphere_info.radius - = initial_guess__coord_sphere__radius[hn]; - // ... coordinate ellipsoid - AH_info.initial_guess_info.coord_ellipsoid_info.x_center - = initial_guess__coord_ellipsoid__x_center[hn]; - AH_info.initial_guess_info.coord_ellipsoid_info.y_center - = initial_guess__coord_ellipsoid__y_center[hn]; - AH_info.initial_guess_info.coord_ellipsoid_info.z_center - = initial_guess__coord_ellipsoid__z_center[hn]; - AH_info.initial_guess_info.coord_ellipsoid_info.x_radius - = initial_guess__coord_ellipsoid__x_radius[hn]; - AH_info.initial_guess_info.coord_ellipsoid_info.y_radius - = initial_guess__coord_ellipsoid__y_radius[hn]; - AH_info.initial_guess_info.coord_ellipsoid_info.z_radius - = initial_guess__coord_ellipsoid__z_radius[hn]; - - AH_info.found_flag = false; - AH_info.BH_diagnostics_fileptr = NULL; + AH_data.initial_guess_info.method + = decode_initial_guess_method(initial_guess_method[hn]); + // ... Kerr/Kerr + AH_data.initial_guess_info.Kerr_Kerr_info.x_posn + = initial_guess__Kerr_Kerr__x_posn[hn]; + AH_data.initial_guess_info.Kerr_Kerr_info.y_posn + = initial_guess__Kerr_Kerr__y_posn[hn]; + AH_data.initial_guess_info.Kerr_Kerr_info.z_posn + = initial_guess__Kerr_Kerr__z_posn[hn]; + AH_data.initial_guess_info.Kerr_Kerr_info.mass + = initial_guess__Kerr_Kerr__mass[hn]; + AH_data.initial_guess_info.Kerr_Kerr_info.spin + = initial_guess__Kerr_Kerr__spin[hn]; + // ... Kerr/Kerr-Schild + AH_data.initial_guess_info.Kerr_KerrSchild_info.x_posn + = initial_guess__Kerr_KerrSchild__x_posn[hn]; + AH_data.initial_guess_info.Kerr_KerrSchild_info.y_posn + = initial_guess__Kerr_KerrSchild__y_posn[hn]; + AH_data.initial_guess_info.Kerr_KerrSchild_info.z_posn + = initial_guess__Kerr_KerrSchild__z_posn[hn]; + AH_data.initial_guess_info.Kerr_KerrSchild_info.mass + = initial_guess__Kerr_KerrSchild__mass[hn]; + AH_data.initial_guess_info.Kerr_KerrSchild_info.spin + = initial_guess__Kerr_KerrSchild__spin[hn]; + // ... coordinate sphere + AH_data.initial_guess_info.coord_sphere_info.x_center + = initial_guess__coord_sphere__x_center[hn]; + AH_data.initial_guess_info.coord_sphere_info.y_center + = initial_guess__coord_sphere__y_center[hn]; + AH_data.initial_guess_info.coord_sphere_info.z_center + = initial_guess__coord_sphere__z_center[hn]; + AH_data.initial_guess_info.coord_sphere_info.radius + = initial_guess__coord_sphere__radius[hn]; + // ... coordinate ellipsoid + AH_data.initial_guess_info.coord_ellipsoid_info.x_center + = initial_guess__coord_ellipsoid__x_center[hn]; + AH_data.initial_guess_info.coord_ellipsoid_info.y_center + = initial_guess__coord_ellipsoid__y_center[hn]; + AH_data.initial_guess_info.coord_ellipsoid_info.z_center + = initial_guess__coord_ellipsoid__z_center[hn]; + AH_data.initial_guess_info.coord_ellipsoid_info.x_radius + = initial_guess__coord_ellipsoid__x_radius[hn]; + AH_data.initial_guess_info.coord_ellipsoid_info.y_radius + = initial_guess__coord_ellipsoid__y_radius[hn]; + AH_data.initial_guess_info.coord_ellipsoid_info.z_radius + = initial_guess__coord_ellipsoid__z_radius[hn]; + } + + AH_data.found_flag = false; + AH_data.BH_diagnostics_fileptr = NULL; } } } diff --git a/src/driver/test_horizon_sequence.cc b/src/driver/test_horizon_sequence.cc index d59a0f4..32b1700 100644 --- a/src/driver/test_horizon_sequence.cc +++ b/src/driver/test_horizon_sequence.cc @@ -56,6 +56,10 @@ assert( hs0.next_hn() == 0 ); assert( hs0.dummy_number() == 4 ); assert( hs0.N_horizons() == 0 ); assert( hs0.my_N_horizons() == 0 ); +assert( ! hs0.is_hn_genuine(-1) ); +assert( ! hs0.is_hn_genuine(0) ); +assert( ! hs0.is_hn_genuine(1) ); +assert( ! hs0.is_hn_genuine(42) ); } } @@ -69,6 +73,10 @@ horizon_sequence hs1(1); assert( hs1.N_horizons() == 1 ); assert( hs1.my_N_horizons() == 0 ); assert( ! hs1.has_genuine_horizons() ); +assert( ! hs1.is_hn_genuine(-1) ); +assert( ! hs1.is_hn_genuine(0) ); +assert( ! hs1.is_hn_genuine(1) ); +assert( ! hs1.is_hn_genuine(42) ); assert( hs1.append_hn(42) == 1 ); assert( hs1.N_horizons() == 1 ); assert( hs1.my_N_horizons() == 1 ); @@ -77,6 +85,10 @@ assert( STRING_EQUAL(hs1.sequence_string(","), "42") ); assert( hs1.is_genuine() ); assert( ! hs1.is_next_genuine() ); assert( hs1.dummy_number() == 0 ); +assert( ! hs1.is_hn_genuine(-1) ); +assert( ! hs1.is_hn_genuine(0) ); +assert( ! hs1.is_hn_genuine(1) ); +assert( hs1.is_hn_genuine(42) ); assert( hs1.init_hn() == 42 ); assert( hs1.get_hn() == 42 ); assert( hs1.is_genuine() ); @@ -117,11 +129,22 @@ horizon_sequence hs2(2); assert( hs2.N_horizons() == 2 ); assert( hs2.my_N_horizons() == 0 ); assert( ! hs2.has_genuine_horizons() ); +assert( ! hs2.is_hn_genuine(-1) ); +assert( ! hs2.is_hn_genuine(0) ); +assert( ! hs2.is_hn_genuine(1) ); +assert( ! hs2.is_hn_genuine(42) ); +assert( ! hs2.is_hn_genuine(69) ); assert( hs2.append_hn(42) == 1 ); assert( hs2.append_hn(69) == 2 ); assert( hs2.N_horizons() == 2 ); assert( hs2.my_N_horizons() == 2 ); assert( hs2.has_genuine_horizons() ); +assert( ! hs2.is_hn_genuine(-1) ); +assert( ! hs2.is_hn_genuine(0) ); +assert( ! hs2.is_hn_genuine(1) ); +assert( hs2.is_hn_genuine(42) ); +assert( hs2.is_hn_genuine(69) ); +assert( ! hs2.is_hn_genuine(1000) ); assert( STRING_EQUAL(hs2.sequence_string(","), "42,69") ); assert( hs2.is_genuine() ); assert( hs2.is_next_genuine() ); @@ -131,6 +154,12 @@ assert( hs2.init_hn() == 42 ); assert( hs2.is_genuine() ); assert( hs2.is_next_genuine() ); assert( hs2.dummy_number() == 0 ); +assert( ! hs2.is_hn_genuine(-1) ); +assert( ! hs2.is_hn_genuine(0) ); +assert( ! hs2.is_hn_genuine(1) ); +assert( hs2.is_hn_genuine(42) ); +assert( hs2.is_hn_genuine(69) ); +assert( ! hs2.is_hn_genuine(1000) ); assert( hs2.next_hn() == 69 ); assert( hs2.get_hn() == 69 ); assert( hs2.is_genuine() ); @@ -153,6 +182,12 @@ assert( hs2.next_hn() == 0 ); assert( hs2.dummy_number() == 3 ); assert( hs2.N_horizons() == 2 ); assert( hs2.my_N_horizons() == 2 ); +assert( ! hs2.is_hn_genuine(-1) ); +assert( ! hs2.is_hn_genuine(0) ); +assert( ! hs2.is_hn_genuine(1) ); +assert( hs2.is_hn_genuine(42) ); +assert( hs2.is_hn_genuine(69) ); +assert( ! hs2.is_hn_genuine(1000) ); } } @@ -178,6 +213,13 @@ void test345() assert( hs.next_hn() == 0 ); assert( hs.dummy_number() == 3 ); + assert( ! hs.is_hn_genuine(-1) ); + assert( ! hs.is_hn_genuine(0) ); + assert( ! hs.is_hn_genuine(1) ); + assert( ! hs.is_hn_genuine(42) ); + assert( ! hs.is_hn_genuine(69) ); + assert( ! hs.is_hn_genuine(1000) ); + assert( hs.append_hn(42) == 1 ); assert( hs.append_hn(69) == 2 ); assert( hs.append_hn(105) == 3 ); @@ -188,6 +230,15 @@ void test345() assert( hs.is_next_genuine() ); assert( hs.dummy_number() == 0 ); + assert( ! hs.is_hn_genuine(-1) ); + assert( ! hs.is_hn_genuine(0) ); + assert( ! hs.is_hn_genuine(1) ); + assert( hs.is_hn_genuine(42) ); + assert( hs.is_hn_genuine(69) ); + assert( ! hs.is_hn_genuine(100) ); + assert( hs.is_hn_genuine(105) ); + assert( ! hs.is_hn_genuine(1000) ); + assert( STRING_EQUAL(hs.sequence_string(","), "42,69,105") ); for (int i = 1 ; i <= 4 ; ++i) @@ -200,6 +251,16 @@ void test345() assert( hs.is_genuine() ); assert( hs.is_next_genuine() ); assert( hs.dummy_number() == 0 ); + + assert( ! hs.is_hn_genuine(-1) ); + assert( ! hs.is_hn_genuine(0) ); + assert( ! hs.is_hn_genuine(1) ); + assert( hs.is_hn_genuine(42) ); + assert( hs.is_hn_genuine(69) ); + assert( ! hs.is_hn_genuine(100) ); + assert( hs.is_hn_genuine(105) ); + assert( ! hs.is_hn_genuine(1000) ); + assert( hs.next_hn() == 69 ); assert( hs.get_hn() == 69 ); assert( hs.is_genuine() ); @@ -225,6 +286,15 @@ void test345() assert( ! hs.is_genuine() ); assert( ! hs.is_next_genuine() ); assert( hs.dummy_number() == 3 ); + + assert( ! hs.is_hn_genuine(-1) ); + assert( ! hs.is_hn_genuine(0) ); + assert( ! hs.is_hn_genuine(1) ); + assert( hs.is_hn_genuine(42) ); + assert( hs.is_hn_genuine(69) ); + assert( ! hs.is_hn_genuine(100) ); + assert( hs.is_hn_genuine(105) ); + assert( ! hs.is_hn_genuine(1000) ); } } } -- cgit v1.2.3