// Newton.cc -- solve Theta(h) = 0 via Newton's method
// $Header$
//
// Newton_solve - driver to solve Theta(h) = 0 for all our horizons
//
/// broadcast_status - broadcast status from active processors to all processors
/// broadcast_horizon_data - broadcast AH data to all processors
///
/// print_status - print Newton-iteration status on each active proc
/// Newton_step - take the Newton step, scaling it down if it's too large
//

// NOTE(review): the <...> names of the system headers were lost in
// extraction; restored here from what this file actually uses
// (assert(), INT_MAX, printf-family via cctk) -- confirm against the
// original source
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "util_Table.h"
#include "cctk.h"
#include "cctk_Arguments.h"

#include "config.h"
#include "stdc.h"
#include "../jtutil/util.hh"
#include "../jtutil/array.hh"
#include "../jtutil/cpm_map.hh"
#include "../jtutil/linear_map.hh"
using jtutil::error_exit;

#include "../patch/coords.hh"
#include "../patch/grid.hh"
#include "../patch/fd_grid.hh"
#include "../patch/patch.hh"
#include "../patch/patch_edge.hh"
#include "../patch/patch_interp.hh"
#include "../patch/ghost_zone.hh"
#include "../patch/patch_system.hh"

#include "../elliptic/Jacobian.hh"

#include "../gr/gfns.hh"
#include "../gr/gr.hh"

#include "horizon_sequence.hh"
#include "BH_diagnostics.hh"
#include "driver.hh"

//******************************************************************************

//
// prototypes for functions local to this file
//

namespace {
bool broadcast_status(const cGH* GH,
                      int N_procs, int N_active_procs,
                      int my_proc, bool my_active_flag,
                      int hn, int iteration,
                      enum expansion_status expansion_status,
                      fp rms_norm, fp infinity_norm,
                      bool found_this_horizon, bool I_need_more_iterations,
                      struct iteration_status_buffers& isb);
void broadcast_horizon_data(const cGH* GH,
                            bool broadcast_flag,
                            bool broadcast_horizon_shape,
                            struct BH_diagnostics& BH_diagnostics,
                            patch_system& ps,
                            struct horizon_buffers& horizon_buffers);

void print_status(int N_active_procs,
                  const struct iteration_status_buffers& isb);
void Newton_step(patch_system& ps,
                 fp max_allowable_Delta_h_over_h,
                 const struct verbose_info& verbose_info);
          }
//****************************************************************************** // // This function tries to finds each horizon assigned to this processor, // by solving Theta(h) = 0 via Newton's method. For each horizon found, // it computes the BH diagnostics, optionally prints them (via CCTK_VInfo()), // and optionally writes them to a BH diagnostics output file. It also // optionally writes the horizon shape itself to a data file. // // This function must be called synchronously across all processors, // and it operates synchronously, returning only when every processor // is done with all its horizons. See ./README.parallel for a discussion // of the parallel/multiprocessor issues and algorithm. // void Newton(const cGH* GH, int N_procs, int N_active_procs, int my_proc, horizon_sequence& hs, struct AH_data* const AH_data_array[], const struct cactus_grid_info& cgi, const struct geometry_info& gi, const struct Jacobian_info& Jacobian_info, const struct solver_info& solver_info, const struct IO_info& IO_info, const struct BH_diagnostics_info& BH_diagnostics_info, bool broadcast_horizon_shape, const struct error_info& error_info, const struct verbose_info& verbose_info, struct iteration_status_buffers& isb) { const bool my_active_flag = hs.has_genuine_horizons(); const int N_horizons = hs.N_horizons(); // print out which horizons we're finding on this processor if (hs.has_genuine_horizons()) then CCTK_VInfo(CCTK_THORNSTRING, "proc %d: searching for horizon%s %s/%d", my_proc, (hs.my_N_horizons() > 1 ? 
"s" : ""), hs.sequence_string(","), N_horizons); else CCTK_VInfo(CCTK_THORNSTRING, "proc %d: dummy horizon(s) only", my_proc); // // each pass through this loop finds a single horizon, // or does corresponding dummy-horizon calls // // note there is no explicit exit test, rather we exit from the middle // of the loop (only) when all processors are done with all their genuine // horizons // for (int hn = hs.init_hn() ; ; hn = hs.next_hn()) { if (verbose_info.print_algorithm_details) then CCTK_VInfo(CCTK_THORNSTRING, "Newton_solve(): processor %d working on horizon %d", my_proc, hn); const bool horizon_is_genuine = hs.is_genuine(); const bool there_is_another_genuine_horizon = hs.is_next_genuine(); if (verbose_info.print_algorithm_details) then { CCTK_VInfo(CCTK_THORNSTRING, " horizon_is_genuine=%d", int(horizon_is_genuine)); CCTK_VInfo(CCTK_THORNSTRING, " there_is_another_genuine_horizon=%d", int(there_is_another_genuine_horizon)); } struct AH_data* AH_data_ptr = horizon_is_genuine ? AH_data_array[hn] : NULL; patch_system* ps_ptr = horizon_is_genuine ? AH_data_ptr->ps_ptr : NULL; Jacobian* Jac_ptr = horizon_is_genuine ? AH_data_ptr->Jac_ptr: NULL; const int max_iterations = horizon_is_genuine ? (AH_data_ptr->initial_find_flag ? 
solver_info.max_Newton_iterations__initial : solver_info.max_Newton_iterations__subsequent) : INT_MAX; // // each pass through this loop does a single Newton iteration // on the current horizon (which might be either genuine or dummy) // for (int iteration = 1 ; ; ++iteration) { if (verbose_info.print_algorithm_debug) then CCTK_VInfo(CCTK_THORNSTRING, "beginning iteration %d (horizon_is_genuine=%d)", iteration, int(horizon_is_genuine)); // // evaluate the expansion Theta(h) and its norms // *** this is a synchronous operation across all processors *** // if (horizon_is_genuine && solver_info.debugging_output_at_each_Newton_iteration) then output_gridfn(*ps_ptr, gfns::gfn__h, IO_info, IO_info.h_base_file_name, hn, verbose_info.print_algorithm_highlights, iteration); jtutil::norm Theta_norms; const enum expansion_status expansion_status = expansion(ps_ptr, cgi, gi, error_info, (iteration == 1), true, // yes, we want Jacobian coeffs verbose_info.print_algorithm_details, &Theta_norms); const bool Theta_is_ok = (expansion_status == expansion_success); const bool norms_are_ok = horizon_is_genuine && Theta_is_ok; if (verbose_info.print_algorithm_debug) then { CCTK_VInfo(CCTK_THORNSTRING, " Newton_solve(): Theta_is_ok=%d", Theta_is_ok); if (norms_are_ok) then CCTK_VInfo(CCTK_THORNSTRING, " Theta rms-norm %.1e, infinity-norm %.1e", double(Theta_norms.rms_norm()), double(Theta_norms.infinity_norm())); } if (horizon_is_genuine && Theta_is_ok && solver_info.debugging_output_at_each_Newton_iteration) then output_gridfn(*ps_ptr, gfns::gfn__Theta, IO_info, IO_info.Theta_base_file_name, hn, verbose_info.print_algorithm_highlights, iteration); // // have we found this horizon? 
// if so, compute and output BH diagnostics // const bool found_this_horizon = norms_are_ok && (Theta_norms.infinity_norm() <= solver_info.Theta_norm_for_convergence); if (horizon_is_genuine) then AH_data_ptr->found_flag = found_this_horizon; if (found_this_horizon) then { struct BH_diagnostics& BH_diagnostics = AH_data_ptr->BH_diagnostics; BH_diagnostics.compute(*ps_ptr, BH_diagnostics_info); if (IO_info.output_BH_diagnostics) then { if (AH_data_ptr->BH_diagnostics_fileptr == NULL) then AH_data_ptr->BH_diagnostics_fileptr = BH_diagnostics.setup_output_file (IO_info, N_horizons, hn); BH_diagnostics.output(AH_data_ptr ->BH_diagnostics_fileptr, IO_info); } } // // see if we need more iterations (either on this or another horizon) // // does *this* horizon need more iterations? // i.e. has this horizon's Newton iteration not yet converged? const bool this_horizon_needs_more_iterations = horizon_is_genuine && Theta_is_ok && !found_this_horizon && (iteration < max_iterations); // do I (this processor) need to do more iterations // on this or a following horizon? const bool I_need_more_iterations = this_horizon_needs_more_iterations || there_is_another_genuine_horizon; if (verbose_info.print_algorithm_details) then { CCTK_VInfo(CCTK_THORNSTRING, " flags: found_this_horizon=%d", int(found_this_horizon)); CCTK_VInfo(CCTK_THORNSTRING, " this_horizon_needs_more_iterations=%d", int(this_horizon_needs_more_iterations)); CCTK_VInfo(CCTK_THORNSTRING, " I_need_more_iterations=%d", int(I_need_more_iterations)); } // // broadcast iteration status from each active processor // to all processors, and inclusive-or the "we need more iterations" // flags to see if *any* (active) processor needs more iterations // const bool any_proc_needs_more_iterations = broadcast_status(GH, N_procs, N_active_procs, my_proc, my_active_flag, hn, iteration, expansion_status, (norms_are_ok ? Theta_norms.rms_norm() : 0.0), (norms_are_ok ? 
Theta_norms.infinity_norm() : 0.0), found_this_horizon, I_need_more_iterations, isb); // set found-this-horizon flags // for all active processors' non-dummy horizons { for (int found_proc = 0 ; found_proc < N_active_procs ; ++found_proc) { const int found_hn = isb.hn_buffer[found_proc]; if (found_hn > 0) then AH_data_array[found_hn]->found_flag = isb.found_horizon_buffer[found_proc]; } } if (verbose_info.print_algorithm_details) then { CCTK_VInfo(CCTK_THORNSTRING, " ==> any_proc_needs_more_iterations=%d", int(any_proc_needs_more_iterations)); } // // print the iteration status info // if ((my_proc == 0) && verbose_info.print_algorithm_highlights) then print_status(N_active_procs, isb); // // for each processor which found a horizon, // broadcast its horizon info to all processors // and print the BH diagnostics on processor 0 // { for (int found_proc = 0 ; found_proc < N_active_procs ; ++found_proc) { if (! isb.found_horizon_buffer[found_proc]) then continue; const int found_hn = isb.hn_buffer[found_proc]; struct AH_data& found_AH_data = *AH_data_array[found_hn]; if (verbose_info.print_algorithm_details) then CCTK_VInfo(CCTK_THORNSTRING, " broadcasting proc %d/horizon %d diagnostics%s", found_proc, found_hn, (broadcast_horizon_shape ? "+shape" : "")); broadcast_horizon_data(GH, my_proc == found_proc, broadcast_horizon_shape, found_AH_data.BH_diagnostics, *found_AH_data.ps_ptr, found_AH_data.horizon_buffers); if ((my_proc == 0) && verbose_info.print_physics_details) then found_AH_data.BH_diagnostics .print(N_horizons, found_hn); } } // // if we found our horizon, maybe output the horizon shape? 
// if (found_this_horizon) then { if (IO_info.output_h) then output_gridfn(*ps_ptr, gfns::gfn__h, IO_info, IO_info.h_base_file_name, hn, verbose_info .print_algorithm_highlights); if (IO_info.output_Theta) then output_gridfn(*ps_ptr, gfns::gfn__Theta, IO_info, IO_info.Theta_base_file_name, hn, verbose_info .print_algorithm_highlights); } // // are all processors done with all their genuine horizons? // or if this is a single-processor run, are we done with this horizon? // if (! any_proc_needs_more_iterations) then { if (verbose_info.print_algorithm_details) then CCTK_VInfo(CCTK_THORNSTRING, "==> all processors are finished!"); return; // *** NORMAL RETURN *** } if ((N_procs == 1) && !this_horizon_needs_more_iterations) then { if (verbose_info.print_algorithm_debug) then CCTK_VInfo(CCTK_THORNSTRING, "==> [single processor] Skipping to next horizon"); break; // *** LOOP EXIT *** } // // compute the Jacobian matrix // *** this is a synchronous operation across all processors *** // if (verbose_info.print_algorithm_debug) then CCTK_VInfo(CCTK_THORNSTRING, " computing Jacobian: genuine/dummy flag %d", int(this_horizon_needs_more_iterations)); const enum expansion_status Jacobian_status = expansion_Jacobian (this_horizon_needs_more_iterations ? ps_ptr : NULL, this_horizon_needs_more_iterations ? Jac_ptr : NULL, cgi, gi, Jacobian_info, error_info, (iteration == 1), verbose_info.print_algorithm_details); const bool Jacobian_is_ok = (Jacobian_status == expansion_success); // // skip to the next horizon unless // this is a genuine Jacobian computation, and it went ok // if (! 
(this_horizon_needs_more_iterations && Jacobian_is_ok)) then { if (verbose_info.print_algorithm_debug) then CCTK_VInfo(CCTK_THORNSTRING, " skipping to next horizon"); break; // *** LOOP EXIT *** } // // compute the Newton step // if (verbose_info.print_algorithm_details) then CCTK_VInfo(CCTK_THORNSTRING, "solving linear system for Delta_h (%d unknowns)", Jac_ptr->N_rows()); const fp rcond = Jac_ptr->solve_linear_system(gfns::gfn__Theta, gfns::gfn__Delta_h, verbose_info.print_algorithm_details); if (rcond == 0.0) then { CCTK_VWarn(1, __LINE__, __FILE__, CCTK_THORNSTRING, "Newton_solve: Jacobian matrix is numerically singular!"); // give up on this horizon break; // *** LOOP CONTROL *** } if (verbose_info.print_algorithm_details) then { if (rcond > 0.0) then CCTK_VInfo(CCTK_THORNSTRING, "done solving linear system (condition number %.1e)", double(rcond)); else CCTK_VInfo(CCTK_THORNSTRING, "done solving linear system"); } if (solver_info.debugging_output_at_each_Newton_iteration) then output_gridfn(*ps_ptr, gfns::gfn__Delta_h, IO_info, IO_info.Delta_h_base_file_name, hn, verbose_info.print_algorithm_details, iteration); // // take the Newton step (scaled if need be) // Newton_step(*ps_ptr, solver_info.max_allowable_Delta_h_over_h, verbose_info); // end of this Newton iteration } // end of this horizon } // we should never get to here assert( false ); } //****************************************************************************** //****************************************************************************** //****************************************************************************** // // This function (which must be called on *every* processor) broadcasts // the per-iteration status information from each active processor to // all processors. // // Arguments: // GH --> The Cactus grid hierarchy. // N_procs = The total number of processors. // N_active_procs = The number of active processors. // my_active_flag = Is this processor an active processor? 
// hn,iteration,status, // {rms,infinity}_norm, // found_this_horizon,I_need_more_iterations // = On an active processors, these are the values to be broadcast. // On a dummy processors, these are ignored. // isb = (out) Holds both user buffers (set to the broadcast results) // and low-level buffers (used within this function). If the // buffer pointers are NULL then the buffers are allocated. // // Results: // This function returns the inclusive-or over all active processors, // of the broadcast I_need_more_iterations flags. // namespace { bool broadcast_status(const cGH* GH, int N_procs, int N_active_procs, int my_proc, bool my_active_flag, int hn, int iteration, enum expansion_status expansion_status, fp rms_norm, fp infinity_norm, bool found_this_horizon, bool I_need_more_iterations, struct iteration_status_buffers& isb) { assert( my_proc >= 0 ); assert( my_proc < N_procs ); // // We do the broadcast via a Cactus reduction operation (this is a KLUDGE, // but until Cactus gets a generic interprocessor communications mechanism // it's the best we can do without mucking with MPI ourself). To do the // gather via a reduction, we set up a 2-D buffer whose entries are all // 0s, except that on each processor the [my_proc] row has the values we // want to gather. Then we do a sum-reduction of the buffers across // processors. For the actual reduction we treat the buffers as 1-D // arrays on each processor (this slightly simplifies the code). // // To reduce overheads, we do the entire operation with a single (CCTK_REAL) // Cactus reduce, casting values to CCTK_REAL as necessary (and encoding // Boolean flags into signs of known-to-be-positive values. // // Alas MPI (and thus Cactus) requires the input and output reduce buffers // to be distinct, so we need two copies of the buffers on each processor. // // the buffers are actually 2-D arrays; these are the column numbers // ... 
if we wanted to, we could pack hn, iteration, and expansion_status // all into a single (64-bit) floating-point value, but it's not // worth the trouble... enum { // CCTK_INT buffer buffer_var__hn = 0, // also encodes found_this_horizon flag // in sign: +=true, -=false buffer_var__iteration, // also encodes I_need_more_iterations flag // in sign: +=true, -=false buffer_var__expansion_status, buffer_var__rms_norm, buffer_var__infinity_norm, N_buffer_vars // no comma }; // // allocate buffers if this is the first use // if (isb.hn_buffer == NULL) then { isb.hn_buffer = new int [N_active_procs]; isb.iteration_buffer = new int [N_active_procs]; isb.expansion_status_buffer = new enum expansion_status [N_active_procs]; isb.rms_norm_buffer = new fp [N_active_procs]; isb.infinity_norm_buffer = new fp [N_active_procs]; isb.found_horizon_buffer = new bool[N_active_procs]; isb.send_buffer_ptr = new jtutil::array2d (0, N_active_procs-1, 0, N_buffer_vars-1); isb.receive_buffer_ptr = new jtutil::array2d (0, N_active_procs-1, 0, N_buffer_vars-1); } jtutil::array2d& send_buffer = *isb.send_buffer_ptr; jtutil::array2d& receive_buffer = *isb.receive_buffer_ptr; // // pack this processor's values into the reduction buffer // jtutil::zero_C_array(send_buffer.N_array(), send_buffer.data_array()); if (my_active_flag) then { assert( send_buffer.is_valid_i(my_proc) ); assert( hn >= 0 ); // encoding scheme assumes this assert( iteration > 0 ); // encoding scheme assumes this send_buffer(my_proc, buffer_var__hn) = found_this_horizon ? +hn : -hn; send_buffer(my_proc, buffer_var__iteration) = I_need_more_iterations ? 
+iteration : -iteration; send_buffer(my_proc, buffer_var__expansion_status) = int(expansion_status); send_buffer(my_proc, buffer_var__rms_norm) = rms_norm; send_buffer(my_proc, buffer_var__infinity_norm) = infinity_norm; } // // do the reduction // // this name is appropriate for PUGHReduce, caveat user for other drivers :) const int reduction_handle = CCTK_ReductionArrayHandle("sum"); if (reduction_handle < 0) then CCTK_VWarn(FATAL_ERROR, __LINE__, __FILE__, CCTK_THORNSTRING, "broadcast_status(): can't get sum-reduction handle!"); /*NOTREACHED*/ const int reduction_status = CCTK_ReduceLocArrayToArray1D(GH, -1, // broadcast results to all processors reduction_handle, static_cast (send_buffer .data_array()), static_cast< void*> (receive_buffer.data_array()), send_buffer.N_array(), CCTK_VARIABLE_REAL); if (reduction_status < 0) then CCTK_VWarn(FATAL_ERROR, __LINE__, __FILE__, CCTK_THORNSTRING, "broadcast_status(): error status %d from reduction!", reduction_status); /*NOTREACHED*/ // // unpack the reduction buffer back to the high-level result buffers and // compute the inclusive-or of the broadcast I_need_more_iterations flags // bool any_proc_needs_more_iterations = false; for (int proc = 0 ; proc < N_active_procs ; ++proc) { const int hn_temp = static_cast( receive_buffer(proc, buffer_var__hn) ); isb.hn_buffer[proc] = jtutil::abs(hn_temp); isb.found_horizon_buffer[proc] = (hn_temp > 0); const int iteration_temp = static_cast( receive_buffer(proc, buffer_var__iteration) ); isb.iteration_buffer[proc] = jtutil::abs(iteration_temp); const bool proc_needs_more_iterations = (iteration_temp > 0); any_proc_needs_more_iterations |= proc_needs_more_iterations; isb.expansion_status_buffer[proc] = static_cast( static_cast( receive_buffer(proc, buffer_var__expansion_status) ) ); isb.rms_norm_buffer[proc] = receive_buffer(proc, buffer_var__rms_norm); isb.infinity_norm_buffer[proc] = receive_buffer(proc, buffer_var__infinity_norm); } return any_proc_needs_more_iterations; } } 
//****************************************************************************** // // This function (which must be called on *every* processor) broadcasts // the BH diagnostics and (ghosted) horizon shape from a specified processor // to all processors. // // Arguments: // GH --> The Cactus grid hierarchy. // broadcast_flag = true on the broadcasting processor, // false on all other processors // BH_diagnostics = On the broadcasting processor, this is the BH diagnostics // to broadcast; on all other processors, it's set to the // broadcast BH diagnostics. // ps = On the broadcasting processor, gfn__h is broadcast; // on all other processors, gfn__h is set to the broadcast values. // horizon_buffers = Internal buffers for use in the broadcast; // if N_buffer == 0 then we set N_buffer and allocate // the buffers. // namespace { void broadcast_horizon_data(const cGH* GH, bool broadcast_flag, bool broadcast_horizon_shape, struct BH_diagnostics& BH_diagnostics, patch_system& ps, struct horizon_buffers& horizon_buffers) { // // Implementation notes: // // We do the send via a Cactus sum-reduce where the data are 0 on // all processors except the sending one. // // To reduce the interprocessor-communications cost, we actually only // broadcast the nominal-grid horizon shape; we then do a synchronize // operation on the patch system to recover the full ghosted-grid shape. // if (horizon_buffers.N_buffer == 0) then { // allocate the buffers horizon_buffers.N_buffer = BH_diagnostics::N_buffer + (broadcast_horizon_shape ? 
ps.N_grid_points() : 0); horizon_buffers.send_buffer = new CCTK_REAL[horizon_buffers.N_buffer]; horizon_buffers.receive_buffer = new CCTK_REAL[horizon_buffers.N_buffer]; } if (broadcast_flag) then { // pack the data to be broadcast into the send buffer BH_diagnostics.copy_to_buffer(horizon_buffers.send_buffer); if (broadcast_horizon_shape) then { int posn = BH_diagnostics::N_buffer; for (int pn = 0 ; pn < ps.N_patches() ; ++pn) { const patch& p = ps.ith_patch(pn); for (int irho = p.min_irho() ; irho <= p.max_irho() ; ++irho) { for (int isigma = p.min_isigma() ; isigma <= p.max_isigma() ; ++isigma) { assert( posn < horizon_buffers.N_buffer ); horizon_buffers.send_buffer[posn++] = p.ghosted_gridfn(gfns::gfn__h, irho,isigma); } } } assert( posn == horizon_buffers.N_buffer ); } } else jtutil::zero_C_array(horizon_buffers.N_buffer, horizon_buffers.send_buffer); // this name is appropriate for PUGHReduce, caveat user for other drivers :) const int reduction_handle = CCTK_ReductionArrayHandle("sum"); if (reduction_handle < 0) then CCTK_VWarn(FATAL_ERROR, __LINE__, __FILE__, CCTK_THORNSTRING, "broadcast_horizon_data(): can't get sum-reduction handle!"); /*NOTREACHED*/ const int status = CCTK_ReduceLocArrayToArray1D(GH, -1, // result broadcast to all processors reduction_handle, static_cast (horizon_buffers.send_buffer), static_cast< void*> (horizon_buffers.receive_buffer), horizon_buffers.N_buffer, CCTK_VARIABLE_REAL); if (status < 0) then CCTK_VWarn(FATAL_ERROR, __LINE__, __FILE__, CCTK_THORNSTRING, "broadcast_horizon_data(): error status %d from reduction!", status); /*NOTREACHED*/ if (!broadcast_flag) then { // unpack the data from the receive buffer BH_diagnostics.copy_from_buffer(horizon_buffers.receive_buffer); if (broadcast_horizon_shape) then { int posn = BH_diagnostics::N_buffer; for (int pn = 0 ; pn < ps.N_patches() ; ++pn) { patch& p = ps.ith_patch(pn); for (int irho = p.min_irho() ; irho <= p.max_irho() ; ++irho) { for (int isigma = p.min_isigma() ; isigma <= 
p.max_isigma() ; ++isigma) { assert( posn < horizon_buffers.N_buffer ); p.ghosted_gridfn(gfns::gfn__h, irho,isigma) = horizon_buffers.receive_buffer[posn++]; } } } assert( posn == horizon_buffers.N_buffer ); // recover the full ghosted-grid horizon shape // (we only broadcast the nominal-grid shape) ps.synchronize(); } } } } //****************************************************************************** //****************************************************************************** //****************************************************************************** // // This function is called on processor #0 to print the status of the // Newton iteration on each active processor. // // Arguments: // N_active_procs = The number of active processors. // isb = The high-level buffers here give the information to be printed. // namespace { void print_status(int N_active_procs, const struct iteration_status_buffers& isb) { for (int proc = 0 ; proc < N_active_procs ; ++proc) { // don't print anything for processors doing dummy evaluations if (isb.hn_buffer[proc] == 0) then continue; if (isb.expansion_status_buffer[proc] == expansion_success) then CCTK_VInfo(CCTK_THORNSTRING, " proc %d/horizon %d:it %d |Theta| rms=%.1e inf=%.1e", proc, isb.hn_buffer[proc], isb.iteration_buffer[proc], double(isb.rms_norm_buffer[proc]), double(isb.infinity_norm_buffer[proc])); else CCTK_VInfo(CCTK_THORNSTRING, " proc %d/horizon %d: %s", proc, isb.hn_buffer[proc], expansion_status_string( isb.expansion_status_buffer[proc] )); } } } //****************************************************************************** // // This function takes the Newton step, scaling it down if it's too large. // // Arguments: // ps = The patch system containing the gridfns h and Delta_h. // max_allowable_Delta_h_over_h = The maximum allowable // ||Delta_h||_infinity / ||h||_mean // Any step over this is internally clamped // (scaled down) to this size. 
// namespace { void Newton_step(patch_system& ps, fp max_allowable_Delta_h_over_h, const struct verbose_info& verbose_info) { // // compute scale factor (1 for small steps, <1 for large steps) // jtutil::norm h_norms; ps.ghosted_gridfn_norms(gfns::gfn__h, h_norms); const fp max_allowable_Delta_h = max_allowable_Delta_h_over_h * h_norms.mean(); jtutil::norm Delta_h_norms; ps.gridfn_norms(gfns::gfn__Delta_h, Delta_h_norms); const fp max_Delta_h = Delta_h_norms.infinity_norm(); const fp scale = (max_Delta_h <= max_allowable_Delta_h) ? 1.0 : max_allowable_Delta_h / max_Delta_h; if (verbose_info.print_algorithm_details) then { if (scale == 1.0) then CCTK_VInfo(CCTK_THORNSTRING, "h += Delta_h (rms-norm=%.1e, infinity-norm=%.1e)", Delta_h_norms.rms_norm(), Delta_h_norms.infinity_norm()); else CCTK_VInfo(CCTK_THORNSTRING, "h += %g * Delta_h (infinity-norm clamped to %.2g)", scale, scale * Delta_h_norms.infinity_norm()); } // // take the Newton step (scaled if necessary) // for (int pn = 0 ; pn < ps.N_patches() ; ++pn) { patch& p = ps.ith_patch(pn); for (int irho = p.min_irho() ; irho <= p.max_irho() ; ++irho) { for (int isigma = p.min_isigma() ; isigma <= p.max_isigma() ; ++isigma) { p.ghosted_gridfn(gfns::gfn__h, irho,isigma) -= scale * p.gridfn(gfns::gfn__Delta_h, irho,isigma); } } } } }