From 7dc88252a8079d648d18790df31f448615f31b9b Mon Sep 17 00:00:00 2001 From: jthorn Date: Wed, 4 Jun 2003 11:51:28 +0000 Subject: many changes to * use UMFPACK sparse matrix library * use new ILUCG sparse matrix library API * allow passing Cactus parameters down into the linear solvers * fix assorted small bugs git-svn-id: http://svn.einsteintoolkit.org/cactus/EinsteinAnalysis/AHFinderDirect/trunk@1094 f88db872-0e4f-0410-b76b-b9085cfa78c5 --- src/elliptic/Jacobian.hh | 7 +- src/elliptic/dense_Jacobian.hh | 9 +-- src/elliptic/make.code.defn | 2 +- src/elliptic/row_sparse_Jacobian.cc | 139 ++++++++++++++++++++++++------------ src/elliptic/row_sparse_Jacobian.hh | 19 +++-- 5 files changed, 110 insertions(+), 66 deletions(-) diff --git a/src/elliptic/Jacobian.hh b/src/elliptic/Jacobian.hh index 803b1c7..15ead8e 100644 --- a/src/elliptic/Jacobian.hh +++ b/src/elliptic/Jacobian.hh @@ -187,10 +187,15 @@ struct linear_solver_pars struct ILUCG_pars { fp error_tolerance; - int max_CG_iterations; + // should we limit to N_rows_ conjugate gradient iterations? + bool limit_CG_iterations; } ILUCG_pars; struct UMFPACK_pars { + // number of iterative-improvement iterations to do + // inside UMFPACK after the sparse LU decompose/solve, + // each time we solve a linear system + int N_II_iterations; } UMFPACK_pars; }; diff --git a/src/elliptic/dense_Jacobian.hh b/src/elliptic/dense_Jacobian.hh index cf0b8ba..b99cd84 100644 --- a/src/elliptic/dense_Jacobian.hh +++ b/src/elliptic/dense_Jacobian.hh @@ -87,8 +87,7 @@ public: // solve linear system J.x = rhs via LAPACK // ... rhs and x are nominal-grid gridfns // ... overwrites Jacobian matrix with its LU decomposition - // ... returns estimated reciprocal condition number if known, - // -1.0 if condition number is unknown + // ... returns estimated reciprocal condition number // ... rcond = 0.0 ==> exactly singular // rcond < DBL_EPSILON ==> numerically singular // rcond = 1.0 ==> orthogonal matrix @@ -111,12 +110,6 @@ private: integer *iwork_; // size N_rows_ fp *rwork_; // size 4*N_rows_ }; - -//************************************** - -struct Jacobian_pars__LAPACK - : public Jacobian_pars; - #endif // HAVE_DENSE_JACOBIAN__LAPACK //****************************************************************************** diff --git a/src/elliptic/make.code.defn b/src/elliptic/make.code.defn index 6bc91e9..8f7883d 100644 --- a/src/elliptic/make.code.defn +++ b/src/elliptic/make.code.defn @@ -1,4 +1,4 @@ -# specification of things to be compiled in this directory +# Cactus specification of things to be compiled in this directory # $Header$ # Source files in this directory diff --git a/src/elliptic/row_sparse_Jacobian.cc b/src/elliptic/row_sparse_Jacobian.cc index 5dea587..a53c3fb 100644 --- a/src/elliptic/row_sparse_Jacobian.cc +++ b/src/elliptic/row_sparse_Jacobian.cc @@ -136,14 +136,14 @@ void CCTK_FCALL const float B[], float X[], integer ITEMP[], float RTEMP[], const float* EPS, const integer* ITER, - integer* ISTATUS, integer* IERROR); + integer* ISTATUS); void CCTK_FCALL CCTK_FNAME(dilucg)(const integer* N, const integer IA[], const integer JA[], const double A[], const double B[], double X[], integer ITEMP[], double RTEMP[], const double* EPS, const integer* ITER, - integer* ISTATUS, integer* IERROR); + integer* ISTATUS); #ifdef __cplusplus } /* extern "C" */ @@ -435,24 +435,26 @@ void row_sparse_Jacobian::sort_each_row_into_column_order() // then copies the result back into JA_[] and A_[]. // -// how large of a buffer do we need? -int N = 0; +// buffer must be big enough to hold the largest row +int max_N_in_row = 0; { for (int II = 0 ; II < N_rows_ ; ++II) { - N = jtutil::max(N, IA_[II+1]-IA_[II]); + max_N_in_row = jtutil::max(max_N_in_row, IA_[II+1]-IA_[II]); } } // contiguous buffer for sorting -struct matrix_element *const buffer = new struct matrix_element[N]; +struct matrix_element *const buffer = new struct matrix_element[max_N_in_row]; { for (int II = 0 ; II < N_rows_ ; ++II) { + const int N_in_row = IA_[II+1] - IA_[II]; + // copy this row's JA_[] and A_[] values to the buffer const int start = IA_[II]-IO_; - for (int p = 0 ; p < N ; ++p) + for (int p = 0 ; p < N_in_row ; ++p) { const int posn = start + p; buffer[p].JA = JA_[posn]; @@ -460,12 +462,11 @@ struct matrix_element *const buffer = new struct matrix_element[N]; } // sort the buffer - qsort(static_cast(buffer), N, - sizeof(struct matrix_element), + qsort(static_cast(buffer), N_in_row, sizeof(buffer[0]), &compare_matrix_elements); // copy the buffer values back to this row's JA_[] and A_[] - for (int p = 0 ; p < N ; ++p) + for (int p = 0 ; p < N_in_row ; ++p) { const int posn = start + p; JA_[posn] = buffer[p].JA; @@ -582,7 +583,6 @@ row_sparse_Jacobian__ILUCG::row_sparse_Jacobian__ILUCG bool print_msg_flag /* = false */) : row_sparse_Jacobian(ps, Fortran_index_origin, print_msg_flag), - print_msg_flag_(print_msg_flag), itemp_(NULL), rtemp_(NULL) { @@ -615,6 +615,12 @@ delete[] itemp_; // // It returns -1.0 (no condition number estimate). // +// FIXME: +// It would be more efficient to adjust the ILUCG error tolerance +// based on the current Newton-iteration error level (in ../driver/Newton.cc). +// That is, at early stages of the Newton iteration there's no need to +// solve this linear system to high accuracy. +// fp row_sparse_Jacobian__ILUCG::solve_linear_system (int rhs_gfn, int x_gfn, const struct linear_solver_pars& pars, @@ -654,8 +660,9 @@ fp *x = ps_.gridfn_data(x_gfn); const integer N = N_rows_; const fp *rhs = ps_.gridfn_data(rhs_gfn); const fp eps = pars.ILUCG_pars.error_tolerance; -const integer max_iterations = pars.ILUCG_pars.max_CG_iterations; -integer istatus, ierror; +const integer max_iterations = pars.ILUCG_pars.limit_CG_iterations + ? N_rows_ : 0; +integer istatus; // the actual linear solution #if defined(FP_IS_FLOAT) @@ -664,29 +671,50 @@ integer istatus, ierror; rhs, x, itemp_, rtemp_, &eps, &max_iterations, - &istatus, &ierror); + &istatus); #elif defined(FP_IS_DOUBLE) CCTK_FNAME(dilucg)(&N, IA_, JA_, A_, rhs, x, itemp_, rtemp_, &eps, &max_iterations, - &istatus, &ierror); + &istatus); #else #error "don't know fp datatype!" #endif -if (ierror != 0) +if (istatus < 0) then error_exit(ERROR_EXIT, "***** row_sparse_Jacobian__ILUCG::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n" -" error return istatus=%d from [sd]ilucg() routine!\n" +" error return from [sd]ilucg() routine!\n" +" istatus=%d < 0 ==> bad matrix structure, eg. zero diagonal element!\n" , rhs_gfn, x_gfn, int(istatus)); /*NOTREACHED*/ +// +// If istatus == 0, +// we didn't reach the convergence level within the specified +// maximum number of conjugate gradient (CG) iterations. :( +// +// In practice, however, most of the time we don't need an +// uber-accurate solution, so it's reasonable to just continue here +// with whatever approximate solution the CG iteration reached. +// +// If the Newton iteration converges, we're fine (if |Theta| is +// small then we have a good horizon position regardless of how we +// got there), so the only danger in continuing with an approximate +// linear-system solution here is that we may slow or even prevent +// the convergence of the Newton iteration. In practice, this +// doesn't seem to be a serious problem... +// +const int actual_N_iterations = (istatus == 0) ? max_iterations : istatus; if (print_msg_flag) then CCTK_VInfo(CCTK_THORNSTRING, - " solution found in %d CG iterations", - istatus); + " %d CG iteration%s%s", + actual_N_iterations, + ((actual_N_iterations == 1) ? "" : "s"), + ((istatus == 0) ? " (no convergence ==> continuing)" + : " (converged ok)")); return -1.0; // no condition number estimate available } @@ -705,11 +733,10 @@ row_sparse_Jacobian__UMFPACK::row_sparse_Jacobian__UMFPACK bool print_msg_flag /* = false */) : row_sparse_Jacobian(ps, C_index_origin, print_msg_flag), - print_msg_flag_(print_msg_flag), Control_(new double[UMFPACK_CONTROL]), Info_ (new double[UMFPACK_INFO ]), - solve_workspace_integer_(new integer[ N_rows_]), - solve_workspace_double_ (new double [5*N_rows_]), + solve_workspace_integer_(NULL), + solve_workspace_double_ (NULL), Symbolic_(NULL), Numeric_(NULL) { @@ -717,9 +744,7 @@ if (print_msg_flag) then CCTK_VInfo(CCTK_THORNSTRING, " UMFPACK linear-equations solver"); -#ifdef NOT_YET umfpack_defaults(Control_); -#endif } #endif // HAVE_ROW_SPARSE_JACOBIAN__UMFPACK @@ -731,12 +756,10 @@ umfpack_defaults(Control_); // row_sparse_Jacobian__UMFPACK::~row_sparse_Jacobian__UMFPACK() { -#ifdef NOT_YET if (Numeric_ != NULL) then umfpack_free_numeric (&Numeric_ ); -if (Symbolic_ != NULL +if (Symbolic_ != NULL) then umfpack_free_symbolic(&Symbolic_); -#endif delete[] solve_workspace_double_; delete[] solve_workspace_integer_; delete[] Info_; @@ -752,7 +775,9 @@ delete[] Control_; // being nominal-grid gridfns, using the UMFPACK sparse-LU-decomposition // package. // -// It returns an estimate of J's reciprocal condition number. +// Because the UMFPACK condition number estimator doesn't seem to be +// reliable, this function conditionally prints it, but returns -1.0 +// to indicate that no (reliable) condition number estimate is available. // fp row_sparse_Jacobian__UMFPACK::solve_linear_system (int rhs_gfn, int x_gfn, @@ -790,13 +815,11 @@ if (Symbolic_ == NULL) if (print_msg_flag) then CCTK_VInfo(CCTK_THORNSTRING, " UMFPACK symbolic factorization"); -#ifdef NOT_YET status = umfpack_symbolic(N_rows_, N_rows_, // matrix size IA_, JA_, // sparsity structure &Symbolic_, // umfpack sets Symbolic to // point to the new object Control_, Info_); -#endif if (status != UMFPACK_OK) then error_exit(ERROR_EXIT, "***** row_sparse_Jacobian__UMFPACK::solve_linear_system():\n" @@ -812,19 +835,44 @@ assert(Symbolic_ != NULL); if (print_msg_flag) then CCTK_VInfo(CCTK_THORNSTRING, " UMFPACK sparse LU decomposition"); -#ifdef NOT_YET status = umfpack_numeric(IA_, JA_, A_, // input matrix Symbolic_, &Numeric_, Control_, Info_); -#endif if (status != UMFPACK_OK) then error_exit(ERROR_EXIT, "***** row_sparse_Jacobian__UMFPACK::solve_linear_system():\n" " error return status=%d from umfpack_numeric() routine\n" , status); /*NOTREACHED*/ -const fp rcond = Info_[UMFPACK_RCOND]; +if (print_msg_flag) + then CCTK_VInfo(CCTK_THORNSTRING, + " UMFPACK rcond=%.1e (unreliable ==> ignoring)", + double(Info_[UMFPACK_RCOND])); +const fp rcond = -1.0; + + +// +// set parameters for linear solve +// +if (pars.UMFPACK_pars.N_II_iterations >= 0) + then Control_[UMFPACK_IRSTEP] = pars.UMFPACK_pars.N_II_iterations; + else { + // accept the UMFPACK default (which we've already set) + // ==> no-op here + } +const bool II_flag = (Control_[UMFPACK_IRSTEP] > 0); + + +// +// if we haven't done so already, allocate workspace +// (how much depends on whether or not UMFPACK does iterative improvement) +// +if (solve_workspace_integer_ == NULL) + then solve_workspace_integer_ = new integer[N_rows_]; +if (solve_workspace_double_ == NULL) + then solve_workspace_double_ = new double [II_flag ? 5*N_rows_ : N_rows_]; + // // solve the transposed linear system @@ -832,18 +880,21 @@ const fp rcond = Info_[UMFPACK_RCOND]; const double* rhs = ps_.gridfn_data(rhs_gfn); double* x = ps_.gridfn_data( x_gfn); if (print_msg_flag) - then CCTK_VInfo(CCTK_THORNSTRING, + then { + CCTK_VInfo(CCTK_THORNSTRING, " UMFPACK solution using LU decomposition"); -#ifdef NOT_YET + if (II_flag) + then CCTK_VInfo(CCTK_THORNSTRING, + " and iterative improvement"); + } status = umfpack_wsolve(UMFPACK_At, // solve transposed system - IA_, JA_, A_, // input matrix - x, // solution vector - rhs, // rhs vector - Numeric_, // LU decomposition object - Control_, Info_, - solve_workspace_integer_, - solve_workspace_double_); -#endif + IA_, JA_, A_, // input matrix + x, // solution vector + rhs, // rhs vector + Numeric_, // LU decomposition object + Control_, Info_, + solve_workspace_integer_, + solve_workspace_double_); if (status != UMFPACK_OK) then error_exit(ERROR_EXIT, "***** row_sparse_Jacobian__UMFPACK::solve_linear_system():\n" @@ -851,10 +902,8 @@ if (status != UMFPACK_OK) , status); /*NOTREACHED*/ -#ifdef NOT_YET if (Numeric_ != NULL) then umfpack_free_numeric (&Numeric_ ); -#endif return rcond; } diff --git a/src/elliptic/row_sparse_Jacobian.hh b/src/elliptic/row_sparse_Jacobian.hh index 6fe9b2d..ebdda0e 100644 --- a/src/elliptic/row_sparse_Jacobian.hh +++ b/src/elliptic/row_sparse_Jacobian.hh @@ -31,14 +31,14 @@ // define this to sanity-check the matrix data structures // after each new element is inserted - #define ROW_SPARSE_JACOBIAN__CHECK_AFTER_INSERT + #undef ROW_SPARSE_JACOBIAN__CHECK_AFTER_INSERT // define this to print a line or two of information // for each of the main data-structure operations, // e.g. zero the matrix, start a new row, insert a new matrix element, etc // ... note this produces a lot of output // (tens of megabytes for a reasonable-sized angular grid) - #undef ROW_SPARSE_JACOBIAN__LOG_MAIN_OPS + #define ROW_SPARSE_JACOBIAN__LOG_MAIN_OPS // define this to print and sanity-check the matrix data structures // at the start of each new row; note this produces a *huge* amount of @@ -159,9 +159,10 @@ protected: // parameter for growth policy // ... this should be > 0 // ... for debugging it may be useful to set this to a fairly - // small value, to force some grow_array() calls when - // the arrays are still small enough for easy examination - enum {base_growth_amount = 10}; // grow by this much + 1/2 current size + // small value (even 1), to force some grow_array() calls + // when the arrays are still small enough for easy examination + enum {base_growth_amount = 1000}; // grow by this much + // plus half the current size // sort each row's JA_[] and A_[] array elements // so the columns (JA_[] values) are in increasing order @@ -251,8 +252,6 @@ public: ~row_sparse_Jacobian__ILUCG(); private: - bool print_msg_flag_; - // work vectors for ILUCG subroutine // ... allocated by solve_linear_system() on first call integer* itemp_; @@ -282,8 +281,7 @@ public: // ... rhs and x are nominal-grid gridfns // ... does symbolic factorization on first call, // reuses this on all following calls - // ... returns estimated reciprocal condition number if known, - // -1.0 if condition number is unknown + // ... returns estimated reciprocal condition number // ... rcond = 0.0 ==> exactly singular // rcond < DBL_EPSILON ==> numerically singular // rcond = 1.0 ==> orthogonal matrix @@ -299,13 +297,12 @@ public: ~row_sparse_Jacobian__UMFPACK(); private: - bool print_msg_flag_; - // pointers to UMFPACK control arrays double *Control_; double *Info_; // pointers to UMFPACK workspace arrays + // ... allocated by solve_linear_system() on first call integer *solve_workspace_integer_; double *solve_workspace_double_; -- cgit v1.2.3