From 7dc88252a8079d648d18790df31f448615f31b9b Mon Sep 17 00:00:00 2001
From: jthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5>
Date: Wed, 4 Jun 2003 11:51:28 +0000
Subject: many changes to * use UMFPACK sparse matrix library * use new ILUCG
 sparse matrix library API * allow passing Cactus parameters down into the
 linear solvers * fix assorted small bugs

git-svn-id: http://svn.einsteintoolkit.org/cactus/EinsteinAnalysis/AHFinderDirect/trunk@1094 f88db872-0e4f-0410-b76b-b9085cfa78c5
---
 src/elliptic/Jacobian.hh            |   7 +-
 src/elliptic/dense_Jacobian.hh      |   9 +--
 src/elliptic/make.code.defn         |   2 +-
 src/elliptic/row_sparse_Jacobian.cc | 139 ++++++++++++++++++++++++------------
 src/elliptic/row_sparse_Jacobian.hh |  19 +++--
 5 files changed, 110 insertions(+), 66 deletions(-)

diff --git a/src/elliptic/Jacobian.hh b/src/elliptic/Jacobian.hh
index 803b1c7..15ead8e 100644
--- a/src/elliptic/Jacobian.hh
+++ b/src/elliptic/Jacobian.hh
@@ -187,10 +187,15 @@ struct	linear_solver_pars
 	struct	ILUCG_pars
 		{
 		fp  error_tolerance;
-		int max_CG_iterations;
+		// should we limit to N_rows_ conjugate gradient iterations?
+		bool limit_CG_iterations;
 		} ILUCG_pars;
 	struct	UMFPACK_pars
 		{
+		// number of iterative-improvement iterations to do
+		// inside UMFPACK after the sparse LU decompose/solve,
+		// each time we solve a linear system
+		int N_II_iterations;
 		} UMFPACK_pars;
 	};
 
diff --git a/src/elliptic/dense_Jacobian.hh b/src/elliptic/dense_Jacobian.hh
index cf0b8ba..b99cd84 100644
--- a/src/elliptic/dense_Jacobian.hh
+++ b/src/elliptic/dense_Jacobian.hh
@@ -87,8 +87,7 @@ public:
 	// solve linear system J.x = rhs via LAPACK
 	// ... rhs and x are nominal-grid gridfns
 	// ... overwrites Jacobian matrix with its LU decomposition
-	// ... returns estimated reciprocal condition number if known,
-	//	       -1.0 if condition number is unknown
+	// ... returns estimated reciprocal condition number
 	//	       ... rcond = 0.0         ==> exactly singular
 	//		   rcond < DBL_EPSILON ==> numerically singular
 	//		   rcond = 1.0         ==> orthogonal matrix
@@ -111,12 +110,6 @@ private:
 	integer *iwork_;	// size N_rows_
 	fp *rwork_;		// size 4*N_rows_
 	};
-
-//**************************************
-
-struct	Jacobian_pars__LAPACK
-	: public Jacobian_pars;
-
 #endif	// HAVE_DENSE_JACOBIAN__LAPACK
 
 //******************************************************************************
diff --git a/src/elliptic/make.code.defn b/src/elliptic/make.code.defn
index 6bc91e9..8f7883d 100644
--- a/src/elliptic/make.code.defn
+++ b/src/elliptic/make.code.defn
@@ -1,4 +1,4 @@
-# specification of things to be compiled in this directory
+# Cactus specification of things to be compiled in this directory
 # $Header$
 
 # Source files in this directory
diff --git a/src/elliptic/row_sparse_Jacobian.cc b/src/elliptic/row_sparse_Jacobian.cc
index 5dea587..a53c3fb 100644
--- a/src/elliptic/row_sparse_Jacobian.cc
+++ b/src/elliptic/row_sparse_Jacobian.cc
@@ -136,14 +136,14 @@ void CCTK_FCALL
 		     const float B[], float X[],
 		     integer ITEMP[], float RTEMP[],
 		     const float* EPS, const integer* ITER,
-		     integer* ISTATUS, integer* IERROR);
+		     integer* ISTATUS);
 void CCTK_FCALL
   CCTK_FNAME(dilucg)(const integer* N,
 		     const integer IA[], const integer JA[], const double A[],
 		     const double B[], double X[],
 		     integer ITEMP[], double RTEMP[],
 		     const double* EPS, const integer* ITER,
-		     integer* ISTATUS, integer* IERROR);
+		     integer* ISTATUS);
 
 #ifdef __cplusplus
        }	/* extern "C" */
@@ -435,24 +435,26 @@ void row_sparse_Jacobian::sort_each_row_into_column_order()
 // then copies the result back into JA_[] and A_[].
 //
 
-// how large of a buffer do we need?
-int N = 0;
+// buffer must be big enough to hold the largest row
+int max_N_in_row = 0;
 	  {
 	for (int II = 0 ; II < N_rows_ ; ++II)
 	{
-	N = jtutil::max(N, IA_[II+1]-IA_[II]);
+	max_N_in_row = jtutil::max(max_N_in_row, IA_[II+1]-IA_[II]);
 	}
 	  }
 
 // contiguous buffer for sorting
-struct matrix_element *const buffer = new struct matrix_element[N];
+struct matrix_element *const buffer = new struct matrix_element[max_N_in_row];
 
 	  {
 	for (int II = 0 ; II < N_rows_ ; ++II)
 	{
+	const int N_in_row = IA_[II+1] - IA_[II];
+
 	// copy this row's JA_[] and A_[] values to the buffer
 	const int start = IA_[II]-IO_;
-		for (int p = 0 ; p < N ; ++p)
+		for (int p = 0 ; p < N_in_row ; ++p)
 		{
 		const int posn = start + p;
 		buffer[p].JA = JA_[posn];
@@ -460,12 +462,11 @@ struct matrix_element *const buffer = new struct matrix_element[N];
 		}
 
 	// sort the buffer
-	qsort(static_cast<void *>(buffer), N,
-	      sizeof(struct matrix_element),
+	qsort(static_cast<void *>(buffer), N_in_row, sizeof(buffer[0]),
 	      &compare_matrix_elements);
 
 	// copy the buffer values back to this row's JA_[] and A_[]
-		for (int p = 0 ; p < N ; ++p)
+		for (int p = 0 ; p < N_in_row ; ++p)
 		{
 		const int posn = start + p;
 		JA_[posn] = buffer[p].JA;
@@ -582,7 +583,6 @@ row_sparse_Jacobian__ILUCG::row_sparse_Jacobian__ILUCG
 	 bool print_msg_flag /* = false */)
 	: row_sparse_Jacobian(ps, Fortran_index_origin,
 			      print_msg_flag),
-	  print_msg_flag_(print_msg_flag),
 	  itemp_(NULL),
 	  rtemp_(NULL)
 {
@@ -615,6 +615,12 @@ delete[] itemp_;
 //
 // It returns -1.0 (no condition number estimate).
 //
+// FIXME:
+// It would be more efficient to adjust the ILUCG error tolerance
+// based on the current Newton-iteration error level (in ../driver/Newton.cc).
+// That is, at early stages of the Newton iteration there's no need to
+// solve this linear system to high accuracy.
+//
 fp row_sparse_Jacobian__ILUCG::solve_linear_system
 	(int rhs_gfn, int x_gfn,
 	 const struct linear_solver_pars& pars,
@@ -654,8 +660,9 @@ fp *x = ps_.gridfn_data(x_gfn);
 const integer N = N_rows_;
 const fp *rhs = ps_.gridfn_data(rhs_gfn);
 const fp      eps            = pars.ILUCG_pars.error_tolerance;
-const integer max_iterations = pars.ILUCG_pars.max_CG_iterations;
-integer istatus, ierror;
+const integer max_iterations = pars.ILUCG_pars.limit_CG_iterations
+			       ? N_rows_ : 0;
+integer istatus;
 
 // the actual linear solution
 #if   defined(FP_IS_FLOAT)
@@ -664,29 +671,50 @@ integer istatus, ierror;
 		     rhs, x,
 		     itemp_, rtemp_,
 		     &eps, &max_iterations,
-		     &istatus, &ierror);
+		     &istatus);
 #elif defined(FP_IS_DOUBLE)
   CCTK_FNAME(dilucg)(&N,
 		     IA_, JA_, A_,
 		     rhs, x,
 		     itemp_, rtemp_,
 		     &eps, &max_iterations,
-		     &istatus, &ierror);
+		     &istatus);
 #else
   #error "don't know fp datatype!"
 #endif
-if (ierror != 0)
+if (istatus < 0)
    then error_exit(ERROR_EXIT,
 "***** row_sparse_Jacobian__ILUCG::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n"
-"        error return istatus=%d from [sd]ilucg() routine!\n"
+"        error return from [sd]ilucg() routine!\n"
+"        istatus=%d < 0 ==> bad matrix structure, eg. zero diagonal element!\n"
 		   ,
 		   rhs_gfn, x_gfn,
 		   int(istatus));				/*NOTREACHED*/
 
+//
+// If istatus == 0,
+//    we didn't reach the convergence level within the specified
+//    maximum number of conjugate gradient (CG) iterations. :(
+//
+//    In practice, however, most of the time we don't need an
+//    uber-accurate solution, so it's reasonable to just continue here
+//    with whatever approximate solution the CG iteration reached.
+//
+//    If the Newton iteration converges, we're fine (if |Theta| is
+//    small then we have a good horizon position regardless of how we
+//    got there), so the only danger in continuing with an approximate
+//    linear-system solution here is that we may slow or even prevent
+//    the convergence of the Newton iteration.  In practice, this
+//    doesn't seem to be a serious problem...
+//
+const int actual_N_iterations = (istatus == 0) ? max_iterations : istatus;
 if (print_msg_flag)
    then CCTK_VInfo(CCTK_THORNSTRING,
-		   "   solution found in %d CG iterations",
-		   istatus);
+		   "   %d CG iteration%s%s",
+		   actual_N_iterations,
+		   ((actual_N_iterations == 1) ? "" : "s"),
+		   ((istatus == 0) ? " (no convergence ==> continuing)"
+				   : " (converged ok)"));
 
 return -1.0;			// no condition number estimate available
 }
@@ -705,11 +733,10 @@ row_sparse_Jacobian__UMFPACK::row_sparse_Jacobian__UMFPACK
 	 bool print_msg_flag /* = false */)
 	: row_sparse_Jacobian(ps, C_index_origin,
 			      print_msg_flag),
-	  print_msg_flag_(print_msg_flag),
 	  Control_(new double[UMFPACK_CONTROL]),
 	  Info_   (new double[UMFPACK_INFO   ]),
-	  solve_workspace_integer_(new integer[  N_rows_]),
-	  solve_workspace_double_ (new double [5*N_rows_]),
+	  solve_workspace_integer_(NULL),
+	  solve_workspace_double_ (NULL),
 	  Symbolic_(NULL),
 	  Numeric_(NULL)
 {
@@ -717,9 +744,7 @@ if (print_msg_flag)
    then CCTK_VInfo(CCTK_THORNSTRING,
 		   "      UMFPACK linear-equations solver");
 
-#ifdef NOT_YET
 umfpack_defaults(Control_);
-#endif
 }
 #endif	// HAVE_ROW_SPARSE_JACOBIAN__UMFPACK
 
@@ -731,12 +756,10 @@ umfpack_defaults(Control_);
 //
 row_sparse_Jacobian__UMFPACK::~row_sparse_Jacobian__UMFPACK()
 {
-#ifdef NOT_YET
 if (Numeric_ != NULL)
    then umfpack_free_numeric (&Numeric_ );
-if (Symbolic_ != NULL
+if (Symbolic_ != NULL)
    then umfpack_free_symbolic(&Symbolic_);
-#endif
 delete[] solve_workspace_double_;
 delete[] solve_workspace_integer_;
 delete[] Info_;
@@ -752,7 +775,9 @@ delete[] Control_;
 // being nominal-grid gridfns, using the UMFPACK sparse-LU-decomposition
 // package.
 //
-// It returns an estimate of J's reciprocal condition number.
+// Because the UMFPACK condition number estimator doesn't seem to be
+// reliable, this function conditionally prints it, but returns -1.0
+// to indicate that no (reliable) condition number estimate is available.
 //
 fp row_sparse_Jacobian__UMFPACK::solve_linear_system
 	(int rhs_gfn, int x_gfn,
@@ -790,13 +815,11 @@ if (Symbolic_ == NULL)
 	if (print_msg_flag)
 	   then CCTK_VInfo(CCTK_THORNSTRING,
 			   "   UMFPACK symbolic factorization");
-#ifdef NOT_YET
 	status = umfpack_symbolic(N_rows_, N_rows_,	// matrix size
 				  IA_, JA_,		// sparsity structure
 				  &Symbolic_,	// umfpack sets Symbolic to
 						// point to the new object
 				  Control_, Info_);
-#endif
 	if (status != UMFPACK_OK)
 	   then error_exit(ERROR_EXIT,
 "***** row_sparse_Jacobian__UMFPACK::solve_linear_system():\n"
@@ -812,19 +835,44 @@ assert(Symbolic_ != NULL);
 if (print_msg_flag)
    then CCTK_VInfo(CCTK_THORNSTRING,
 		   "   UMFPACK sparse LU decomposition");
-#ifdef NOT_YET
 status = umfpack_numeric(IA_, JA_, A_,		// input matrix
 			 Symbolic_,
 			 &Numeric_,
 			 Control_, Info_);
-#endif
 if (status != UMFPACK_OK)
    then error_exit(ERROR_EXIT,
 "***** row_sparse_Jacobian__UMFPACK::solve_linear_system():\n"
 "        error return status=%d from umfpack_numeric() routine\n"
 		   ,
 		   status);					/*NOTREACHED*/
-const fp rcond = Info_[UMFPACK_RCOND];
+if (print_msg_flag)
+   then CCTK_VInfo(CCTK_THORNSTRING,
+		   "      UMFPACK rcond=%.1e (unreliable ==> ignoring)",
+		   double(Info_[UMFPACK_RCOND]));
+const fp rcond = -1.0;
+
+
+//
+// set parameters for linear solve
+//
+if (pars.UMFPACK_pars.N_II_iterations >= 0)
+   then Control_[UMFPACK_IRSTEP] = pars.UMFPACK_pars.N_II_iterations;
+   else {
+	// accept the UMFPACK default (which we've already set)
+	// ==> no-op here
+	}
+const bool II_flag = (Control_[UMFPACK_IRSTEP] > 0);
+
+
+//
+// if we haven't done so already, allocate workspace
+// (how much depends on whether or not UMFPACK does iterative improvement)
+//
+if (solve_workspace_integer_ == NULL)
+   then solve_workspace_integer_ = new integer[N_rows_];
+if (solve_workspace_double_ == NULL)
+   then solve_workspace_double_  = new double [II_flag ? 5*N_rows_ : N_rows_];
+
 
 //
 // solve the transposed linear system
@@ -832,18 +880,21 @@ const fp rcond = Info_[UMFPACK_RCOND];
 const double* rhs = ps_.gridfn_data(rhs_gfn);
       double*   x = ps_.gridfn_data(  x_gfn);
 if (print_msg_flag)
-   then CCTK_VInfo(CCTK_THORNSTRING,
+   then {
+	CCTK_VInfo(CCTK_THORNSTRING,
 		   "   UMFPACK solution using LU decomposition");
-#ifdef NOT_YET
+	if (II_flag)
+	   then CCTK_VInfo(CCTK_THORNSTRING,
+		   "                          and iterative improvement");
+	}
 status = umfpack_wsolve(UMFPACK_At,		// solve transposed system
-		       IA_, JA_, A_,		// input matrix
-		       x,			// solution vector
-		       rhs,			// rhs vector
-		       Numeric_,		// LU decomposition object
-		       Control_, Info_,
-		       solve_workspace_integer_,
-		       solve_workspace_double_);
-#endif
+			IA_, JA_, A_,		// input matrix
+			x,			// solution vector
+			rhs,			// rhs vector
+			Numeric_,		// LU decomposition object
+			Control_, Info_,
+			solve_workspace_integer_,
+			solve_workspace_double_);
 if (status != UMFPACK_OK)
    then error_exit(ERROR_EXIT,
 "***** row_sparse_Jacobian__UMFPACK::solve_linear_system():\n"
@@ -851,10 +902,8 @@ if (status != UMFPACK_OK)
 		   ,
 		   status);					/*NOTREACHED*/
 
-#ifdef NOT_YET
 if (Numeric_ != NULL)
    then umfpack_free_numeric (&Numeric_ );
-#endif
 
 return rcond;
 }
diff --git a/src/elliptic/row_sparse_Jacobian.hh b/src/elliptic/row_sparse_Jacobian.hh
index 6fe9b2d..ebdda0e 100644
--- a/src/elliptic/row_sparse_Jacobian.hh
+++ b/src/elliptic/row_sparse_Jacobian.hh
@@ -31,14 +31,14 @@
 
   // define this to sanity-check the matrix data structures
   // after each new element is inserted
-  #define ROW_SPARSE_JACOBIAN__CHECK_AFTER_INSERT
+  #undef  ROW_SPARSE_JACOBIAN__CHECK_AFTER_INSERT
 
   // define this to print a line or two of information
   // for each of the main data-structure operations,
   // e.g. zero the matrix, start a new row, insert a new matrix element, etc
   // ... note this produces a lot of output
   //     (tens of megabytes for a reasonable-sized angular grid)
-  #undef  ROW_SPARSE_JACOBIAN__LOG_MAIN_OPS
+  #define ROW_SPARSE_JACOBIAN__LOG_MAIN_OPS
 
   // define this to print and sanity-check the matrix data structures
   // at the start of each new row; note this produces a *huge* amount of
@@ -159,9 +159,10 @@ protected:
 	// parameter for growth policy
 	// ... this should be > 0
 	// ... for debugging it may be useful to set this to a fairly
-	//     small value, to force some  grow_array()  calls when
-	//     the arrays are still small enough for easy examination
-	enum {base_growth_amount = 10};	// grow by this much + 1/2 current size
+	//     small value (even 1), to force some  grow_array()  calls
+	//     when the arrays are still small enough for easy examination
+	enum {base_growth_amount = 1000};	// grow by this much
+						// plus half the current size
 
 	// sort each row's JA_[] and A_[] array elements
 	// so the columns (JA_[] values) are in increasing order
@@ -251,8 +252,6 @@ public:
 	~row_sparse_Jacobian__ILUCG();
 
 private:
-	bool print_msg_flag_;
-
 	// work vectors for ILUCG subroutine
 	// ... allocated by  solve_linear_system()  on first call
 	integer* itemp_;
@@ -282,8 +281,7 @@ public:
 	// ... rhs and x are nominal-grid gridfns
 	// ... does symbolic factorization on first call,
 	//     reuses this on all following calls
-	// ... returns estimated reciprocal condition number if known,
-	//	       -1.0 if condition number is unknown
+	// ... returns estimated reciprocal condition number
 	//	       ... rcond = 0.0         ==> exactly singular
 	//		   rcond < DBL_EPSILON ==> numerically singular
 	//		   rcond = 1.0         ==> orthogonal matrix
@@ -299,13 +297,12 @@ public:
 	~row_sparse_Jacobian__UMFPACK();
 
 private:
-	bool print_msg_flag_;
-
 	// pointers to UMFPACK control arrays
 	double *Control_;
 	double *Info_;
 
 	// pointers to UMFPACK workspace arrays
+	// ... allocated by  solve_linear_system()  on first call
 	integer *solve_workspace_integer_;
 	double  *solve_workspace_double_;
 
-- 
cgit v1.2.3