aboutsummaryrefslogtreecommitdiff
path: root/src/elliptic
diff options
context:
space:
mode:
authorjthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5>2003-01-22 12:27:46 +0000
committerjthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5>2003-01-22 12:27:46 +0000
commit8b0a74a327fbecb12f3d32ecff1fa28a6a6c2a15 (patch)
tree434af140b12333ff085ea5c5983d46387cbe6267 /src/elliptic
parent82e2251230c9dc90d1b4650bafc85e75bad61ad7 (diff)
* add support for sparse-matrix Jacobians ==> works!
* change default in param.ccl to use this * change default in src/include/config.h to default to no longer link in LAPACK routines * update documentation git-svn-id: http://svn.einsteintoolkit.org/cactus/EinsteinAnalysis/AHFinderDirect/trunk@931 f88db872-0e4f-0410-b76b-b9085cfa78c5
Diffstat (limited to 'src/elliptic')
-rw-r--r--src/elliptic/Jacobian.cc114
-rw-r--r--src/elliptic/Jacobian.hh202
-rw-r--r--src/elliptic/README40
-rw-r--r--src/elliptic/dense_Jacobian.cc113
-rw-r--r--src/elliptic/dense_Jacobian.hh102
-rw-r--r--src/elliptic/ilucg.f771055
-rw-r--r--src/elliptic/ilucg_wrapper.F7772
-rw-r--r--src/elliptic/lapack.h2
-rw-r--r--src/elliptic/lapack_wrapper.F7759
-rw-r--r--src/elliptic/make.code.defn4
-rw-r--r--src/elliptic/row_sparse_Jacobian.cc455
-rw-r--r--src/elliptic/row_sparse_Jacobian.hh282
12 files changed, 1977 insertions, 523 deletions
diff --git a/src/elliptic/Jacobian.cc b/src/elliptic/Jacobian.cc
index 048c6ac..08d8ecc 100644
--- a/src/elliptic/Jacobian.cc
+++ b/src/elliptic/Jacobian.cc
@@ -4,9 +4,8 @@
//
// <<<literal contents of "lapack.hh">>>
//
-// decode_Jacobian_type
-// new_Jacobian_sparsity
-// new_Jacobian_matrix
+// decode_Jacobian_store_solve_method -- decode string into internal enum
+// new_Jacobian -- object factory for Jacobian objects
//
#include <stdio.h>
@@ -44,111 +43,76 @@ using jtutil::error_exit;
//******************************************************************************
//
-// This function decodes a character string specifying a type (derived class)
-// of Jacobian, into an internal enum.
+// This function decodes a character string specifying a specific type
+// (derived class) of Jacobian, into an internal enum.
//
-enum Jacobian_type
- decode_Jacobian_type(const char Jacobian_type_string[])
+enum Jacobian_store_solve_method
+ decode_Jacobian_store_solve_method
+ (const char Jacobian_store_solve_method_string[])
{
-if (STRING_EQUAL(Jacobian_type_string, "dense matrix"))
+if (STRING_EQUAL(Jacobian_store_solve_method_string,
+ "dense matrix/LAPACK"))
then {
- #ifdef HAVE_DENSE_JACOBIAN
- return Jacobian_type__dense_matrix;
+ #ifdef HAVE_DENSE_JACOBIAN__LAPACK
+ return Jacobian__dense_matrix__LAPACK;
#endif
}
-else if (STRING_EQUAL(Jacobian_type_string, "row-oriented sparse matrix"))
+else if (STRING_EQUAL(Jacobian_store_solve_method_string,
+ "row-oriented sparse matrix/ILUCG"))
then {
- #ifdef HAVE_ROW_SPARSE_JACOBIAN
- return Jacobian_type__row_sparse_matrix;
+ #ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG
+ return Jacobian__row_sparse_matrix__ILUCG;
#endif
}
else error_exit(ERROR_EXIT,
-"decode_Jacobian_type(): unknown Jacobian_type_string=\"%s\"!\n",
- Jacobian_type_string); /*NOTREACHED*/
+"decode_Jacobian_store_solve_method():\n"
+" unknown Jacobian_store_solve_method_string=\"%s\"!\n",
+ Jacobian_store_solve_method_string); /*NOTREACHED*/
-// fall through to here ==> we recognize the matrix type,
+// fall through to here ==> we recognize the matrix store_solve_method,
// but it's not configured
error_exit(ERROR_EXIT,
"\n"
-" decode_Jacobian_type():\n"
-" Jacobian type \"%s\" is not configured in this binary;\n"
-" see \"src/include/config.hh\" for details on what Jacobian types\n"
-" are configured and how to change this\n"
+" decode_Jacobian_store_solve_method():\n"
+" Jacobian store_solve_method=\"%s\"\n"
+" is not configured in this binary see \"src/include/config.hh\"\n"
+" for details on what methods are configured and how to change this\n"
,
- Jacobian_type_string); /*NOTREACHED*/
+ Jacobian_store_solve_method_string); /*NOTREACHED*/
}
//******************************************************************************
//
-// This function is an "object factory" for Jacobian_sparsity objects:
-// it constructs and returns a pointer to a new-allocated Jacobian_sparsity
-// object of the specified derived type.
+// This function is an "object factory" for Jacobian objects: it constructs
+// and returns a pointer to a new-allocated Jacobian object of the
+// specified derived type.
//
// FIXME: the patch system shouldn't really have to be non-const, but
// the Jacobian constructors all require this to allow the
// linear solvers to directly update gridfns
//
-Jacobian_sparsity* new_Jacobian_sparsity(enum Jacobian_type Jac_type,
- patch_system& ps,
- bool print_msg_flag /* = false */)
+Jacobian* new_Jacobian(enum Jacobian_store_solve_method Jac_method,
+ patch_system& ps,
+ bool print_msg_flag /* = false */)
{
-switch (Jac_type)
+switch (Jac_method)
{
-#ifdef HAVE_DENSE_JACOBIAN
- case Jacobian_type__dense_matrix:
- return new dense_Jacobian_sparsity(ps, print_msg_flag);
+#ifdef HAVE_DENSE_JACOBIAN__LAPACK
+ case Jacobian__dense_matrix__LAPACK:
+ return new dense_Jacobian__LAPACK(ps, print_msg_flag);
#endif
-#ifdef HAVE_ROW_SPARSE_JACOBIAN
- case Jacobian_type__row_sparse_matrix:
- return new row_sparse_Jacobian_sparsity(ps, print_msg_flag);
+#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG
+ case Jacobian__row_sparse_matrix__ILUCG:
+ return new row_sparse_Jacobian__ILUCG(ps, print_msg_flag);
#endif
default:
error_exit(ERROR_EXIT,
-"new_Jacobian_sparsity(): unknown Jacobian_type=(int)%d!\n",
- Jac_type); /*NOTREACHED*/
- }
-}
-
-//******************************************************************************
-
-//
-// This function is an "object factory" for Jacobian_matrix objects:
-// it constructs and returns a pointer to a new-allocated Jacobian_matrix
-// object of the specified derived type.
-//
-// FIXME: the patch system shouldn't really have to be non-const, but
-// the Jacobian constructors all require this to allow the
-// linear solvers to directly update gridfns
-//
-Jacobian_matrix* new_Jacobian_matrix(enum Jacobian_type Jac_type,
- patch_system& ps,
- Jacobian_sparsity& JS,
- bool print_msg_flag /* = false */)
-{
-switch (Jac_type)
- {
-#ifdef HAVE_DENSE_JACOBIAN
- case Jacobian_type__dense_matrix:
- return new dense_Jacobian_matrix
- (ps, static_cast<dense_Jacobian_sparsity&>(JS),
- print_msg_flag);
-#endif
-
-#ifdef HAVE_ROW_SPARSE_JACOBIAN
- case Jacobian_type__row_sparse_matrix:
- return new row_sparse_Jacobian_matrix
- (ps, static_cast<row_sparse_Jacobian_sparsity&>(JS),
- print_msg_flag);
-#endif
-
- default:
- error_exit(ERROR_EXIT,
-"new_Jacobian_matrix(): unknown Jacobian_type=(int)%d!\n",
- Jac_type); /*NOTREACHED*/
+ "new_Jacobian(): unknown method=(int)%d!\n",
+ int(Jac_method)); /*NOTREACHED*/
}
}
diff --git a/src/elliptic/Jacobian.hh b/src/elliptic/Jacobian.hh
index d4b90ab..64ca182 100644
--- a/src/elliptic/Jacobian.hh
+++ b/src/elliptic/Jacobian.hh
@@ -3,10 +3,7 @@
//
// Jacobian -- ABC to describe Jacobian matrix
-// Jacobian_sparsity -- ABC to accumulate Jacobian sparsity info
-// Jacobian_matrix -- ABC to store/use Jacobian matrix
-//
-// decode_Jacobian_type - decode string into internal enum
+// decode_Jacobian_store_solve_method - decode string into internal enum
// new_Jacobian - factory method
//
@@ -16,53 +13,67 @@
//
//******************************************************************************
-//******************************************************************************
-//******************************************************************************
//
// These classes are used to store and manipulate Jacobian matrices
// for a patch system.
//
-// The inheritance diagram for the Jacobian classes looks like this:
+// Jacobian is an abstract base class (ABC) that defines the generic
+// Jacobian API. We derive classes from it for each type of sparsity
+// pattern, then we derive classes from them for each linear solver.
+//
+// At present the inheritance graph looks like this:
// Jacobian
-// Jacobian_sparsity
-// dense_Jacobian_sparsity #ifdef HAVE_DENSE_JACOBIAN
-// row_sparse_Jacobian_sparsity #ifdef HAVE_ROW_SPARSE_JACOBIAN
-// Jacobian_matrix
-// dense_Jacobian_matrix #ifdef HAVE_DENSE_JACOBIAN
-// row_sparse_Jacobian_matrix #ifdef HAVE_ROW_SPARSE_JACOBIAN
-// where the most-derived classes are all conditional on the noted #ifdef
-// symbols.
-//
-// The Jacobian_sparsity objects are used to accumulate information
-// on the Jacobian's sparsity pattern; this is then used in constructing
-// the corresponding Jacobian_matrix object (which actually stores
-// the Jacobian, and contains member functions to solve elliptic systems
-// using the Jacobian as the LHS matrix).
+// dense_Jacobian
+// dense_Jacobian__LAPACK
+// row_sparse_Jacobian
+// row_sparse_Jacobian__ILUCG
+// each derived class is inside a corresponding #ifdef. The #ifdef
+// symbols for linear solvers are specified in "../include/config.h";
+// those for sparsity patterns are specified below based on the ones
+// for linear solvers.
+//
+
+#ifdef HAVE_DENSE_JACOBIAN__LAPACK
+ #define HAVE_DENSE_JACOBIAN
+#endif
+
+#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG
+ #define HAVE_ROW_SPARSE_JACOBIAN
+#endif
+
+//******************************************************************************
+
//
// We assume a "traversal" interface for computing Jacobians, defined
-// by the Jacobian API. Because this API is shared by (all) the
-// Jacobian_sparsity and Jacobian_matrix classes, the same traversal
-// routines (prototyped to update Jacobian objects) can be used both
-// to record sparsity patterns (ignoring the actual floating-point values)
-// and to store the Jacobian matrix.
-//
-// Thus the typical code to construct and use a Jacobian matrix looks
-// like this:
+// by the Jacobian API. The typical code to construct and use a
+// Jacobian matrix looks like this:
// // prototype
-// void trverse_Jacobian(const patch_system& ps, Jacobian& Jac);
+// // ... calls Jac.zero_matrix()
+// // Jac.set_element()
+// // Jac.sum_into_element()
+// void traverse_Jacobian(const patch_system& ps, Jacobian& Jac);
//
-// Jacobian_sparsity& JS = new_Jacobian_sparsity(Jac_type, ps);
-// traverse_Jacobian(ps, JS);
-// Jacobian_matrix& Jac = new_Jacobian_matrix(Jac_type, ps, JS);
+// Jacobian& Jac = new_Jacobian(Jac_type, ps);
// traverse_Jacobian(ps, Jac);
// Jac.solve_linear_system(...);
//
+// Jac.zero_matrix()
+// traverse_Jacobian(ps, Jac); // must specify same sparsity pattern
+// // as before
+// Jac.solve_linear_system(...);
+//
+//
//
// A row/column index of the Jacobian (denoted II/JJ) is a 0-origin grid
// point number within the patch system.
//
+// Since many of the derived classes use Fortran routines, we also use
+// 1-origin indices; these have a leading "f", eg fII/fJJ.
+//
+
+//
// Note that the APIs here implicitly assume there is only a *single* gridfn
// in the Jacobian computation. (If we had N gridfns for this, then the
// Jacobian would really be a block matrix with N*N blocks.) This isn't
@@ -71,13 +82,13 @@
//******************************************************************************
-// ABC to define Jacobian-traversal interface
+// ABC to define Jacobian matrix
class Jacobian
{
public:
// basic meta-info
patch_system& my_patch_system() const { return ps_; }
- int NN() const { return NN_; }
+ int N_rows() const { return N_rows_; }
// convert (patch,irho,isigma) <--> row/column index
int II_of_patch_irho_isigma(const patch& p, int irho, int isigma)
@@ -87,6 +98,29 @@ public:
const
{ return ps_.patch_irho_isigma_of_gpn(II, irho,isigma); }
+
+ //
+ // convert C <--> Fortran indices
+ //
+ int csub(int f) const { return f-1; }
+ int fsub(int c) const { return c+1; }
+
+
+ //
+ // routines to access the matrix
+ //
+
+ // get a matrix element
+ virtual fp element(int II, int JJ) const = 0;
+
+ // is a given element explicitly stored, or implicitly 0 via sparsity
+ virtual bool is_explicitly_stored(int II, int JJ) const = 0;
+
+
+ //
+ // routines for setting values in the matrix
+ //
+
// zero the entire matrix
virtual void zero_matrix() = 0;
@@ -96,11 +130,28 @@ public:
// sum a value into a matrix element
virtual void sum_into_element(int II, int JJ, fp value) = 0;
-protected:
+
+ //
+ // solve linear system J.x = rhs
+ // ... rhs and x are nominal-grid gridfns
+ // ... may modify Jacobian matrix (eg for LU decomposition)
+ // ... returns 0.0 if matrix is numerically singular
+ // condition number (> 0.0) if known
+ // -1.0 if condition number is unknown
+ // ... once this has been called, the sparsity pattern should
+ // not be changed, i.e. no new nonzeros should be introduced
+ // into the matrix
+ //
+ virtual fp solve_linear_system(int rhs_gfn, int x_gfn) = 0;
+
+
+ //
// constructor, destructor
+ //
+protected:
Jacobian(patch_system& ps)
: ps_(ps),
- NN_(ps.N_grid_points())
+ N_rows_(ps.N_grid_points())
{ }
public:
virtual ~Jacobian() { }
@@ -114,66 +165,18 @@ private:
protected:
patch_system& ps_;
- int NN_; // ps_.N_grid_points()
- };
-
-//******************************************************************************
-
-// ABC to accumulate Jacobian sparsity info
-class Jacobian_sparsity
- : public Jacobian
- {
-public:
- Jacobian_sparsity(patch_system& ps,
- bool print_msg_flag = false)
- : Jacobian(ps)
- { }
- virtual ~Jacobian_sparsity() { }
- };
-
-//******************************************************************************
-
-// Jacobian_matrix -- ABC to store/use Jacobian matrix
-class Jacobian_matrix
- : public Jacobian
- {
-public:
- // access (get) a matrix element
- virtual fp element(int II, int JJ) const = 0;
-
- // is a given element explicitly stored, or implicitly 0 via sparsity
- virtual bool is_explicitly_stored(int II, int JJ) const = 0;
-
- // solve linear system J.x = rhs
- // ... rhs and x are nominal-grid gridfns
- // ... may modify Jacobian matrix (eg for LU decomposition)
- // ... returns 0.0 if matrix is numerically singular
- // condition number (> 0.0) if known
- // -1.0 if condition number is unknown
- virtual fp solve_linear_system(int rhs_gfn, int x_gfn) = 0;
-
- // constructor, destructor
- // ... note Jacobian_sparsity argument of constructor is non-const
- // to allow us to take over ownership of data structures
- Jacobian_matrix(patch_system& ps,
- Jacobian_sparsity& JS,
- bool print_msg_flag = false)
- : Jacobian(ps)
- { }
- virtual ~Jacobian_matrix() { }
+ int N_rows_;
};
//******************************************************************************
-//******************************************************************************
-//******************************************************************************
-enum Jacobian_type
+enum Jacobian_store_solve_method
{
-#ifdef HAVE_DENSE_JACOBIAN
- Jacobian_type__dense_matrix,
+#ifdef HAVE_DENSE_JACOBIAN__LAPACK
+ Jacobian__dense_matrix__LAPACK,
#endif
-#ifdef HAVE_ROW_SPARSE_JACOBIAN
- Jacobian_type__row_sparse_matrix // no comma on last entry in enum
+#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG
+ Jacobian__row_sparse_matrix__ILUCG // no comma on last entry in enum
#endif
};
@@ -182,17 +185,12 @@ enum Jacobian_type
//
// decode string into internal enum
-enum Jacobian_type
- decode_Jacobian_type(const char Jacobian_type_string[]);
+enum Jacobian_store_solve_method
+ decode_Jacobian_store_solve_method
+ (const char Jacobian_store_solve_method_string[]);
// "object factory" routines to construct and return
// pointers to new-allocated objects of specified derived types
-class Jacobian_sparsity;
-class Jacobian_matrix;
-Jacobian_sparsity* new_Jacobian_sparsity(enum Jacobian_type Jac_type,
- patch_system& ps,
- bool print_msg_flag = false);
-Jacobian_matrix* new_Jacobian_matrix(enum Jacobian_type Jac_type,
- patch_system& ps,
- Jacobian_sparsity& JS,
- bool print_msg_flag = false);
+Jacobian* new_Jacobian(enum Jacobian_store_solve_method Jac_method,
+ patch_system& ps,
+ bool print_msg_flag = false);
diff --git a/src/elliptic/README b/src/elliptic/README
index e20a5ba..09fa662 100644
--- a/src/elliptic/README
+++ b/src/elliptic/README
@@ -3,6 +3,40 @@ This directory contains code for solving elliptic systems on our
The main files are as follows:
-Jacobian.{cc,hh}
- These classes stores a Jacobian matrix and interface to
- linear-solver routines.
+Jacobian.{hh,cc}
+ These define a Jacobian class. This is a generic interface
+ (a C++ abstract base class) to set up a Jacobian matrix and
+ solve linear systems with this matrix as the right hand side
+ matrix.
+
+dense_Jacobian.{hh,cc}
+ These define a dense_Jacobian class (to store a Jacobian matrix
+ in a Fortran dense-matrix format) and a dense_Jacobian__LAPACK
+ class (to solve linear systems using LAPACK routines). These
+ classes are derived from (and hence share the generic interface
+ of) the Jacobian class.
+
+row_sparse_Jacobian.{hh,cc}
+ These define a row_sparse_Jacobian class (to store a Jacobian
+ matrix in a row-oriented sparse-matrix format) and a
+ row_sparse_Jacobian__ILUCG class (to solve linear systems using
+ an ILUCG). These classes are derived from (and hence share the
+ generic interface of) the Jacobian class.
+
+lapack.h
+ Header file defining C/C++ prototypes for a few LAPACK routines.
+lapack_wrapper.F77
+ Wrapper routines around a few LAPACK routines, to avoid problems
+ with C <--> Fortran passing of character-string arguments.
+
+ilucg.h
+ Header file defining C/C++ prototypes for ILUCG routines
+ilucg.f77
+ ILUCG (Incomplete LU-decomposition / conjugate gradiant)
+ subroutine. I don't know the original author of this routine;
+ it was provided to me by Tom Nicol, UBC Computing Center,
+ in c.1985.
+ilucg_wrapper.F77
+ Wrapper routines around the ILUCG routines, to avoid problems
+ with C <--> Fortran passing of Fortran logical return results.
+
diff --git a/src/elliptic/dense_Jacobian.cc b/src/elliptic/dense_Jacobian.cc
index 72dc797..7999513 100644
--- a/src/elliptic/dense_Jacobian.cc
+++ b/src/elliptic/dense_Jacobian.cc
@@ -1,15 +1,20 @@
-// Jacobian.cc -- data structures for the Jacobian matrix
+// dense_Jacobian.cc -- dense-matrix Jacobian
// $Header$
//
// <<<literal contents of "lapack.hh">>>
//
#ifdef HAVE_DENSE_JACOBIAN
-// dense_Jacobian_matrix::dense_Jacobian_matrix
-// dense_Jacobian_matrix::~dense_Jacobian_matrix
-// dense_Jacobian_matrix::zero_matrix
-// dense_Jacobian_matrix::solve_linear_system
+// dense_Jacobian::dense_Jacobian
+// dense_Jacobian::zero_matrix
#endif
+//
+#ifdef HAVE_DENSE_JACOBIAN__LAPACK
+// dense_Jacobian__LAPACK::dense_Jacobian__LAPACK
+// dense_Jacobian__LAPACK::~dense_Jacobian__LAPACK
+// dense_Jacobian__LAPACK::solve_linear_system
+#endif
+//
#include <stdio.h>
#include <assert.h>
@@ -45,8 +50,10 @@ using jtutil::error_exit;
//******************************************************************************
//
-// FIXME: Cactus's CCTK_FCALL() isn't expanded in .h files (this is a bug),
-// so we include the contents of "lapack.h" inline here.
+// FIXME:
+// Cactus's CCTK_FCALL() isn't expanded in .h files (this is a bug),
+// so we include the contents of "lapack.h" inline here. This is a
+// kludge! :( :(
//#include "lapack.h"
//
@@ -57,7 +64,7 @@ using jtutil::error_exit;
/*
* prerequisites:
* "cctk.h"
- * "config.hh"
+ * "config.h"
*/
#ifdef __cplusplus
@@ -116,21 +123,17 @@ void CCTK_FCALL
#ifdef HAVE_DENSE_JACOBIAN
//
-// This function constructs a dense_Jacobian_matrix object.
+// This function constructs a dense_Jacobian object.
//
-dense_Jacobian_matrix::dense_Jacobian_matrix(patch_system& ps,
- dense_Jacobian_sparsity& JS,
- bool print_msg_flag /* = false */)
- : Jacobian_matrix(ps, JS),
- matrix_(0,NN()-1, 0,NN()-1),
- pivot_(new integer[NN()]),
- iwork_(new integer[NN()]),
- rwork_(new fp[4*NN()]) // no comma
+dense_Jacobian::dense_Jacobian(patch_system& ps,
+ bool print_msg_flag /* = false */)
+ : Jacobian(ps),
+ matrix_(0,N_rows_-1, 0,N_rows_-1)
{
if (print_msg_flag)
then CCTK_VInfo(CCTK_THORNSTRING,
- "dense_Jacobian_matrix ctor: NN()=%d",
- NN());
+ "constructing dense_Jacobian: N_rows_=%d",
+ N_rows_);
}
#endif // HAVE_DENSE_JACOBIAN
@@ -138,55 +141,73 @@ if (print_msg_flag)
#ifdef HAVE_DENSE_JACOBIAN
//
-// THis function destroys a dense_Jacobian_matrix object.
+// This function zeros a dense_Jacobian object.
+// Bugs: it could be made more efficient...
//
-dense_Jacobian_matrix::~dense_Jacobian_matrix()
+void dense_Jacobian::zero_matrix()
{
-delete[] iwork_;
-delete[] rwork_;
-delete[] pivot_;
+ for (int JJ = 0 ; JJ < N_rows_ ; ++JJ)
+ {
+ for (int II = 0 ; II < N_rows_ ; ++II)
+ {
+ set_element(II,JJ, 0.0);
+ }
+ }
}
#endif // HAVE_DENSE_JACOBIAN
//******************************************************************************
+//******************************************************************************
+//******************************************************************************
-#ifdef HAVE_DENSE_JACOBIAN
+#ifdef HAVE_DENSE_JACOBIAN__LAPACK
//
-// This function zeros a dense_Jacobian_matrix object.
-// Bugs: it could be made more efficient...
+// This function constructs a dense_Jacobian__LAPACK object.
//
-void dense_Jacobian_matrix::zero_matrix()
+dense_Jacobian__LAPACK::dense_Jacobian__LAPACK(patch_system& ps,
+ bool print_msg_flag /* = false */)
+ : dense_Jacobian(ps, print_msg_flag),
+ pivot_(new integer[ N_rows_]),
+ iwork_(new integer[ N_rows_]),
+ rwork_(new fp [4*N_rows_])
{
- for (int JJ = 0 ; JJ < NN() ; ++JJ)
- {
- for (int II = 0 ; II < NN() ; ++II)
- {
- set_element(II,JJ, 0.0);
- }
- }
}
-#endif // HAVE_DENSE_JACOBIAN
+#endif // HAVE_DENSE_JACOBIAN__LAPACK
//******************************************************************************
-#ifdef HAVE_DENSE_JACOBIAN
+#ifdef HAVE_DENSE_JACOBIAN__LAPACK
+//
+// This function destroys a dense_Jacobian__LAPACK object.
+//
+dense_Jacobian__LAPACK::~dense_Jacobian__LAPACK()
+{
+delete[] rwork_;
+delete[] iwork_;
+delete[] pivot_;
+}
+#endif // HAVE_DENSE_JACOBIAN__LAPACK
+
+//******************************************************************************
+
+#ifdef HAVE_DENSE_JACOBIAN__LAPACK
//
// This function solves the linear system J.x = rhs, with rhs and x
// being nominal-grid gridfns, using LAPACK LU-decomposition routines.
// It returns the estimated infinity-norm condition number of the linear
// system, or 0.0 if the matrix is numerically singular
//
-fp dense_Jacobian_matrix::solve_linear_system(int rhs_gfn, int x_gfn)
+fp dense_Jacobian__LAPACK::solve_linear_system(int rhs_gfn, int x_gfn)
{
-const fp *rhs = my_patch_system().gridfn_data(rhs_gfn);
-fp *x = my_patch_system().gridfn_data(x_gfn);
+const fp *rhs = ps_.gridfn_data(rhs_gfn);
+fp *x = ps_.gridfn_data(x_gfn);
fp *A = matrix_.data_array();
const integer infinity_norm_flag = 0;
const integer stride = 1;
const integer NRHS = 1;
-const integer N = NN();
-const integer N2 = NN() * NN();
+const integer N = N_rows_;
+const integer N2 = N_rows_ * N_rows_;
// compute the infinity-norm of the matrix A
// ... max_posn = 1-origin index of A[] element with largest absolute value
@@ -204,7 +225,7 @@ const fp A_infnorm = jtutil::abs(A[max_posn-1]);
// ... [sd]gesv() use an "in out" design, where the same argument
// is used for both rhs and x ==> we must first copy rhs to x
//
- for (int II = 0 ; II < NN() ; ++II)
+ for (int II = 0 ; II < N_rows_ ; ++II)
{
x[II] = rhs[II];
}
@@ -220,8 +241,8 @@ integer info;
if (info < 0)
then error_exit(ERROR_EXIT,
"\n"
-"***** dense_Jacobian_matrix::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n"
-" error return (bad argument) info=%d from [sd]gesv() LAPACK routine!\n"
+"***** dense_Jacobian__LAPACK::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n"
+" error return (bad argument) info=%d from [sd]gesv() LAPACK routine!"
,
rhs_gfn, x_gfn,
int(info)); /*NOTREACHED*/
@@ -248,4 +269,4 @@ if (rcond == 0.0)
// *** (singular matrix)
return 1.0/rcond;
}
-#endif // HAVE_DENSE_JACOBIAN
+#endif // HAVE_DENSE_JACOBIAN__LAPACK
diff --git a/src/elliptic/dense_Jacobian.hh b/src/elliptic/dense_Jacobian.hh
index 1e269d5..0c85f47 100644
--- a/src/elliptic/dense_Jacobian.hh
+++ b/src/elliptic/dense_Jacobian.hh
@@ -1,10 +1,12 @@
-// Jacobian.hh -- data structures for the Jacobian matrix
+// dense_Jacobian.hh -- dense-matrix Jacobian
// $Header$
//
#ifdef HAVE_DENSE_JACOBIAN
-// dense_Jacobian_sparsity -- (no-op) accumulate dense Jacobian sparsity info
-// dense_Jacobian_matrix -- Jacobian stored as a dense matrix
+// dense_Jacobian -- Jacobian stored as a dense matrix
+#endif
+#ifdef HAVE_DENSE_JACOBIAN__LAPACK
+// dense_Jacobian__LAPACK -- dense_Jacobian with LAPACK linear solver
#endif
//
@@ -18,44 +20,30 @@
#ifdef HAVE_DENSE_JACOBIAN
//
-// This class accumulates the sparsity pattern of a dense-matrix Jacobian.
-//
-// Since a dense-matrix Jacobian doesn't care about the sparsity pattern,
-// the member functions of this class are just no-ops.
+// This class stores the Jacobian as a dense matrix in Fortran (column)
+// order.
//
-class dense_Jacobian_sparsity
- : public Jacobian_sparsity
+class dense_Jacobian
+ : public Jacobian
{
public:
- // record the zeroing of the entire matrix
- void zero_matrix() { }
+ //
+ // routines to access the matrix
+ //
- // record the setting of a matrix element to a specified value
- void set_element(int II, int JJ, fp value) { }
+ // get a matrix element
+ fp element(int II, int JJ) const
+ { return matrix_(JJ,II); }
- // record the summing of a value into a matrix element
- void sum_into_element(int II, int JJ, fp value) { }
+ // dense matrix ==> all elements are explicitly stored
+ bool is_explicitly_stored(int II, int JJ) const
+ { return true; }
- // constructor, destructor
- dense_Jacobian_sparsity(patch_system& ps,
- bool print_msg_flag = false)
- : Jacobian_sparsity(ps, print_msg_flag)
- { }
- ~dense_Jacobian_sparsity() { }
- };
-#endif // HAVE_DENSE_JACOBIAN
-//******************************************************************************
+ //
+ // routines for setting values in the matrix
+ //
-#ifdef HAVE_DENSE_JACOBIAN
-//
-// This class stores the Jacobian as a dense matrix in Fortran (column)
-// order.
-//
-class dense_Jacobian_matrix
- : public Jacobian_matrix
- {
-public:
// zero the entire matrix
void zero_matrix();
@@ -67,16 +55,30 @@ public:
void sum_into_element(int II, int JJ, fp value)
{ matrix_(JJ,II) += value; }
- // access (get) a matrix element
- fp element(int II, int JJ)
- const
- { return matrix_(JJ,II); }
+ //
+ // constructor, destructor
+ //
+protected:
+ dense_Jacobian(patch_system& ps,
+ bool print_msg_flag = false);
+ ~dense_Jacobian() { }
+
+protected:
+ // Fortran storage order ==> subscripts are (JJ,II)
+ jtutil::array2d<fp> matrix_;
+ };
+#endif // HAVE_DENSE_JACOBIAN
- // this is a dense matrix ==> all values are stored explicitly
- bool is_explicitly_stored(int II, int JJ)
- const
- { return true; }
+//******************************************************************************
+#ifdef HAVE_DENSE_JACOBIAN__LAPACK
+//
+// This class defines the linear solver routine using LAPACK.
+//
+class dense_Jacobian__LAPACK
+ : public dense_Jacobian
+ {
+public:
// solve linear system J.x = rhs via LAPACK
// ... rhs and x are nominal-grid gridfns
// ... overwrites Jacobian matrix with its LU decomposition
@@ -87,20 +89,16 @@ public:
// constructor, destructor
public:
- dense_Jacobian_matrix(patch_system& ps,
- dense_Jacobian_sparsity& JS,
- bool print_msg_flag = false);
- ~dense_Jacobian_matrix();
+ dense_Jacobian__LAPACK(patch_system& ps,
+ bool print_msg_flag = false);
+ ~dense_Jacobian__LAPACK();
private:
- // subscripts are (JJ,II) (both 0-origin)
- jtutil::array2d<fp> matrix_;
-
// pivot vector for LAPACK routines
- integer *pivot_;
+ integer *pivot_; // size N_rows_
// work vectors for LAPACK condition number computation
- integer *iwork_;
- fp *rwork_;
+ integer *iwork_; // size N_rows_
+ fp *rwork_; // size 4*N_rows_
};
-#endif // HAVE_DENSE_JACOBIAN
+#endif // HAVE_DENSE_JACOBIAN__LAPACK
diff --git a/src/elliptic/ilucg.f77 b/src/elliptic/ilucg.f77
new file mode 100644
index 0000000..68f99f5
--- /dev/null
+++ b/src/elliptic/ilucg.f77
@@ -0,0 +1,1055 @@
+ LOGICAL FUNCTION SILUCG(N,IA,JA,A,B,X,ITEMP,RTEMP,EPS,ITER,IE)
+ IMPLICIT REAL (A-H,O-Z)
+C
+C INCOMPLETE LU DECOMPOSITION-CONJUGATE GRADIENT
+C - -- - -
+C WHERE:
+C |N| IS THE NUMBER OF EQUATIONS. IF N < 0, ITEMP AND
+C RTEMP CONTAIN THE ILU FROM A PREVIOUS CALL AND
+C B AND X ARE THE NEW RHS AND INITIAL GUESS.
+C IA IS AN INTEGER ARRAY DIMENSIONED |N|+1. IA(I) IS THE
+C INDEX INTO ARRAYS JA AND A OF THE FIRST NON-ZERO
+C ELEMENT IN ROW I. LET MAX=IA(|N|+1)-IA(1).
+C JA IS AN INTEGER ARRAY DIMENSIONED MAX. JA(K) GIVES
+C THE COLUMN NUMBER OF A(K).
+C A IS A REAL ARRAY DIMENSIONED MAX. IT CONTAINS THE
+C NONZERO ELEMENTS OF THE MATRIX STORED BY ROW.
+C B CONTAINS THE RHS VECTOR.
+C X IS A REAL ARRAY DIMENSIONED |N|. ON ENTRY, IT CONTAINS
+C AN INITIAL ESTIMATE; ON EXIT, THE SOLUTION.
+C ITEMP IS AN INTEGER SCRATCH ARRAY DIMENSIONED 3*(|N|+MAX)+2.
+C RTEMP IS AN REAL SCRATCH ARRAY DIMENSIONED 4*|N|+MAX.
+C EPS IS THE CONVERGENCE CRITERIA. IT SPECIFIES THE RELATIVE
+C ERROR ALLOWED IN THE SOLUTION. TO BE PRECISE, CONVERGENCE
+C IS DEEMED TO HAVE OCCURED WHEN THE INFINITY-NORM OF THE
+C CHANGE IN THE SOLUTION IN ONE ITERATION IS .LE. EPS * THE
+C INFINITY-NORM OF THE CURRENT SOLUTION. HOWEVER, IF EPS
+C .LT. 0.0, IT IS INTERNALLY SCALED BY THE MACHINE PRECISION,
+C SO THAT, FOR EXAMPLE, EPS = -256.0 WILL ALLOW THE LAST TWO
+C HEXADECIMAL DIGITS OF THE SOLUTION TO BE IN ERROR.
+C ITER GIVES THE REQUESTED NUMBER OF ITERATIONS, OR IS 0 FOR "NO LIMIT".
+C IE IS AN INTEGER VARIABLE. FOR NORMAL RETURN, IT GIVES
+C THE NUMBER OF ITERATIONS DONE (NEGATIVE IS NO CONVERGENCE).
+C ON ERROR RETURN, THE ROW IN ERROR.
+C
+C THE FUNCTION RETURNS .FALSE. IF ALL'S WELL, .TRUE. ON AN ERROR RETURN.
+C
+C (MODIFIED TO RETURN LOGICAL VALUE, J. THORNBURG, 13/MAY/85.)
+C (MODIFIED TO ADD CONVERGENCE CRITERIA, J. THORNBURG, 17/MAY/85.)
+C (MODIFIED CONVERGENCE CRITERIA, J. THORNBURG, 4/AUG/89.)
+C (MODIFIED TO AVOID SKIP RETURNS, J. THORNBURG, 10/SEP/89.)
+C
+C REFERENCE:
+C D.S.KERSHAW,"THE INCOMPLETE CHOLESKY-CONJUGATE GRADIENT
+C METHOD FOR INTERATIVE SOLUTION OF LINEAR EQUATIONS",
+C J.COMPUT.PHYS. JAN 1978 PP 43-65
+C
+ DIMENSION IA(*),JA(*),A(*),B(*),X(*),ITEMP(*),RTEMP(*)
+ LOGICAL SLU0
+ NP=IABS(N)
+ IE=0
+ IF (NP.EQ.0) GO TO 20
+C CALCULATE INDICES FOR BREAKING UP TEMPORARY ARRAYS.
+ N1=NP+1
+ MAX=IA(N1)-IA(1)
+ ILU=1
+ JLU=ILU+N1
+ ID=JLU+MAX
+ IC=ID+NP
+ JC=IC+N1
+ JCI=JC+MAX
+ IR=1
+ IP=IR+NP
+ IS1=IP+NP
+ IS2=IS1+NP
+ IALU=IS2+NP
+ IF (N.LT.0) GO TO 10
+C DO INCOMPLETE LU DECOMPOSITION
+ IF (SLU0(NP,IA,JA,A,ITEMP(IC),ITEMP(JC),ITEMP(JCI),RTEMP(IALU),
+ * ITEMP(ILU),ITEMP(JLU),ITEMP(ID),RTEMP(IR),IE)) GOTO 20
+C AND DO CONJUGATE GRADIENT ITERATIONS
+C CALL MODIFIED TO ADD ADJUSTABLE CONVERGENCE CRITERIA EPS
+C - J. THORNBURG, 17/MAY/85.
+10 CALL SNCG0(NP,IA,JA,A,B,X,ITEMP(ILU),ITEMP(JLU),ITEMP(ID),
+ * RTEMP(IALU),RTEMP(IR),RTEMP(IP),RTEMP(IS1),RTEMP(IS2),
+ * EPS,ITER,IE)
+ SILUCG = .FALSE.
+ RETURN
+20 SILUCG = .TRUE.
+ RETURN
+ END
+ LOGICAL FUNCTION SLU0(N,IA,JA,A,IC,JC,JCI,ALU,ILU,JLU,ID,V,IE)
+ IMPLICIT REAL (A-H,O-Z)
+ DIMENSION IA(*),JA(*),A(*),IC(*),JC(*),JCI(*),
+ * ALU(*),ILU(*),JLU(*),ID(N),V(N)
+ LOGICAL NODIAG
+ COMMON /ICBD00/ ICBAD
+C INCOMPLETE LU DECOMPOSITION
+C WHERE:
+C N,IA,JA, AND A ARE DESCRIBED IN FUNCTION SILUCG
+C IC IS AN INTEGER ARRAY DIMENSIONED N+1, IC(J) GIVES THE
+C INDEX OF THE FIRST NONZERO ELEMENT IN COLMN J IN
+C ARRAY JC.
+C JC IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A.
+C JC(K) GIVES THE ROW NUMBER OF THE K'TH ELEMENT IN
+C THE COLUMN STRUCTURE.
+C JCI IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A.
+C JCI(K) GIVES THE INDEX INTO ARRAY A OF THE K'TH ELEMENT
+C OF THE COLUMN STRUCTURE.
+C ALU HAS THE SAME DIMENSION AS A. ON EXIT, IT WILL
+C CONTAIN THE INCOMPLETE LU DECOMPOSITION OF A WITH THE
+C RECIPROCALS OF THE DIAGONAL ELEMENTS OF U.
+C ILU AND JLU CORRESPONDS TO IA AND JA BUT FOR ALU.
+C ID IS AN INTEGER ARRAY DIMENSIONED N. IT CONTAINS
+C INDICES TO THE DIAGONAL ELEMENTS OF U.
+C V IS A REAL SCRATCH VECTOR OF LENGTH N.
+C IE GIVES THE ROW NUMBER IN ERROR.
+C
+C RETURN VALUE = .FALSE. IF ALL IS WELL, .TRUE. IF ERROR.
+C
+C NOTE: SLU0 SETS ARGUMENTS IC THROUGH V.
+C
+ ICBAD=0
+C ZERO COUNT OF ZERO DIAGONAL ELEMENTS IN U.
+C
+C FIRST CHECK STRUCTURE OF A AND BUILD COLUMN STRUCTURE
+ DO 10 I=1,N
+ IC(I)=0
+10 CONTINUE
+ DO 30 I=1,N
+ KS=IA(I)
+ KE=IA(I+1)-1
+ NODIAG=.TRUE.
+ DO 20 K=KS,KE
+ J=JA(K)
+ IF (J.LT.1.OR.J.GT.N) GO TO 210
+ IC(J)=IC(J)+1
+ IF (J.EQ.I) NODIAG=.FALSE.
+20 CONTINUE
+ IF (NODIAG) GO TO 210
+30 CONTINUE
+C MAKE IC INTO INDICES
+ KOLD=IC(1)
+ IC(1)=1
+ DO 40 I=1,N
+ KNEW=IC(I+1)
+ IF (KOLD.EQ.0) GO TO 210
+ IC(I+1)=IC(I)+KOLD
+ KOLD=KNEW
+40 CONTINUE
+C SET JC AND JCI FOR COLUMN STRUCTURE
+ DO 60 I=1,N
+ KS=IA(I)
+ KE=IA(I+1)-1
+ DO 50 K=KS,KE
+ J=JA(K)
+ L=IC(J)
+ IC(J)=L+1
+ JC(L)=I
+ JCI(L)=K
+50 CONTINUE
+60 CONTINUE
+C FIX UP IC
+ KOLD=IC(1)
+ IC(1)=1
+ DO 70 I=1,N
+ KNEW=IC(I+1)
+ IC(I+1)=KOLD
+ KOLD=KNEW
+70 CONTINUE
+C FIND SORTED ROW STRUCTURE FROM SORTED COLUMN STRUCTURE
+ NP=N+1
+ DO 80 I=1,NP
+ ILU(I)=IA(I)
+80 CONTINUE
+C MOVE ELEMENTS, SET JLU AND ID
+ DO 100 J=1,N
+ KS=IC(J)
+ KE=IC(J+1)-1
+ DO 90 K=KS,KE
+ I=JC(K)
+ L=ILU(I)
+ ILU(I)=L+1
+ JLU(L)=J
+ KK=JCI(K)
+ ALU(L)=A(KK)
+ IF (I.EQ.J) ID(J)=L
+90 CONTINUE
+100 CONTINUE
+C RESET ILU (COULD JUST USE IA)
+ DO 110 I=1,NP
+ ILU(I)=IA(I)
+110 CONTINUE
+C FINISHED WITH SORTED COLUMN AND ROW STRUCTURE
+C
+C DO LU DECOMPOSITION USING GAUSSIAN ELIMINATION
+ DO 120 I=1,N
+ V(I)=0.0
+120 CONTINUE
+ DO 200 IROW=1,N
+ I=ID(IROW)
+ PIVOT=ALU(I)
+ IF (PIVOT.NE.0.0) GO TO 140
+C THIS CASE MAKES THE ILU LESS ACCURATE
+ ICBAD=ICBAD+1
+ KS=ILU(IROW)
+ KE=ILU(IROW+1)-1
+ DO 130 K=KS,KE
+ PIVOT=PIVOT+ABS(ALU(K))
+130 CONTINUE
+ IF (PIVOT.EQ.0.0) GO TO 220
+140 PIVOT=1.0/PIVOT
+ ALU(I)=PIVOT
+ KKS=I+1
+ KKE=ILU(IROW+1)-1
+ IF (KKS.GT.KKE) GO TO 160
+ DO 150 K=KKS,KKE
+ J=JLU(K)
+ V(J)=ALU(K)
+150 CONTINUE
+C FIX L IN COLUMN IROW AND DO PARTIAL LU IN SUBMATRIX
+160 KS=IC(IROW)
+ KE=IC(IROW+1)-1
+ DO 190 K=KS,KE
+ I=JC(K)
+ IF (I.LE.IROW) GO TO 190
+ LS=ILU(I)
+ LE=ILU(I+1)-1
+ DO 180 L=LS,LE
+ J=JLU(L)
+ IF (J.LT.IROW) GO TO 180
+ IF (J.GT.IROW) GO TO 170
+ AMULT=ALU(L)*PIVOT
+ ALU(L)=AMULT
+ IF (AMULT.EQ.0.0) GO TO 190
+ GO TO 180
+170 IF (V(J).EQ.0.0) GO TO 180
+ ALU(L)=ALU(L)-AMULT*V(J)
+180 CONTINUE
+190 CONTINUE
+C RESET V
+ IF (KKS.GT.KKE) GO TO 200
+ DO 195 K=KKS,KKE
+ J=JLU(K)
+ V(J)=0.0
+195 CONTINUE
+200 CONTINUE
+C NORMAL RETURN
+ SLU0 = .FALSE.
+ RETURN
+C ERROR RETURNS
+210 IE=I
+ SLU0 = .TRUE.
+ RETURN
+C NORMAL RETURN
+220 IE=IROW
+ SLU0 = .TRUE.
+ RETURN
+ END
+ SUBROUTINE SNCG0(N,IA,JA,A,B,X,ILU,JLU,ID,ALU,R,P,S1,S2,
+ * EPS,ITER,IE)
+ IMPLICIT REAL (A-H,O-Z)
+ DIMENSION IA(*),JA(*),A(*),B(N),X(N),ILU(*),JLU(*),ALU(*),ID(N),
+ * R(N),P(N),S1(N),S2(N)
+C NONSYMMETRIC CONJUGATE GRADIENT
+C WHERE:
+C N,IA,JA,A,B, AND X ARE DESCRIBED IN SUBROUTINE SILUCG.
+C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU.
+C JLU GIVES COLUMN NUMBER.
+C ID GIVES INDEX OF DIAGONAL ELEMENT OF U.
+C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
+C WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U.
+C R,P,S1, AND S2 ARE VECTORS OF LENGTH N USED IN THE
+C ITERATIONS.
+C FOLLOWING PARAMETER ADDED BY J. THORNBURG, 17/MAY/85.
+C EPS IS CONVERGENCE CRITERIA. (DESCRIBED IN SUBROUTINE
+C SILUCG).
+C ITER IS MAX NUMBER OF ITERATIONS, OR 0 FOR "NO LIMIT".
+C IE GIVES ACTUAL NUMBER OF ITERATIONS, NEGATIVE IF
+C NO CONVERGENCE.
+C
+C R0=B-A*X0
+ CALL SMUL10(N,IA,JA,A,X,R)
+ DO 10 I=1,N
+ R(I)=B(I)-R(I)
+10 CONTINUE
+C P0=(UT*U)(-1)*AT*(L*LT)(-1)*R0
+C FIRST SOLVE L*LT*S1=R0
+ CALL SSUBL0(N,ILU,JLU,ID,ALU,R,S1)
+C TIMES TRANSPOSE OF A
+ CALL SMUL20(N,IA,JA,A,S1,S2)
+C THEN SOLVE UT*U*P=S2
+ CALL SSUBU0(N,ILU,JLU,ID,ALU,S2,P)
+ IE=0
+ RDOT = SGVV(R,S1,N)
+C LOOP BEGINS HERE
+20 CALL SMUL30(N,ILU,JLU,ID,ALU,P,S2)
+ PDOT = SGVV(P,S2,N)
+C
+ IF (PDOT.EQ.0.0) RETURN
+C
+ ALPHA=RDOT/PDOT
+C EQUATION 9PA ALPHA=(R,LINV*R)/(P,UT*U*P)
+ XMAX=1.0
+ XDIF=0.0
+ DO 30 I=1,N
+ AP=ALPHA*P(I)
+ X(I)=X(I)+AP
+C EQUATION 9PB X=X+ALPHA*P
+ AP=ABS(AP)
+ XX=ABS(X(I))
+ IF (AP.GT.XDIF) XDIF=AP
+ IF (XX.GT.XMAX) XMAX=XX
+30 CONTINUE
+ IE=IE+1
+C
+C CONVERGENCE TEST (CHANGED BY J. THORNBURG, 17/MAY/85, 4/AUG/89)
+C
+ IF ((EPS .GT. 0.0) .AND. (XDIF .LE. EPS * XMAX)) RETURN
+ IF ((EPS .LT. 0.0) .AND. (XMAX + XDIF/ABS(EPS) .EQ. XMAX))
+ * RETURN
+C
+C EXCEEDED ITERATION LIMIT?
+C
+ IF ((ITER .NE. 0) .AND. (IE .GE. ITER)) GO TO 60
+ CALL SMUL10(N,IA,JA,A,P,S2)
+ DO 40 I=1,N
+ R(I)=R(I)-ALPHA*S2(I)
+C EQUATION 9PC R=R-ALPHA*A*P
+40 CONTINUE
+ CALL SSUBL0(N,ILU,JLU,ID,ALU,R,S1)
+C CALL INPROD(R,S1,EDOT,RRDOT,N)
+ RRDOT = SGVV(R,S1,N)
+ BETA=RRDOT/RDOT
+C EQUATION 9PD BETA=(R+,LINV*R+)/(R,LINV*R)
+ RDOT=RRDOT
+ CALL SMUL20(N,IA,JA,A,S1,S2)
+ CALL SSUBU0(N,ILU,JLU,ID,ALU,S2,S1)
+ DO 50 I=1,N
+ P(I)=S1(I)+BETA*P(I)
+C EQUATION 9PE P=(UT*U)(-1)*AT*(L*LT)(-1)*R+BETA*P
+50 CONTINUE
+ GO TO 20
+60 IE=-IE
+ RETURN
+ END
+ SUBROUTINE SMUL10(N,IA,JA,A,B,X)
+ IMPLICIT REAL (A-H,O-Z)
+ DIMENSION IA(*),JA(*),A(*),B(N),X(N)
+C MULTIPLY A TIMES B TO GET X
+C WHERE:
+C N IS THE ORDER OF THE MATRIX
+C IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW
+C JA GIVES COLUMN NUMBER
+C A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC
+C MATRIX STORED BY ROW
+C B IS THE VECTOR
+C X IS THE PRODUCT (MUST BE DIFFERENT FROM B)
+C
+ DO 20 I=1,N
+ KS=IA(I)
+ KE=IA(I+1)-1
+ SUM=0.0
+ DO 10 K=KS,KE
+ J=JA(K)
+ SUM=SUM+A(K)*B(J)
+10 CONTINUE
+ X(I)=SUM
+20 CONTINUE
+ RETURN
+ END
+ SUBROUTINE SMUL20(N,IA,JA,A,B,X)
+ IMPLICIT REAL (A-H,O-Z)
+ DIMENSION IA(*),JA(*),A(*),B(N),X(N)
+C MULTIPLY TRANSPOSE OF A TIMES B TO GET X
+C WHERE:
+C N IS THE ORDER OF THE MATRIX
+C IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW
+C JA GIVES COLUMN NUMBER
+C A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC
+C MATRIX STORED BY ROW
+C B IS THE VECTOR
+C X IS THE PRODUCT (MUST BE DIFFERENT FROM B)
+C
+ DO 10 I=1,N
+ X(I)=0.0
+10 CONTINUE
+ DO 30 I=1,N
+ KS=IA(I)
+ KE=IA(I+1)-1
+ BB=B(I)
+ DO 20 K=KS,KE
+ J=JA(K)
+ X(J)=X(J)+A(K)*BB
+20 CONTINUE
+30 CONTINUE
+ RETURN
+ END
+ SUBROUTINE SMUL30(N,ILU,JLU,ID,ALU,B,X)
+ IMPLICIT REAL (A-H,O-Z)
+ DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N)
+C MULTIPLY TRANSPOSE OF U TIMES U TIMES B TO GET X
+C WHERE:
+C N IS THE ORDER OF THE MATRIX
+C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU
+C JLU GIVES COLUMN NUMBER
+C ID GIVES INDEX OF DIAGONAL ELEMENT OF U
+C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
+C WITH RECIPROCALS OF DIAGONAL ELEMENTS
+C B IS THE VECTOR
+C X IS THE PRODUCT UT*U*B (X MUST BE DIFFERENT FROM B)
+C
+ DO 10 I=1,N
+ X(I)=0.0
+10 CONTINUE
+ DO 50 I=1,N
+ KS=ID(I)+1
+ KE=ILU(I+1)-1
+ DIAG=1.0/ALU(KS-1)
+ XX=DIAG*B(I)
+ IF (KS.GT.KE) GO TO 30
+ DO 20 K=KS,KE
+ J=JLU(K)
+ XX=XX+ALU(K)*B(J)
+20 CONTINUE
+30 X(I)=X(I)+DIAG*XX
+ IF (KS.GT.KE) GO TO 50
+ DO 40 K=KS,KE
+ J=JLU(K)
+ X(J)=X(J)+ALU(K)*XX
+40 CONTINUE
+50 CONTINUE
+ RETURN
+ END
+ SUBROUTINE SSUBU0(N,ILU,JLU,ID,ALU,B,X)
+ IMPLICIT REAL (A-H,O-Z)
+ DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N)
+C DO FORWARD AND BACK SUBSTITUTION TO SOLVE UT*U*X=B
+C WHERE:
+C N IS THE ORDER OF THE MATRIX
+C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU
+C JLU GIVES COLUMN NUMBER
+C ID GIVES INDEX OF DIAGONAL ELEMENT OF U
+C ALU HAS NONZERO ELMENTS OF LU MATRIX STORED BY ROW
+C WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U
+C B IS THE RHS VECTOR
+C X IS THE SOLUTION VECTOR
+C
+ NP=N+1
+ DO 10 I=1,N
+ X(I)=B(I)
+10 CONTINUE
+C FORWARD SUBSTITUTION
+ DO 30 I=1,N
+ KS=ID(I)+1
+ KE=ILU(I+1)-1
+ XX=X(I)*ALU(KS-1)
+ X(I)=XX
+ IF (KS.GT.KE) GO TO 30
+ DO 20 K=KS,KE
+ J=JLU(K)
+ X(J)=X(J)-ALU(K)*XX
+20 CONTINUE
+30 CONTINUE
+C BACK SUBSTITUTION
+ DO 60 II=1,N
+ I=NP-II
+ KS=ID(I)+1
+ KE=ILU(I+1)-1
+ SUM=0.0
+ IF (KS.GT.KE) GO TO 50
+ DO 40 K=KS,KE
+ J=JLU(K)
+ SUM=SUM+ALU(K)*X(J)
+40 CONTINUE
+50 X(I)=(X(I)-SUM)*ALU(KS-1)
+60 CONTINUE
+ RETURN
+ END
+ SUBROUTINE SSUBL0(N,ILU,JLU,ID,ALU,B,X)
+ IMPLICIT REAL (A-H,O-Z)
+ DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N)
+C DO FORWARD AND BACK SUBSTITUTION TO SOLVE L*LT*X=B
+C WHERE:
+C N IS THE ORDER OF THE MATRIX
+C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW LU
+C JLU GIVES THE COLUMN NUMBER
+C ID GIVES INDEX OF DIAGONAL ELEMENT OF U
+C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
+C DIAGONAL ELEMENTS OF L ARE 1.0 AND NOT STORED
+C B IS THE RHS VECTOR
+C X IS THE SOLUTION VECTOR
+C
+ NP=N+1
+ DO 10 I=1,N
+ X(I)=B(I)
+10 CONTINUE
+C FORWARD SUBSTITUTION
+ DO 30 I=1,N
+ KS=ILU(I)
+ KE=ID(I)-1
+ IF (KS.GT.KE) GO TO 30
+ SUM=0.0
+ DO 20 K=KS,KE
+ J=JLU(K)
+ SUM=SUM+ALU(K)*X(J)
+20 CONTINUE
+ X(I)=X(I)-SUM
+30 CONTINUE
+C BACK SUBSTITUTION
+ DO 50 II=1,N
+ I=NP-II
+ KS=ILU(I)
+ KE=ID(I)-1
+ IF (KS.GT.KE) GO TO 50
+ XX=X(I)
+ IF (XX.EQ.0.0) GO TO 50
+ DO 40 K=KS,KE
+ J=JLU(K)
+ X(J)=X(J)-ALU(K)*XX
+40 CONTINUE
+50 CONTINUE
+ RETURN
+ END
+ REAL FUNCTION SGVV(V,W,N)
+ IMPLICIT REAL (A-H,O-Z)
+ DIMENSION V(N),W(N)
+C SUBROUTINE TO COMPUTE REAL VECTOR DOT PRODUCT.
+C
+ SUM = 0.0
+ DO 10 I = 1,N
+ SUM = SUM + V(I)*W(I)
+10 CONTINUE
+ SGVV = SUM
+ RETURN
+ END
+ LOGICAL FUNCTION DILUCG(N,IA,JA,A,B,X,ITEMP,RTEMP,EPS,ITER,IE)
+ IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+C
+C INCOMPLETE LU DECOMPOSITION-CONJUGATE GRADIENT
+C - -- - -
+C WHERE:
+C |N| IS THE NUMBER OF EQUATIONS. IF N < 0, ITEMP AND
+C RTEMP CONTAIN THE ILU FROM A PREVIOUS CALL AND
+C B AND X ARE THE NEW RHS AND INITIAL GUESS.
+C IA IS AN INTEGER ARRAY DIMENSIONED |N|+1. IA(I) IS THE
+C INDEX INTO ARRAYS JA AND A OF THE FIRST NON-ZERO
+C ELEMENT IN ROW I. LET MAX=IA(|N|+1)-IA(1).
+C JA IS AN INTEGER ARRAY DIMENSIONED MAX. JA(K) GIVES
+C THE COLUMN NUMBER OF A(K).
+C A IS A DOUBLE PRECISION ARRAY DIMENSIONED MAX. IT CONTAINS THE
+C NONZERO ELEMENTS OF THE MATRIX STORED BY ROW.
+C B CONTAINS THE RHS VECTOR.
+C X IS A DOUBLE PRECISION ARRAY DIMENSIONED |N|. ON ENTRY, IT CONTAINS
+C AN INITIAL ESTIMATE; ON EXIT, THE SOLUTION.
+C ITEMP IS AN INTEGER SCRATCH ARRAY DIMENSIONED 3*(|N|+MAX)+2.
+C RTEMP IS A DOUBLE PRECISION SCRATCH ARRAY DIMENSIONED 4*|N|+MAX.
+C EPS IS THE CONVERGENCE CRITERIA. IT SPECIFIES THE RELATIVE
+C ERROR ALLOWED IN THE SOLUTION. TO BE PRECISE, CONVERGENCE
+C IS DEEMED TO HAVE OCCURED WHEN THE INFINITY-NORM OF THE
+C CHANGE IN THE SOLUTION IN ONE ITERATION IS .LE. EPS * THE
+C INFINITY-NORM OF THE CURRENT SOLUTION. HOWEVER, IF EPS
+C .LT. 0.0D0, IT IS INTERNALLY SCALED BY THE MACHINE PRECISION,
+C SO THAT, FOR EXAMPLE, EPS = -256.0 WILL ALLOW THE LAST TWO
+C HEXADECIMAL DIGITS OF THE SOLUTION TO BE IN ERROR.
+C ITER GIVES THE REQUESTED NUMBER OF ITERATIONS, OR IS 0 FOR "NO LIMIT".
+C IE IS AN INTEGER VARIABLE. FOR NORMAL RETURN, IT GIVES
+C THE NUMBER OF ITERATIONS DONE (NEGATIVE IS NO CONVERGENCE).
+C ON ERROR RETURN, THE ROW IN ERROR.
+C
+C THE FUNCTION RETURNS .FALSE. IF ALL'S WELL, .TRUE. ON AN ERROR RETURN.
+C
+C (MODIFIED TO RETURN LOGICAL VALUE, J. THORNBURG, 13/MAY/85.)
+C (MODIFIED TO ADD CONVERGENCE CRITERIA, J. THORNBURG, 17/MAY/85.)
+C (MODIFIED CONVERGENCE CRITERIA, J. THORNBURG, 4/AUG/89.)
+C (MODIFIED TO AVOID SKIP RETURNS, J. THORNBURG, 10/SEP/89.)
+C
+C REFERENCE:
+C D.S.KERSHAW,"THE INCOMPLETE CHOLESKY-CONJUGATE GRADIENT
+C METHOD FOR INTERATIVE SOLUTION OF LINEAR EQUATIONS",
+C J.COMPUT.PHYS. JAN 1978 PP 43-65
+C
+ DIMENSION IA(*),JA(*),A(*),B(*),X(*),ITEMP(*),RTEMP(*)
+ LOGICAL DLU0
+ NP=IABS(N)
+ IE=0
+ IF (NP.EQ.0) GO TO 20
+C CALCULATE INDICES FOR BREAKING UP TEMPORARY ARRAYS.
+ N1=NP+1
+ MAX=IA(N1)-IA(1)
+ ILU=1
+ JLU=ILU+N1
+ ID=JLU+MAX
+ IC=ID+NP
+ JC=IC+N1
+ JCI=JC+MAX
+ IR=1
+ IP=IR+NP
+ IS1=IP+NP
+ IS2=IS1+NP
+ IALU=IS2+NP
+ IF (N.LT.0) GO TO 10
+C DO INCOMPLETE LU DECOMPOSITION
+ IF (DLU0(NP,IA,JA,A,ITEMP(IC),ITEMP(JC),ITEMP(JCI),RTEMP(IALU),
+ * ITEMP(ILU),ITEMP(JLU),ITEMP(ID),RTEMP(IR),IE)) GOTO 20
+C AND DO CONJUGATE GRADIENT ITERATIONS
+C CALL MODIFIED TO ADD ADJUSTABLE CONVERGENCE CRITERIA EPS
+C - J. THORNBURG, 17/MAY/85.
+10 CALL DNCG0(NP,IA,JA,A,B,X,ITEMP(ILU),ITEMP(JLU),ITEMP(ID),
+ * RTEMP(IALU),RTEMP(IR),RTEMP(IP),RTEMP(IS1),RTEMP(IS2),
+ * EPS,ITER,IE)
+ DILUCG = .FALSE.
+ RETURN
+20 DILUCG = .TRUE.
+ RETURN
+ END
+ LOGICAL FUNCTION DLU0(N,IA,JA,A,IC,JC,JCI,ALU,ILU,JLU,ID,V,IE)
+ IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+ DIMENSION IA(*),JA(*),A(*),IC(*),JC(*),JCI(*),
+ * ALU(*),ILU(*),JLU(*),ID(N),V(N)
+ LOGICAL NODIAG
+ COMMON /ICBD00/ ICBAD
+C INCOMPLETE LU DECOMPOSITION
+C WHERE:
+C N,IA,JA, AND A ARE DESCRIBED IN SUBROUTINE ILUCG
+C IC IS AN INTEGER ARRAY DIMENSIONED N+1, IC(J) GIVES THE
+C INDEX OF THE FIRST NONZERO ELEMENT IN COLMN J IN
+C ARRAY JC.
+C JC IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A.
+C JC(K) GIVES THE ROW NUMBER OF THE K'TH ELEMENT IN
+C THE COLUMN STRUCTURE.
+C JCI IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A.
+C JCI(K) GIVES THE INDEX INTO ARRAY A OF THE K'TH ELEMENT
+C OF THE COLUMN STRUCTURE.
+C ALU HAS THE SAME DIMENSION AS A. ON EXIT, IT WILL
+C CONTAIN THE INCOMPLETE LU DECOMPOSITION OF A WITH THE
+C RECIPROCALS OF THE DIAGONAL ELEMENTS OF U.
+C ILU AND JLU CORRESPONDS TO IA AND JA BUT FOR ALU.
+C ID IS AN INTEGER ARRAY DIMENSIONED N. IT CONTAINS
+C INDICES TO THE DIAGONAL ELEMENTS OF U.
+C V IS A REAL SCRATCH VECTOR OF LENGTH N.
+C IE GIVES THE ROW NUMBER IN ERROR.
+C
+C RETURN VALUE = .FALSE. IF ALL IS WELL, .TRUE. IF ERROR.
+C
+C NOTE: DLU0 SETS ARGUMENTS IC THROUGH V.
+C
+ ICBAD=0
+C ZERO COUNT OF ZERO DIAGONAL ELEMENTS IN U.
+C
+C FIRST CHECK STRUCTURE OF A AND BUILD COLUMN STRUCTURE
+ DO 10 I=1,N
+ IC(I)=0
+10 CONTINUE
+ DO 30 I=1,N
+ KS=IA(I)
+ KE=IA(I+1)-1
+ NODIAG=.TRUE.
+ DO 20 K=KS,KE
+ J=JA(K)
+ IF (J.LT.1.OR.J.GT.N) GO TO 210
+ IC(J)=IC(J)+1
+ IF (J.EQ.I) NODIAG=.FALSE.
+20 CONTINUE
+ IF (NODIAG) GO TO 210
+30 CONTINUE
+C MAKE IC INTO INDICES
+ KOLD=IC(1)
+ IC(1)=1
+ DO 40 I=1,N
+ KNEW=IC(I+1)
+ IF (KOLD.EQ.0) GO TO 210
+ IC(I+1)=IC(I)+KOLD
+ KOLD=KNEW
+40 CONTINUE
+C SET JC AND JCI FOR COLUMN STRUCTURE
+ DO 60 I=1,N
+ KS=IA(I)
+ KE=IA(I+1)-1
+ DO 50 K=KS,KE
+ J=JA(K)
+ L=IC(J)
+ IC(J)=L+1
+ JC(L)=I
+ JCI(L)=K
+50 CONTINUE
+60 CONTINUE
+C FIX UP IC
+ KOLD=IC(1)
+ IC(1)=1
+ DO 70 I=1,N
+ KNEW=IC(I+1)
+ IC(I+1)=KOLD
+ KOLD=KNEW
+70 CONTINUE
+C FIND SORTED ROW STRUCTURE FROM SORTED COLUMN STRUCTURE
+ NP=N+1
+ DO 80 I=1,NP
+ ILU(I)=IA(I)
+80 CONTINUE
+C MOVE ELEMENTS, SET JLU AND ID
+ DO 100 J=1,N
+ KS=IC(J)
+ KE=IC(J+1)-1
+ DO 90 K=KS,KE
+ I=JC(K)
+ L=ILU(I)
+ ILU(I)=L+1
+ JLU(L)=J
+ KK=JCI(K)
+ ALU(L)=A(KK)
+ IF (I.EQ.J) ID(J)=L
+90 CONTINUE
+100 CONTINUE
+C RESET ILU (COULD JUST USE IA)
+ DO 110 I=1,NP
+ ILU(I)=IA(I)
+110 CONTINUE
+C FINISHED WITH SORTED COLUMN AND ROW STRUCTURE
+C
+C DO LU DECOMPOSITION USING GAUSSIAN ELIMINATION
+ DO 120 I=1,N
+ V(I)=0.0D0
+120 CONTINUE
+ DO 200 IROW=1,N
+ I=ID(IROW)
+ PIVOT=ALU(I)
+ IF (PIVOT.NE.0.0D0) GO TO 140
+C THIS CASE MAKES THE ILU LESS ACCURATE
+ ICBAD=ICBAD+1
+ KS=ILU(IROW)
+ KE=ILU(IROW+1)-1
+ DO 130 K=KS,KE
+ PIVOT=PIVOT+DABS(ALU(K))
+130 CONTINUE
+ IF (PIVOT.EQ.0.0D0) GO TO 220
+140 PIVOT=1.0D0/PIVOT
+ ALU(I)=PIVOT
+ KKS=I+1
+ KKE=ILU(IROW+1)-1
+ IF (KKS.GT.KKE) GO TO 160
+ DO 150 K=KKS,KKE
+ J=JLU(K)
+ V(J)=ALU(K)
+150 CONTINUE
+C FIX L IN COLUMN IROW AND DO PARTIAL LU IN SUBMATRIX
+160 KS=IC(IROW)
+ KE=IC(IROW+1)-1
+ DO 190 K=KS,KE
+ I=JC(K)
+ IF (I.LE.IROW) GO TO 190
+ LS=ILU(I)
+ LE=ILU(I+1)-1
+ DO 180 L=LS,LE
+ J=JLU(L)
+ IF (J.LT.IROW) GO TO 180
+ IF (J.GT.IROW) GO TO 170
+ AMULT=ALU(L)*PIVOT
+ ALU(L)=AMULT
+ IF (AMULT.EQ.0.0) GO TO 190
+ GO TO 180
+170 IF (V(J).EQ.0.0D0) GO TO 180
+ ALU(L)=ALU(L)-AMULT*V(J)
+180 CONTINUE
+190 CONTINUE
+C RESET V
+ IF (KKS.GT.KKE) GO TO 200
+ DO 195 K=KKS,KKE
+ J=JLU(K)
+ V(J)=0.0D0
+195 CONTINUE
+200 CONTINUE
+C NORMAL RETURN
+ DLU0 = .FALSE.
+ RETURN
+C ERROR RETURNS
+210 IE=I
+ DLU0 = .TRUE.
+ RETURN
+220 IE=IROW
+ DLU0 = .TRUE.
+ RETURN
+ END
+ SUBROUTINE DNCG0(N,IA,JA,A,B,X,ILU,JLU,ID,ALU,R,P,S1,S2,
+ * EPS,ITER,IE)
+ IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+ DIMENSION IA(*),JA(*),A(*),B(N),X(N),ILU(*),JLU(*),ALU(*),ID(N),
+ * R(N),P(N),S1(N),S2(N)
+C NONSYMMETRIC CONJUGATE GRADIENT
+C WHERE:
+C N,IA,JA,A,B, AND X ARE DESCRIBED IN SUBROUTINE DILUCG.
+C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU.
+C JLU GIVES COLUMN NUMBER.
+C ID GIVES INDEX OF DIAGONAL ELEMENT OF U.
+C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
+C WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U.
+C R,P,S1, AND S2 ARE VECTORS OF LENGTH N USED IN THE
+C ITERATIONS.
+C FOLLOWING PARAMETER ADDED BY J. THORNBURG, 17/MAY/85.
+C EPS IS CONVERGENCE CRITERIA. (DESCRIBED IN SUBROUTINE
+C DILUCG).
+C ITER IS MAX NUMBER OF ITERATIONS, OR 0 FOR "NO LIMIT".
+C IE GIVES ACTUAL NUMBER OF ITERATIONS, NEGATIVE IF
+C NO CONVERGENCE.
+C
+C R0=B-A*X0
+ CALL DMUL10(N,IA,JA,A,X,R)
+ DO 10 I=1,N
+ R(I)=B(I)-R(I)
+10 CONTINUE
+C P0=(UT*U)(-1)*AT*(L*LT)(-1)*R0
+C FIRST SOLVE L*LT*S1=R0
+ CALL DSUBL0(N,ILU,JLU,ID,ALU,R,S1)
+C TIMES TRANSPOSE OF A
+ CALL DMUL20(N,IA,JA,A,S1,S2)
+C THEN SOLVE UT*U*P=S2
+ CALL DSUBU0(N,ILU,JLU,ID,ALU,S2,P)
+ IE=0
+C INPROD IS DOT PRODUCT ROUTINE IN *LIBRARY (UBC MATRIX P 28)
+C ALL CALLS ON IT COMMENTED OUT AND REPLACED WITH CALLS TO NEW
+C ROUTINE DGVV(...); SOURCE CODE FOR LATTER ADDED TO END OF
+C THIS FILE. - J. THORNBURG, 10/MAY/85.
+C CALL INPROD(R,S1,EDOT,RDOT,N)
+ RDOT = DGVV(R,S1,N)
+C LOOP BEGINS HERE
+20 CALL DMUL30(N,ILU,JLU,ID,ALU,P,S2)
+C CALL INPROD(P,S2,EDOT,PDOT,N)
+ PDOT = DGVV(P,S2,N)
+C
+ IF (PDOT.EQ.0.0D0) RETURN
+C
+ ALPHA=RDOT/PDOT
+C EQUATION 9PA ALPHA=(R,LINV*R)/(P,UT*U*P)
+ XMAX=1.0D0
+ XDIF=0.0D0
+ DO 30 I=1,N
+ AP=ALPHA*P(I)
+ X(I)=X(I)+AP
+C EQUATION 9PB X=X+ALPHA*P
+ AP=DABS(AP)
+ XX=DABS(X(I))
+ IF (AP.GT.XDIF) XDIF=AP
+ IF (XX.GT.XMAX) XMAX=XX
+30 CONTINUE
+ IE=IE+1
+C
+C CONVERGENCE TEST (CHANGED BY J. THORNBURG, 17/MAY/85, 4/AUG/89)
+C
+ IF ((EPS .GT. 0.0D0) .AND. (XDIF .LE. EPS * XMAX)) RETURN
+ IF ((EPS .LT. 0.0D0) .AND. (XMAX + XDIF/DABS(EPS) .EQ. XMAX))
+ * RETURN
+C
+C EXCEEDED ITERATION LIMIT?
+C
+ IF ((ITER .NE. 0) .AND. (IE .GE. ITER)) GO TO 60
+ CALL DMUL10(N,IA,JA,A,P,S2)
+ DO 40 I=1,N
+ R(I)=R(I)-ALPHA*S2(I)
+C EQUATION 9PC R=R-ALPHA*A*P
+40 CONTINUE
+ CALL DSUBL0(N,ILU,JLU,ID,ALU,R,S1)
+C CALL INPROD(R,S1,EDOT,RRDOT,N)
+ RRDOT = DGVV(R,S1,N)
+ BETA=RRDOT/RDOT
+C EQUATION 9PD BETA=(R+,LINV*R+)/(R,LINV*R)
+ RDOT=RRDOT
+ CALL DMUL20(N,IA,JA,A,S1,S2)
+ CALL DSUBU0(N,ILU,JLU,ID,ALU,S2,S1)
+ DO 50 I=1,N
+ P(I)=S1(I)+BETA*P(I)
+C EQUATION 9PE P=(UT*U)(-1)*AT*(L*LT)(-1)*R+BETA*P
+50 CONTINUE
+ GO TO 20
+60 IE=-IE
+ RETURN
+ END
+ SUBROUTINE DMUL10(N,IA,JA,A,B,X)
+ IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+ DIMENSION IA(*),JA(*),A(*),B(N),X(N)
+C MULTIPLY A TIMES B TO GET X
+C WHERE:
+C N IS THE ORDER OF THE MATRIX
+C IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW
+C JA GIVES COLUMN NUMBER
+C A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC
+C MATRIX STORED BY ROW
+C B IS THE VECTOR
+C X IS THE PRODUCT (MUST BE DIFFERENT FROM B)
+C
+ DO 20 I=1,N
+ KS=IA(I)
+ KE=IA(I+1)-1
+ SUM=0.0D0
+ DO 10 K=KS,KE
+ J=JA(K)
+ SUM=SUM+A(K)*B(J)
+10 CONTINUE
+ X(I)=SUM
+20 CONTINUE
+ RETURN
+ END
+ SUBROUTINE DMUL20(N,IA,JA,A,B,X)
+ IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+ DIMENSION IA(*),JA(*),A(*),B(N),X(N)
+C MULTIPLY TRANSPOSE OF A TIMES B TO GET X
+C WHERE:
+C N IS THE ORDER OF THE MATRIX
+C IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW
+C JA GIVES COLUMN NUMBER
+C A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC
+C MATRIX STORED BY ROW
+C B IS THE VECTOR
+C X IS THE PRODUCT (MUST BE DIFFERENT FROM B)
+C
+ DO 10 I=1,N
+ X(I)=0.0D0
+10 CONTINUE
+ DO 30 I=1,N
+ KS=IA(I)
+ KE=IA(I+1)-1
+ BB=B(I)
+ DO 20 K=KS,KE
+ J=JA(K)
+ X(J)=X(J)+A(K)*BB
+20 CONTINUE
+30 CONTINUE
+ RETURN
+ END
+ SUBROUTINE DMUL30(N,ILU,JLU,ID,ALU,B,X)
+ IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+ DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N)
+C MULTIPLY TRANSPOSE OF U TIMES U TIMES B TO GET X
+C WHERE:
+C N IS THE ORDER OF THE MATRIX
+C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU
+C JLU GIVES COLUMN NUMBER
+C ID GIVES INDEX OF DIAGONAL ELEMENT OF U
+C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
+C WITH RECIPROCALS OF DIAGONAL ELEMENTS
+C B IS THE VECTOR
+C X IS THE PRODUCT UT*U*B (X MUST BE DIFFERENT FROM B)
+C
+ DO 10 I=1,N
+ X(I)=0.0D0
+10 CONTINUE
+ DO 50 I=1,N
+ KS=ID(I)+1
+ KE=ILU(I+1)-1
+ DIAG=1.0D0/ALU(KS-1)
+ XX=DIAG*B(I)
+ IF (KS.GT.KE) GO TO 30
+ DO 20 K=KS,KE
+ J=JLU(K)
+ XX=XX+ALU(K)*B(J)
+20 CONTINUE
+30 X(I)=X(I)+DIAG*XX
+ IF (KS.GT.KE) GO TO 50
+ DO 40 K=KS,KE
+ J=JLU(K)
+ X(J)=X(J)+ALU(K)*XX
+40 CONTINUE
+50 CONTINUE
+ RETURN
+ END
+ SUBROUTINE DSUBU0(N,ILU,JLU,ID,ALU,B,X)
+ IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+ DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N)
+C DO FORWARD AND BACK SUBSTITUTION TO SOLVE UT*U*X=B
+C WHERE:
+C N IS THE ORDER OF THE MATRIX
+C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU
+C JLU GIVES COLUMN NUMBER
+C ID GIVES INDEX OF DIAGONAL ELEMENT OF U
+C ALU HAS NONZERO ELMENTS OF LU MATRIX STORED BY ROW
+C WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U
+C B IS THE RHS VECTOR
+C X IS THE SOLUTION VECTOR
+C
+ NP=N+1
+ DO 10 I=1,N
+ X(I)=B(I)
+10 CONTINUE
+C FORWARD SUBSTITUTION
+ DO 30 I=1,N
+ KS=ID(I)+1
+ KE=ILU(I+1)-1
+ XX=X(I)*ALU(KS-1)
+ X(I)=XX
+ IF (KS.GT.KE) GO TO 30
+ DO 20 K=KS,KE
+ J=JLU(K)
+ X(J)=X(J)-ALU(K)*XX
+20 CONTINUE
+30 CONTINUE
+C BACK SUBSTITUTION
+ DO 60 II=1,N
+ I=NP-II
+ KS=ID(I)+1
+ KE=ILU(I+1)-1
+ SUM=0.0D0
+ IF (KS.GT.KE) GO TO 50
+ DO 40 K=KS,KE
+ J=JLU(K)
+ SUM=SUM+ALU(K)*X(J)
+40 CONTINUE
+50 X(I)=(X(I)-SUM)*ALU(KS-1)
+60 CONTINUE
+ RETURN
+ END
+ SUBROUTINE DSUBL0(N,ILU,JLU,ID,ALU,B,X)
+ IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+ DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N)
+C DO FORWARD AND BACK SUBSTITUTION TO SOLVE L*LT*X=B
+C WHERE:
+C N IS THE ORDER OF THE MATRIX
+C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW LU
+C JLU GIVES THE COLUMN NUMBER
+C ID GIVES INDEX OF DIAGONAL ELEMENT OF U
+C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
+C DIAGONAL ELEMENTS OF L ARE 1.0 AND NOT STORED
+C B IS THE RHS VECTOR
+C X IS THE SOLUTION VECTOR
+C
+ NP=N+1
+ DO 10 I=1,N
+ X(I)=B(I)
+10 CONTINUE
+C FORWARD SUBSTITUTION
+ DO 30 I=1,N
+ KS=ILU(I)
+ KE=ID(I)-1
+ IF (KS.GT.KE) GO TO 30
+ SUM=0.0D0
+ DO 20 K=KS,KE
+ J=JLU(K)
+ SUM=SUM+ALU(K)*X(J)
+20 CONTINUE
+ X(I)=X(I)-SUM
+30 CONTINUE
+C BACK SUBSTITUTION
+ DO 50 II=1,N
+ I=NP-II
+ KS=ILU(I)
+ KE=ID(I)-1
+ IF (KS.GT.KE) GO TO 50
+ XX=X(I)
+ IF (XX.EQ.0.0) GO TO 50
+ DO 40 K=KS,KE
+ J=JLU(K)
+ X(J)=X(J)-ALU(K)*XX
+40 CONTINUE
+50 CONTINUE
+ RETURN
+ END
+ DOUBLE PRECISION FUNCTION DGVV(V,W,N)
+ IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+ DIMENSION V(N),W(N)
+C SUBROUTINE TO COMPUTE DOUBLE PRECISION VECTOR DOT PRODUCT.
+C
+ SUM = 0.0D0
+ DO 10 I = 1,N
+ SUM = SUM + V(I)*W(I)
+10 CONTINUE
+ DGVV = SUM
+ RETURN
+ END
diff --git a/src/elliptic/ilucg_wrapper.F77 b/src/elliptic/ilucg_wrapper.F77
new file mode 100644
index 0000000..d1ab803
--- /dev/null
+++ b/src/elliptic/ilucg_wrapper.F77
@@ -0,0 +1,72 @@
+c ilucg_wrapper.F77 -- wrapper routines for [sd]ilucg()
+c $Header$
+
+c
+c These subroutines are wrappers around the [sd]ilucg() functions.
+c These subroutines take only integer/real/double precision arguments,
+c avoiding problems with C/C++ --> Fortran handling of the logical
+c result returned by [sd]ilucg().
+c
+c Arguments:
+c istatus = (out) This is the "IE" return value from [sd]ilucg()
+c ierror = (out) This is returned as 0 for a normal return,
+c nonzero for an error return.
+c
+
+#include "config.h"
+
+#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG
+ subroutine silucg_wrapper(n,
+ $ ia, ja, a,
+ $ b, x,
+ $ itemp, rtemp,
+ $ eps, iter,
+ $ istatus, ierror)
+ integer n
+ integer ia(*), ja(*)
+ real a(*), b(*), x(*)
+ integer itemp(*)
+ real rtemp(*)
+ real eps
+ integer iter, istatus, ierror
+c
+ logical silucg
+c
+ ierror = 0
+ if (silucg(n,
+ $ ia, ja, a,
+ $ b, x,
+ $ itemp, rtemp,
+ $ eps, iter,
+ $ istatus)) ierror = 1
+ return
+ end
+#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG
+
+#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG
+ subroutine dilucg_wrapper(n,
+ $ ia, ja, a,
+ $ b, x,
+ $ itemp, rtemp,
+ $ eps, iter,
+ $ istatus, ierror)
+ integer n
+ integer ia(*), ja(*)
+ double precision a(*), b(*), x(*)
+ integer itemp(*)
+ double precision rtemp(*)
+ double precision eps
+ integer iter, istatus, ierror
+c
+ logical dilucg
+c
+ ierror = 0
+ if (dilucg(n,
+ $ ia, ja, a,
+ $ b, x,
+ $ itemp, rtemp,
+ $ eps, iter,
+ $ istatus)) ierror = 1
+ return
+ end
+#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG
diff --git a/src/elliptic/lapack.h b/src/elliptic/lapack.h
index 1e86616..0927bff 100644
--- a/src/elliptic/lapack.h
+++ b/src/elliptic/lapack.h
@@ -4,7 +4,7 @@
/*
* prerequisites:
* "cctk.h"
- * "config.hh" // for "integer" = Fortran integer
+ * "config.h" // for "integer" = Fortran integer
*/
#ifdef __cplusplus
diff --git a/src/elliptic/lapack_wrapper.F77 b/src/elliptic/lapack_wrapper.F77
new file mode 100644
index 0000000..25796b3
--- /dev/null
+++ b/src/elliptic/lapack_wrapper.F77
@@ -0,0 +1,59 @@
+c lapack_wrapper.f -- wrapper routines for LAPACK [sd]gecon()
+c $Header$
+
+c
+c These subroutines are wrappers around the LAPACK [sd]gecon() subroutines.
+c These subroutines take only integer/real/double precision arguments,
+c avoiding problems with C/C++ --> Fortran passing of the character string
+c arguments used by [sd]gecon().
+c
+c Arguments:
+c norm_int = (in) 0 ==> infinity-norm
+c 1 ==> 1-norm
+c
+
+#include "config.h"
+
+#ifdef HAVE_DENSE_JACOBIAN__LAPACK
+ subroutine sgecon_wrapper(norm_int,
+ $ N, A, LDA, anorm, rcond,
+ $ WORK, IWORK, info)
+ integer norm_int
+ integer N, LDA
+ real A(LDA,N)
+ real anorm, rcond
+ real WORK(*)
+ integer iwork(*)
+ integer info
+ if (norm_int .eq. 0) then
+ call sgecon('I', N,A,LDA, anorm,rcond, WORK,IWORK, info)
+ else if (norm_int .eq. 1) then
+ call sgecon('1', N,A,LDA, anorm,rcond, WORK,IWORK, info)
+ else
+ info = -1;
+ end if
+ return
+ end
+#endif /* HAVE_DENSE_JACOBIAN__LAPACK */
+
+#ifdef HAVE_DENSE_JACOBIAN__LAPACK
+ subroutine dgecon_wrapper(norm_int,
+ $ N, A, LDA, anorm, rcond,
+ $ WORK, IWORK, info)
+ integer norm_int
+ integer N, LDA
+ double precision A(LDA,N)
+ double precision anorm, rcond
+ double precision WORK(*)
+ integer iwork(*)
+ integer info
+ if (norm_int .eq. 0) then
+ call dgecon('I', N,A,LDA, anorm,rcond, WORK,IWORK, info)
+ else if (norm_int .eq. 1) then
+ call dgecon('1', N,A,LDA, anorm,rcond, WORK,IWORK, info)
+ else
+ info = -1;
+ end if
+ return
+ end
+#endif /* HAVE_DENSE_JACOBIAN__LAPACK */
diff --git a/src/elliptic/make.code.defn b/src/elliptic/make.code.defn
index 28308a0..553d372 100644
--- a/src/elliptic/make.code.defn
+++ b/src/elliptic/make.code.defn
@@ -3,8 +3,8 @@
# Source files in this directory
SRCS = Jacobian.cc \
- dense_Jacobian.cc gecon_wrapper.F77 \
- row_sparse_Jacobian.cc
+ dense_Jacobian.cc lapack_wrapper.F77 \
+ row_sparse_Jacobian.cc ilucg_wrapper.F77 ilucg.f77
# Subdirectories containing source files
SUBDIRS =
diff --git a/src/elliptic/row_sparse_Jacobian.cc b/src/elliptic/row_sparse_Jacobian.cc
index 131655c..7d0be44 100644
--- a/src/elliptic/row_sparse_Jacobian.cc
+++ b/src/elliptic/row_sparse_Jacobian.cc
@@ -2,22 +2,28 @@
// $Header$
//
+// <<<liberal contents of "ilucg.h">>>
+//
#ifdef HAVE_ROW_SPARSE_JACOBIAN
-// row_sparse_Jacobian_sparsity::row_sparse_Jacobian_sparsity
-// row_sparse_Jacobian_sparsity::~row_sparse_Jacobian_sparsity
-// row_sparse_Jacobian_sparsity::zero_matrix
-// row_sparse_Jacobian_sparsity::note_new_row
-// row_sparse_Jacobian_sparsity::note_element
+// row_sparse_Jacobian::row_sparse_Jacobian
+// row_sparse_Jacobian::~row_sparse_Jacobian
+// row_sparse_Jacobian::element
+// row_sparse_Jacobian::zero_matrix
+// row_sparse_Jacobian::set_element
+// row_sparse_Jacobian::sum_into_element
+/// row_sparse_Jacobian::find_element
+/// row_sparse_Jacobian::insert_element
+ #ifdef DEBUG_ROW_SPARSE_JACOBIAN
+/// row_sparse_Jacobian::print_data_structure
+ #endif
#endif
//
-#ifdef HAVE_ROW_SPARSE_JACOBIAN
-// row_sparse_Jacobian_matrix::row_sparse_Jacobian_matrix
-// row_sparse_Jacobian_matrix::~row_sparse_Jacobian_matrix
-// row_sparse_Jacobian_matrix::zero_matrix
-// row_sparse_Jacobian_matrix::find_element
-// row_sparse_Jacobian_matrix::start_new_row
-// row_sparse_Jacobian_matrix::insert_element
+#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG
+// row_sparse_Jacobian__ILUCG::row_sparse_Jacobian__ILUCG
+// row_sparse_Jacobian__ILUCG::~row_sparse_Jacobian__ILUCG
+// row_sparse_Jacobian__ILUCG::solve_linear_system
#endif
+//
#include <stdio.h>
#include <assert.h>
@@ -52,16 +58,102 @@ using jtutil::error_exit;
//******************************************************************************
//******************************************************************************
+//
+// FIXME:
+// Cactus's CCTK_FCALL() isn't expanded in .h files (this is a bug),
+// so we include the contents of "lapack.h" inline here. This is a
+// kludge! :( :(
+//#include "ilucg.h"
+//
+
+//***** begin "ilucg.h" contents ******
+/* ilucg.h -- C/C++ prototypes for [sd]ilucg() routines */
+/* $Header$ */
+
+/*
+ * prerequisites:
+ * "cctk.h"
+ * "config.h" // for "integer" = Fortran integer
+ */
+
+#ifdef __cplusplus
+extern "C"
+ {
+#endif
+
+/*
+ * ***** ILUCG *****
+ */
+integer CCTK_FCALL
+ CCTK_FNAME(silucg)(const integer* N,
+ const integer ia[], const integer ja[], const float a[],
+ const float b[], float x[],
+ integer itemp[], float rtemp[],
+ const float* eps, const integer* iter,
+ integer* istatus);
+integer CCTK_FCALL
+ CCTK_FNAME(dilucg)(const integer* N,
+ const integer ia[], const integer ja[], const double a[],
+ const double b[], double x[],
+ integer itemp[], double rtemp[],
+ const double* eps, const integer* iter,
+ integer* istatus);
+
+/*
+ * ***** wrappers (for returning logical results) *****
+ */
+void CCTK_FCALL
+ CCTK_FNAME(silucg_wrapper)
+ (const integer* N,
+ const integer ia[], const integer ja[], const float a[],
+ const float b[], float x[],
+ integer itemp[], float rtemp[],
+ const float* eps, const integer* iter,
+ integer* istatus, integer* ierror);
+void CCTK_FCALL
+ CCTK_FNAME(dilucg_wrapper)
+ (const integer* N,
+ const integer ia[], const integer ja[], const double a[],
+ const double b[], double x[],
+ integer itemp[], double rtemp[],
+ const double* eps, const integer* iter,
+ integer* istatus, integer* ierror);
+
+#ifdef __cplusplus
+ } /* extern "C" */
+#endif
+//***** end "ilucg.h" contents ******
+
+//******************************************************************************
+//******************************************************************************
+//******************************************************************************
+
#ifdef HAVE_ROW_SPARSE_JACOBIAN
//
-// This function constructs a row_sparse_Jacobian_sparsity object.
+// This function constructs a row_sparse_Jacobian object.
//
-row_sparse_Jacobian_sparsity::row_sparse_Jacobian_sparsity
- (patch_system& ps,
- bool print_msg_flag /* = false */)
- : Jacobian_sparsity(ps, print_msg_flag),
- JJs_at_this_II_(new int[NN()])
+row_sparse_Jacobian::row_sparse_Jacobian(patch_system& ps,
+ bool print_msg_flag /* = false */)
+ : Jacobian(ps),
+ N_nonzeros_(0),
+ N_nonzeros_allocated_(FD_GRID__MOL_AREA * N_rows_),
+ // initial guess, insert_element()
+ // will grow if/when necessary
+ current_N_rows_(0),
+ IA_(new integer[N_rows_+1]),
+ JA_(new integer[N_nonzeros_allocated_]),
+ A_(new fp [N_nonzeros_allocated_])
{
+if (print_msg_flag)
+ then {
+ CCTK_VInfo(CCTK_THORNSTRING,
+ "constructing row_sparse_Jacobian: N_rows_=%d",
+ N_rows_);
+ CCTK_VInfo(CCTK_THORNSTRING,
+ " initial N_nonzeros_allocated_=%d",
+ N_nonzeros_allocated_);
+ }
+
zero_matrix();
}
#endif // HAVE_ROW_SPARSE_JACOBIAN
@@ -70,11 +162,13 @@ zero_matrix();
#ifdef HAVE_ROW_SPARSE_JACOBIAN
//
-// This function destroys a row_sparse_Jacobian_sparsity object.
+// This function destroys a row_sparse_Jacobian object.
//
-row_sparse_Jacobian_sparsity::~row_sparse_Jacobian_sparsity()
+row_sparse_Jacobian::~row_sparse_Jacobian()
{
-delete[] JJs_at_this_II_;
+delete[] A_;
+delete[] JA_;
+delete[] IA_;
}
#endif // HAVE_ROW_SPARSE_JACOBIAN
@@ -82,13 +176,13 @@ delete[] JJs_at_this_II_;
#ifdef HAVE_ROW_SPARSE_JACOBIAN
//
-// This function notes the zeroing of the entire matrix
+// This function gets a matrix element.
//
-void row_sparse_Jacobian_sparsity::zero_matrix()
+fp row_sparse_Jacobian::element(int II, int JJ)
+ const
{
-N_nonzeros_ = 0;
-current_II_ = 0;
-N_JJs_at_this_II_ = 0;
+const int fposn = find_element(II,JJ);
+return (fposn > 0) ? fA(fposn) : 0.0;
}
#endif // HAVE_ROW_SPARSE_JACOBIAN
@@ -96,12 +190,16 @@ N_JJs_at_this_II_ = 0;
#ifdef HAVE_ROW_SPARSE_JACOBIAN
//
-// This function notes that we're starting a new row in the matrix traversal.
+// This function zeros a row_sparse_Jacobian object.
//
-void row_sparse_Jacobian_sparsity::note_new_row(int II)
+void row_sparse_Jacobian::zero_matrix()
{
-current_II_ = II;
-N_JJs_at_this_II_ = 0;
+#ifdef DEBUG_ROW_SPARSE_JACOBIAN
+printf("row_sparse_Jacobian::zero_matrix()\n");
+#endif
+N_nonzeros_ = 0;
+current_N_rows_= 0;
+fIA(1) = 1;
}
#endif // HAVE_ROW_SPARSE_JACOBIAN
@@ -109,120 +207,269 @@ N_JJs_at_this_II_ = 0;
#ifdef HAVE_ROW_SPARSE_JACOBIAN
//
-// This function notes that the (II,JJ) matrix element needs to be stored.
+// This function sets a matrix element to a specified value.
//
-void row_sparse_Jacobian_sparsity::note_element(int II, int JJ)
+void row_sparse_Jacobian::set_element(int II, int JJ, fp value)
{
-if (II != current_II_)
- then {
- N_nonzeros_ += N_JJs_at_this_II_; // update totals for old row
- note_new_row(II);
- }
-
-// have we already seen this JJ in this row?
- for (int jindex = 0 ; jindex < N_JJs_at_this_II_ ; ++jindex)
- {
- if (JJs_at_this_II_[jindex] == JJ)
- then {
- // yes, we've already seen it
- // ==> no-op here
- return;
- }
- }
-
-// get to here ==> no, we haven't seen this JJ before in this roW
-// ==> add it to this row
-if (N_JJs_at_this_II_ >= NN())
- then error_exit(ERROR_EXIT,
-"***** row_sparse_Jacobian_sparsity():\n"
-" row buffer overflowed (this should never happen!)\n"
-" before insertion N_JJs_at_this_II_=%d >= NN()=%d\n"
-" N_nonzeros_=%d current_II_=%d\n"
-" II=%d JJ=%d\n"
- ,
- N_JJs_at_this_II_, NN(),
- N_nonzeros_, current_II_,
- II, JJ); /*NOTREACHED*/
-JJs_at_this_II_[N_JJs_at_this_II_++] = JJ;
+const int fposn = find_element(II,JJ);
+if (fposn > 0)
+ then fA(fposn) = value;
+ else insert_element(II,JJ, value);
}
#endif // HAVE_ROW_SPARSE_JACOBIAN
//******************************************************************************
-//******************************************************************************
-//******************************************************************************
+#ifdef HAVE_ROW_SPARSE_JACOBIAN
//
-// This function constructs a row_sparse_Jacobian_matrix object.
+// This function sums a value into a matrix element.
//
-row_sparse_Jacobian_matrix::row_sparse_Jacobian_matrix
- (patch_system& ps,
- row_sparse_Jacobian_sparsity& JS,
- bool print_msg_flag /* = false */)
- : Jacobian_matrix(ps, JS),
- N_nonzeros_(JS.N_nonzeros()),
- A_(new fp[N_nonzeros_]),
- IA_(new integer[NN()+1]),
- JA_(new integer[N_nonzeros_])
+void row_sparse_Jacobian::sum_into_element(int II, int JJ, fp value)
{
-if (print_msg_flag)
- then CCTK_VInfo(CCTK_THORNSTRING,
- "row_sparse_Jacobian_matrix ctor: N_nonzeros_=%d",
- N_nonzeros_);
-
-zero_matrix();
+const int fposn = find_element(II,JJ);
+if (fposn > 0)
+ then fA(fposn) += value;
+ else insert_element(II,JJ, value);
}
+#endif // HAVE_ROW_SPARSE_JACOBIAN
//******************************************************************************
+#ifdef HAVE_ROW_SPARSE_JACOBIAN
//
-// This function destroys a row_sparse_Jacobian_matrix object.
+// This function searches our data structures for element (II,JJ).
+// If found, it returns the position (1-origin) in A_ and JA_.
+// If not found, it returns 0.
//
-row_sparse_Jacobian_matrix::~row_sparse_Jacobian_matrix()
+int row_sparse_Jacobian::find_element(int II, int JJ)
+ const
{
-delete[] JA_;
-delete[] IA_;
-delete[] A_;
+const int fII = fsub(II);
+const int fJJ = fsub(JJ);
+
+if (fII > current_N_rows_)
+ then return 0; // this row not defined yet
+
+const int fstart = fIA(fII);
+const int fstop = fIA(fII + 1);
+ for (int fposn = fstart ; fposn < fstop ; ++fposn)
+ {
+ if (fJA(fposn) == fJJ)
+ then return fposn; // found
+ }
+
+return 0; // not found
}
+#endif // HAVE_ROW_SPARSE_JACOBIAN
//******************************************************************************
+#ifdef HAVE_ROW_SPARSE_JACOBIAN
//
-// This function zeros a row_sparse_Jacobian_matrix object.
+// This function inserts a new element in the matrix, starting a new
+// row and/or growing the arrays if necessary. It should only be called
+// if JJ isn't already in this row.
//
-void row_sparse_Jacobian_matrix::zero_matrix()
+int row_sparse_Jacobian::insert_element(int II, int JJ, fp value)
{
-current_N_nonzeros_ = 0;
-current_II_ = 0;
-JA_[current_II_] = fsub(0);
+const int fII = fsub(II);
+const int fJJ = fsub(JJ);
+
+#ifdef DEBUG_ROW_SPARSE_JACOBIAN
+printf(
+ "row_sparse_Jacobian::insert_element(): fII=%d fJJ=%d\n",
+ fII, fJJ);
+#endif
+
+if (fII != current_N_rows_)
+ then {
+ #ifdef DEBUG_ROW_SPARSE_JACOBIAN
+ printf(" starting new row\n");
+ #endif
+ assert(fII == current_N_rows_+1);
+ assert(current_N_rows_ < N_rows_);
+ ++current_N_rows_;
+ fIA(current_N_rows_+1) = fIA(current_N_rows_);
+ #ifdef DEBUG_ROW_SPARSE_JACOBIAN
+ print_data_structure();
+ #endif
+ }
+
+if (fIA(fII+1) > N_nonzeros_allocated_)
+ then {
+ #ifdef DEBUG_ROW_SPARSE_JACOBIAN
+ printf(" growing arrays from N_nonzeros_allocated_=%d",
+ N_nonzeros_allocated_);
+ #endif
+ N_nonzeros_allocated_ += (N_nonzeros_allocated_ >> 1);
+ #ifdef DEBUG_ROW_SPARSE_JACOBIAN
+ printf(" to %d\n", N_nonzeros_allocated_);
+ #endif
+ integer* const new_JA = new integer[N_nonzeros_allocated_];
+ fp * const new_A = new fp [N_nonzeros_allocated_];
+ for (int posn = 0 ; posn < N_nonzeros_ ; ++posn)
+ {
+ new_JA[posn] = JA_[posn];
+ new_A[posn] = A_[posn];
+ }
+ delete[] A_;
+ delete[] JA_;
+ JA_ = new_JA;
+ A_ = new_A;
+ }
+
+++N_nonzeros_;
+const int fposn = fIA(fII+1)++;
+fJA(fposn) = fJJ;
+ fA(fposn) = value;
+#ifdef DEBUG_ROW_SPARSE_JACOBIAN
+printf(" storing at fposn=%d (new N_nonzeros_=%d)\n", fposn, N_nonzeros_);
+#endif
+return fposn;
}
+#endif // HAVE_ROW_SPARSE_JACOBIAN
//******************************************************************************
+#ifdef HAVE_ROW_SPARSE_JACOBIAN
+#ifdef DEBUG_ROW_SPARSE_JACOBIAN
//
-// This function searches our data structures for element (II,JJ).
-// If found, it returns the position (0-origin) in A_ and JA_.
-// If not found, it returns -1.
+// This function prints the sparse-matrix data structure.
//
-int row_sparse_Jacobian_matrix::find_element(int II, int JJ)
+void row_sparse_Jacobian::print_data_structure()
const
{
-const int start = IA(II);
-const int stop = IA(II+1);
- for (int posn = start ; posn < stop ; ++posn)
+printf("--- begin Jacobian printout\n");
+ for (int fII = 1 ; fII <= current_N_rows_ ; ++fII)
{
- if (JA(posn) == JJ)
- then return posn; // found
+ printf("fII=%d", fII);
+ const int fstart = fIA(fII);
+ const int fstop = fIA(fII + 1);
+ printf("\tfposn=[%d,%d):", fstart, fstop);
+ printf("\tfJJ =");
+ for (int fposn = fstart ; fposn < fstop ; ++fposn)
+ {
+ printf(" %d", fJA(fposn));
+ }
+ printf("\n");
}
+printf("--- end Jacobian printout\n");
+}
+#endif // DEBUG_ROW_SPARSE_JACOBIAN
+#endif // HAVE_ROW_SPARSE_JACOBIAN
+
+//******************************************************************************
+//******************************************************************************
+//******************************************************************************
+
+#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG
+//
+// This function constructs a row_sparse_Jacobian__ILUCG object.
+//
+row_sparse_Jacobian__ILUCG::row_sparse_Jacobian__ILUCG
+ (patch_system& ps,
+ bool print_msg_flag /* = false */)
+ : row_sparse_Jacobian(ps, print_msg_flag),
+ print_msg_flag_(print_msg_flag),
+ itemp_(NULL),
+ rtemp_(NULL)
+{ }
+#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG
-return -1; // not found
+//******************************************************************************
+
+#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG
+//
+// This function destroys a row_sparse_Jacobian__ILUCG object.
+//
+row_sparse_Jacobian__ILUCG::~row_sparse_Jacobian__ILUCG()
+{
+delete[] rtemp_;
+delete[] itemp_;
}
+#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG
//******************************************************************************
+#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG
//
-// This function starts a new row in the matrix.
+// This function solves the linear system J.x = rhs, with rhs and x
+// being nominal-grid gridfns, using an ILUCG subroutine originally
+// provided to me in 1985 (!) by Tom Nicol of the UBC Computing Center.
+// It returns -1.0 (no condition number estimate).
//
-void row_sparse_Jacobian_matrix::start_new_row(int II)
+fp row_sparse_Jacobian__ILUCG::solve_linear_system(int rhs_gfn, int x_gfn)
{
-// FIXME FIXME
+assert(current_N_rows_ == N_rows_);
+
+//
+// if this is our first call, allocate the scratch arrays
+//
+if (itemp_ == NULL)
+ then {
+ if (print_msg_flag_)
+ then {
+ CCTK_VInfo(CCTK_THORNSTRING,
+ "row_sparse_Jacobian__ILUCG::solve_linear_system()");
+ CCTK_VInfo(CCTK_THORNSTRING,
+ " N_rows_=%d N_nonzeros_=%d",
+ N_rows_, N_nonzeros_);
+ CCTK_VInfo(CCTK_THORNSTRING,
+ " N_nonzeros_allocated_=%d",
+ N_nonzeros_allocated_);
+ }
+ itemp_ = new integer[3*N_rows_ + 3*N_nonzeros_ + 2];
+ rtemp_ = new fp [4*N_rows_ + N_nonzeros_ ];
+ }
+
+//
+// set up the ILUCG subroutine
+//
+
+// initial guess = all zeros
+fp *x = ps_.gridfn_data(x_gfn);
+ for (int II = 0 ; II < N_rows_ ; ++II)
+ {
+ x[II] = 0.0;
+ }
+
+const integer N = N_rows_;
+const fp *rhs = ps_.gridfn_data(rhs_gfn);
+const fp eps = -1000.0; // solve to 1000 * machine epsilon
+const integer max_iterations = 0; // no limit on iterations
+integer istatus, ierror;
+
+// the actual linear solution
+#if defined(FP_IS_FLOAT)
+ CCTK_FNAME(silucg_wrapper)(&N,
+ IA_, JA_, A_,
+ rhs, x,
+ itemp_, rtemp_,
+ &eps, &max_iterations,
+ &istatus, &ierror);
+#elif defined(FP_IS_DOUBLE)
+ CCTK_FNAME(dilucg_wrapper)(&N,
+ IA_, JA_, A_,
+ rhs, x,
+ itemp_, rtemp_,
+ &eps, &max_iterations,
+ &istatus, &ierror);
+#else
+ #error "don't know fp datatype!"
+#endif
+
+if (ierror != 0)
+ then CCTK_VWarn(-1, __LINE__, __FILE__, CCTK_THORNSTRING,
+"***** row_sparse_Jacobian__ILUCG::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n"
+" error return istatus=%d from [sd]ilucg() routine!"
+ ,
+ rhs_gfn, x_gfn,
+ int(istatus)); /*NOTREACHED*/
+if (print_msg_flag_)
+ then CCTK_VInfo(CCTK_THORNSTRING,
+ " solution found in %d CG iterations",
+ istatus);
+
+return -1.0; // no condition number estimate available
}
+#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG
diff --git a/src/elliptic/row_sparse_Jacobian.hh b/src/elliptic/row_sparse_Jacobian.hh
index 6e4d962..2eb6f6d 100644
--- a/src/elliptic/row_sparse_Jacobian.hh
+++ b/src/elliptic/row_sparse_Jacobian.hh
@@ -3,8 +3,10 @@
//
#ifdef HAVE_ROW_SPARSE_JACOBIAN
-// row_sparse_Jacobian_sparsity -- (no-op) accumulate ... sparsity info
-// row_sparse_Jacobian_matrix -- Jacobian stored as row-oriented sparse matrix
+// row_sparse_Jacobian -- Jacobian stored as row-oriented sparse matrix
+#endif
+#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG
+// row_sparse_Jacobian__ILUCG -- row_sparse_Jacobian with ILUCG linear solver
#endif
//
@@ -14,183 +16,187 @@
// "Jacobian.hh"
//
+#undef DEBUG_ROW_SPARSE_JACOBIAN // defined this for *very* detailed
+ // printing of data structures
+
//******************************************************************************
+#ifdef HAVE_ROW_SPARSE_JACOBIAN
+//
+// This class stores the Jacobian as a row-oriented sparse matrix.
//
-// The row-oriented sparse matrix format is widely used by sparse matrix
-// libraries. In this format, the matrix is represented by 3 arrays:
-// A[N_nonzeros] = nonzero matrix elements, ordered by rows;
+// This sparse matrix format is widely used by sparse matrix libraries.
+// The format is as follows: The matrix is represented by 3 arrays
+// (where () denote Fortran 1-origin subscripting):
+// IA(N_rows+1) = Fortran (1-origin) indices in A of the start
+// of each row's nonzeros; IA(N_rows+1) = N_nonzeros + 1
+// JA(N_nonzeros) = Fortran (1-origin) column numbers of the
+// corresponding entries in A
+// A(N_nonzeros) = nonzero matrix elements, ordered by rows;
// within a row the matrix elements may be in
// any order
-// IA[N_rows+1] = Fortran (1-origin) indices in A of the start
-// of each row's nonzeros
-// JA[N_nonzeros] = Fortran (1-origin) column numbers of the
-// corresponding entries in A
// IA and JA are arrays of integer so as to be storage-compatible with
// Fortran.
//
-
-//******************************************************************************
-
-#ifdef HAVE_ROW_SPARSE_JACOBIAN
-//
-// This class accumulates the sparsity pattern of a row-oriented
-// sparse-matrix Jacobian. We assume that the Jacobian is traversed
-// by rows.
+// For example, the matrix
+// [ 11.0 12.0 13.0 16.0 ]
+// [ 21.0 22.0 24.0 25.0 ]
+// [ 32.0 34.0 ]
+// [ 42.0 44.0 46.0 ]
+// [ 53.0 55.0 ]
+// [ 61.0 64.0 66.0 ]
+// could be represented by the arrays (Fortran indexing)
+// IA( 1) = 1
+// JA( 1) = 1 JA( 2) = 2 JA( 3) = 3 JA( 4) = 6
+// A( 1) = 11.0 A( 2) = 12.0 A( 3) = 13.0 A( 4) = 16.0
+// IA( 2) = 5
+// JA( 5) = 1 JA( 6) = 2 JA( 7) = 4 JA( 8) = 5
+// A( 5) = 21.0 A( 6) = 22.0 A( 7) = 24.0 A( 8) = 25.0
+// IA( 3) = 9
+// JA( 9) = 2 JA(10) = 4
+// A( 9) = 32.0 A(10) = 34.0
+// IA( 4) = 11
+// JA(11) = 2 JA(12) = 4 JA(13) = 6
+// A(11) = 42.0 A(12) = 44.0 A(13) = 46.0
+// IA( 5) = 14
+// JA(14) = 3 JA(15) = 5
+// A(14) = 53.0 A(15) = 55.0
+// IA( 6) = 16
+// JA(16) = 1 JA(17) = 4 JA(18) = 6
+// A(16) = 61.0 A(17) = 64.0 A(18) = 66.0
+// IA( 7) = 19
//
-class row_sparse_Jacobian_sparsity
- : public Jacobian_sparsity
+class row_sparse_Jacobian
+ : public Jacobian
{
public:
- // record the zeroing of the entire matrix
+ //
+ // routines to access the matrix
+ //
+
+ // get a matrix element
+ fp element(int II, int JJ) const;
+
+ // is the matrix element (II,JJ) stored explicitly?
+ bool is_explicitly_stored(int II, int JJ) const
+ { return find_element(II,JJ) > 0; }
+
+
+ //
+ // routines for setting values in the matrix
+ //
+
+ // zero the entire matrix
void zero_matrix();
- // record the setting of a matrix element to a specified value
- void set_element(int II, int JJ, fp value)
- { note_element(II, JJ); }
+ // set a matrix element to a specified value
+ void set_element(int II, int JJ, fp value);
- // record the summing of a value into a matrix element
- void sum_into_element(int II, int JJ, fp value)
- { note_element(II, JJ); }
+ // sum a value into a matrix element
+ void sum_into_element(int II, int JJ, fp value);
- // read out total number of nonzeros
- int N_nonzeros() const
- { return N_nonzeros_; }
+ //
// constructor, destructor
- row_sparse_Jacobian_sparsity(patch_system& ps,
- bool print_msg_flag = false);
- ~row_sparse_Jacobian_sparsity();
-
-private:
- // note start of a new row in the matrix traversal
- void note_new_row(int II);
+ //
+protected:
+ row_sparse_Jacobian(patch_system& ps,
+ bool print_msg_flag = false);
+ ~row_sparse_Jacobian();
- // note that the (II,JJ) matrix element needs to be stored
- void note_element(int II, int JJ);
-private:
//
- // Our goal is to count the total number of distinct matrix
- // elements stored. The problem is, there's no easy way to
- // know what size buffers to allocate to keep track of them.
- // We could use growing buffers, but instead we keep track of
- // the set of distinct columns (JJ) seen within each row (II).
- // (Here a buffer of size NN() is guaranteed to be sufficient.)
- // Then at the end of each row we can update our running total.
- //
- int N_nonzeros_;
- int current_II_;
- int N_JJs_at_this_II_;
-
- // --> new[]-allocated array of JJ values for nonzeros in this row
- // only values [0, N_JJs_at_this_II) used
- int* JJs_at_this_II_;
- };
-#endif // HAVE_ROW_SPARSE_JACOBIAN
+ // internal routines
+ //
+protected:
+ // return position (1-origin) of (II,JJ) in A_ and JA_
+ // return 0 if not found
+ int find_element(int II, int JJ) const;
-//******************************************************************************
+ // insert new array element (II,JJ) in matrix,
+ // starting new row and/or growing arrays if necessary
+ // return position (1-origin) in A_ and JA_
+ int insert_element(int II, int JJ, fp value);
-#ifdef HAVE_ROW_SPARSE_JACOBIAN
-//
-// This class stores the Jacobian as a row-oriented sparse matrix.
-//
-class row_sparse_Jacobian_matrix
- : public Jacobian_matrix
- {
-public:
- // zero the entire matrix
- void zero_matrix();
+#ifdef DEBUG_ROW_SPARSE_JACOBIAN
+ // print data structure (IA_ and JA_ arrays)
+ void print_data_structure() const;
+#endif
- // set a matrix element to a specified value
- void set_element(int II, int JJ, fp value)
+ // access IA[] and JA[] and A[] using 1-origin Fortran indices
+ int& fIA(int fII) const
{
- const int posn = find_element(II, JJ);
- A_[posn] = value;
+ assert(fII >= 1);
+ assert(fII <= current_N_rows_+1);
+ return IA_[csub(fII)];
}
-
- // sum a value into a matrix element
- void sum_into_element(int II, int JJ, fp value)
+ int& fJA(int fposn) const
{
- const int posn = find_element(II, JJ);
- A_[posn] += value;
+ assert(fposn >= 1);
+ assert(fposn <= N_nonzeros_);
+ return JA_[csub(fposn)];
}
-
- // access (get) a matrix element
- fp element(int II, int JJ) const
+ fp& fA(int fposn) const
{
- const int posn = find_element(II, JJ);
- return (posn >= 0) ? A_[posn] : 0.0;
+ assert(fposn >= 1);
+ assert(fposn <= N_nonzeros_);
+ return A_[csub(fposn)];
}
- // this is a dense matrix ==> all values are stored explicitly
- bool is_explicitly_stored(int II, int JJ) const
- { return find_element(II,JJ) >= 0; }
+protected:
+ // actual size, size allocated for JA_ and A_
+ int N_nonzeros_, N_nonzeros_allocated_;
+
+ // at present rows 1 <= fii <= current_N_rows_ are valid
+ int current_N_rows_;
- // solve linear system J.x = rhs via LAPACK
+ // --> new[]-allocated arrays
+ // ***** due to constructor initialization list ordering,
+ // ***** these must be declared *AFTER* N_nonzeros_allocated_
+ integer* IA_; // size N_rows_+1
+ // valid for Fortran indices
+ // 1 <= fII <= current_N_rows_+1
+ // ... these are grown (reallocated) as necessary by insert_element()
+ // ... using STL vector would be much cleaner here, but alas
+ // there are too many broken compilers/linkers in the world
+ // world don't fully grok vector :( :(
+ integer* JA_; // size N_nonzeros_allocated_
+ // valid for Fortran indices
+ // 1 <= fposn < IA(current_N_rows_)
+ fp* A_; // ditto
+ };
+#endif // HAVE_ROW_SPARSE_JACOBIAN
+
+//******************************************************************************
+
+#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG
+//
+// This class defines the linear solver routine using ILUCG.
+//
+class row_sparse_Jacobian__ILUCG
+ : public row_sparse_Jacobian
+ {
+public:
+ // solve linear system J.x = rhs via ILUCG
// ... rhs and x are nominal-grid gridfns
// ... overwrites Jacobian matrix with its LU decomposition
// ... returns condition number if known,
// 0.0 if matrix is numerically singular,
// -1.0 if condition number is unknown
- fp solve_linear_system(int rhs_gfn, int x_gfn)
- { return -1.0; }; // FIXME
+ fp solve_linear_system(int rhs_gfn, int x_gfn);
// constructor, destructor
public:
- row_sparse_Jacobian_matrix(patch_system& ps,
- row_sparse_Jacobian_sparsity& JS,
+ row_sparse_Jacobian__ILUCG(patch_system& ps,
bool print_msg_flag = false);
- ~row_sparse_Jacobian_matrix();
-
-private:
- // return posn (0-origin) of (II,JJ) in A_ and JA_, or -1 if not found
- int find_element(int II, int JJ) const;
-
- // start a new row in the matrix
- void start_new_row(int II);
-
- // insert new array element (II,JJ) in current row
- // return posn (0-origin) in A_ and JA_
- void insert_element(int II, int JJ, fp value);
+ ~row_sparse_Jacobian__ILUCG();
private:
- // IA[] and JA[], but returning 0-origin indices
- int IA(int II) const
- {
- assert(II >= 0);
- assert(II < NN()+1);
- return IA_[II] - 1;
- }
- int JA(int posn) const
- {
- assert(posn >= 0);
- assert(posn < N_nonzeros_);
- return JA_[posn] - 1;
- }
+ bool print_msg_flag_;
- // values in IA_[] and JA_[] are semantically Fortran subscripts
- // this function converts a C subscript to a Fortran subscript
- int fsub(int csub) const
- { return csub + 1; }
-
-private:
- int N_nonzeros_;
-
- //
- // invariants while we're traversing the matrix:
- // A_[0,current_N_nonzeros_) is valid
- // IA_[0,current_II] is valid
- // JA_[0,current_N_nonzeros_) is valid
- //
- int current_N_nonzeros_;
- int current_II_;
-
- // --> new[]-allocated arrays
- // ***** due to constructor initialization list ordering,
- // ***** these must be declared *AFTER* N_nonzeros_
- fp* A_; // size N_nonzeros_;
- integer* IA_; // size NN() + 1
- integer* JA_; // size N_nonzeros_;
+ // work vectors for ILUCG subroutine
+ // ... allocated by solve_linear_system() on first call
+ integer* itemp_;
+ fp* rtemp_;
};
-#endif // HAVE_ROW_SPARSE_JACOBIAN
+#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG