diff options
author | jthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5> | 2003-01-22 12:27:46 +0000 |
---|---|---|
committer | jthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5> | 2003-01-22 12:27:46 +0000 |
commit | 8b0a74a327fbecb12f3d32ecff1fa28a6a6c2a15 (patch) | |
tree | 434af140b12333ff085ea5c5983d46387cbe6267 /src/elliptic | |
parent | 82e2251230c9dc90d1b4650bafc85e75bad61ad7 (diff) |
* add support for sparse-matrix Jacobians ==> works!
* change default in param.ccl to use this
* change default in src/include/config.h to default to no longer
link in LAPACK routines
* update documentation
git-svn-id: http://svn.einsteintoolkit.org/cactus/EinsteinAnalysis/AHFinderDirect/trunk@931 f88db872-0e4f-0410-b76b-b9085cfa78c5
Diffstat (limited to 'src/elliptic')
-rw-r--r-- | src/elliptic/Jacobian.cc | 114 | ||||
-rw-r--r-- | src/elliptic/Jacobian.hh | 202 | ||||
-rw-r--r-- | src/elliptic/README | 40 | ||||
-rw-r--r-- | src/elliptic/dense_Jacobian.cc | 113 | ||||
-rw-r--r-- | src/elliptic/dense_Jacobian.hh | 102 | ||||
-rw-r--r-- | src/elliptic/ilucg.f77 | 1055 | ||||
-rw-r--r-- | src/elliptic/ilucg_wrapper.F77 | 72 | ||||
-rw-r--r-- | src/elliptic/lapack.h | 2 | ||||
-rw-r--r-- | src/elliptic/lapack_wrapper.F77 | 59 | ||||
-rw-r--r-- | src/elliptic/make.code.defn | 4 | ||||
-rw-r--r-- | src/elliptic/row_sparse_Jacobian.cc | 455 | ||||
-rw-r--r-- | src/elliptic/row_sparse_Jacobian.hh | 282 |
12 files changed, 1977 insertions, 523 deletions
diff --git a/src/elliptic/Jacobian.cc b/src/elliptic/Jacobian.cc index 048c6ac..08d8ecc 100644 --- a/src/elliptic/Jacobian.cc +++ b/src/elliptic/Jacobian.cc @@ -4,9 +4,8 @@ // // <<<literal contents of "lapack.hh">>> // -// decode_Jacobian_type -// new_Jacobian_sparsity -// new_Jacobian_matrix +// decode_Jacobian_store_solve_method -- decode string into internal enum +// new_Jacobian -- object factory for Jacobian objects // #include <stdio.h> @@ -44,111 +43,76 @@ using jtutil::error_exit; //****************************************************************************** // -// This function decodes a character string specifying a type (derived class) -// of Jacobian, into an internal enum. +// This function decodes a character string specifying a specific type +// (derived class) of Jacobian, into an internal enum. // -enum Jacobian_type - decode_Jacobian_type(const char Jacobian_type_string[]) +enum Jacobian_store_solve_method + decode_Jacobian_store_solve_method + (const char Jacobian_store_solve_method_string[]) { -if (STRING_EQUAL(Jacobian_type_string, "dense matrix")) +if (STRING_EQUAL(Jacobian_store_solve_method_string, + "dense matrix/LAPACK")) then { - #ifdef HAVE_DENSE_JACOBIAN - return Jacobian_type__dense_matrix; + #ifdef HAVE_DENSE_JACOBIAN__LAPACK + return Jacobian__dense_matrix__LAPACK; #endif } -else if (STRING_EQUAL(Jacobian_type_string, "row-oriented sparse matrix")) +else if (STRING_EQUAL(Jacobian_store_solve_method_string, + "row-oriented sparse matrix/ILUCG")) then { - #ifdef HAVE_ROW_SPARSE_JACOBIAN - return Jacobian_type__row_sparse_matrix; + #ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG + return Jacobian__row_sparse_matrix__ILUCG; #endif } else error_exit(ERROR_EXIT, -"decode_Jacobian_type(): unknown Jacobian_type_string=\"%s\"!\n", - Jacobian_type_string); /*NOTREACHED*/ +"decode_Jacobian_store_solve_method():\n" +" unknown Jacobian_store_solve_method_string=\"%s\"!\n", + Jacobian_store_solve_method_string); /*NOTREACHED*/ -// fall through to here ==> we recognize the matrix type, +// fall through to here ==> we recognize the matrix store_solve_method, // but it's not configured error_exit(ERROR_EXIT, "\n" -" decode_Jacobian_type():\n" -" Jacobian type \"%s\" is not configured in this binary;\n" -" see \"src/include/config.hh\" for details on what Jacobian types\n" -" are configured and how to change this\n" +" decode_Jacobian_store_solve_method():\n" +" Jacobian store_solve_method=\"%s\"\n" +" is not configured in this binary see \"src/include/config.hh\"\n" +" for details on what methods are configured and how to change this\n" , - Jacobian_type_string); /*NOTREACHED*/ + Jacobian_store_solve_method_string); /*NOTREACHED*/ } //****************************************************************************** // -// This function is an "object factory" for Jacobian_sparsity objects: -// it constructs and returns a pointer to a new-allocated Jacobian_sparsity -// object of the specified derived type. +// This function is an "object factory" for Jacobian objects: it constructs +// and returns a pointer to a new-allocated Jacobian object of the +// specified derived type. // // FIXME: the patch system shouldn't really have to be non-const, but // the Jacobian constructors all require this to allow the // linear solvers to directly update gridfns // -Jacobian_sparsity* new_Jacobian_sparsity(enum Jacobian_type Jac_type, - patch_system& ps, - bool print_msg_flag /* = false */) +Jacobian* new_Jacobian(enum Jacobian_store_solve_method Jac_method, + patch_system& ps, + bool print_msg_flag /* = false */) { -switch (Jac_type) +switch (Jac_method) { -#ifdef HAVE_DENSE_JACOBIAN - case Jacobian_type__dense_matrix: - return new dense_Jacobian_sparsity(ps, print_msg_flag); +#ifdef HAVE_DENSE_JACOBIAN__LAPACK + case Jacobian__dense_matrix__LAPACK: + return new dense_Jacobian__LAPACK(ps, print_msg_flag); #endif -#ifdef HAVE_ROW_SPARSE_JACOBIAN - case Jacobian_type__row_sparse_matrix: - return new row_sparse_Jacobian_sparsity(ps, print_msg_flag); +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG + case Jacobian__row_sparse_matrix__ILUCG: + return new row_sparse_Jacobian__ILUCG(ps, print_msg_flag); #endif default: error_exit(ERROR_EXIT, -"new_Jacobian_sparsity(): unknown Jacobian_type=(int)%d!\n", - Jac_type); /*NOTREACHED*/ - } -} - -//****************************************************************************** - -// -// This function is an "object factory" for Jacobian_matrix objects: -// it constructs and returns a pointer to a new-allocated Jacobian_matrix -// object of the specified derived type. -// -// FIXME: the patch system shouldn't really have to be non-const, but -// the Jacobian constructors all require this to allow the -// linear solvers to directly update gridfns -// -Jacobian_matrix* new_Jacobian_matrix(enum Jacobian_type Jac_type, - patch_system& ps, - Jacobian_sparsity& JS, - bool print_msg_flag /* = false */) -{ -switch (Jac_type) - { -#ifdef HAVE_DENSE_JACOBIAN - case Jacobian_type__dense_matrix: - return new dense_Jacobian_matrix - (ps, static_cast<dense_Jacobian_sparsity&>(JS), - print_msg_flag); -#endif - -#ifdef HAVE_ROW_SPARSE_JACOBIAN - case Jacobian_type__row_sparse_matrix: - return new row_sparse_Jacobian_matrix - (ps, static_cast<row_sparse_Jacobian_sparsity&>(JS), - print_msg_flag); -#endif - - default: - error_exit(ERROR_EXIT, -"new_Jacobian_matrix(): unknown Jacobian_type=(int)%d!\n", - Jac_type); /*NOTREACHED*/ + "new_Jacobian(): unknown method=(int)%d!\n", + int(Jac_method)); /*NOTREACHED*/ } } diff --git a/src/elliptic/Jacobian.hh b/src/elliptic/Jacobian.hh index d4b90ab..64ca182 100644 --- a/src/elliptic/Jacobian.hh +++ b/src/elliptic/Jacobian.hh @@ -3,10 +3,7 @@ // // Jacobian -- ABC to describe Jacobian matrix -// Jacobian_sparsity -- ABC to accumulate Jacobian sparsity info -// Jacobian_matrix -- ABC to store/use Jacobian matrix -// -// decode_Jacobian_type - decode string into internal enum +// decode_Jacobian_store_solve_method - decode string into internal enum // new_Jacobian - factory method // @@ -16,53 +13,67 @@ // //****************************************************************************** -//****************************************************************************** -//****************************************************************************** // // These classes are used to store and manipulate Jacobian matrices // for a patch system. // -// The inheritance diagram for the Jacobian classes looks like this: +// Jacobian is an abstract base class (ABC) that defines the generic +// Jacobian API. We derive classes from it for each type of sparsity +// pattern, then we derive classes from them for each linear solver. +// +// At present the inheritance graph looks like this: // Jacobian -// Jacobian_sparsity -// dense_Jacobian_sparsity #ifdef HAVE_DENSE_JACOBIAN -// row_sparse_Jacobian_sparsity #ifdef HAVE_ROW_SPARSE_JACOBIAN -// Jacobian_matrix -// dense_Jacobian_matrix #ifdef HAVE_DENSE_JACOBIAN -// row_sparse_Jacobian_matrix #ifdef HAVE_ROW_SPARSE_JACOBIAN -// where the most-derived classes are all conditional on the noted #ifdef -// symbols. -// -// The Jacobian_sparsity objects are used to accumulate information -// on the Jacobian's sparsity pattern; this is then used in constructing -// the corresponding Jacobian_matrix object (which actually stores -// the Jacobian, and contains member functions to solve elliptic systems -// using the Jacobian as the LHS matrix). +// dense_Jacobian +// dense_Jacobian__LAPACK +// row_sparse_Jacobian +// row_sparse_Jacobian__ILUCG +// each derived class is inside a corresponding #ifdef. The #ifdef +// symbols for linear solvers are specified in "../include/config.h"; +// those for sparsity patterns are specified below based on the ones +// for linear solvers. +// + +#ifdef HAVE_DENSE_JACOBIAN__LAPACK + #define HAVE_DENSE_JACOBIAN +#endif + +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG + #define HAVE_ROW_SPARSE_JACOBIAN +#endif + +//****************************************************************************** + // // We assume a "traversal" interface for computing Jacobians, defined -// by the Jacobian API. Because this API is shared by (all) the -// Jacobian_sparsity and Jacobian_matrix classes, the same traversal -// routines (prototyped to update Jacobian objects) can be used both -// to record sparsity patterns (ignoring the actual floating-point values) -// and to store the Jacobian matrix. -// -// Thus the typical code to construct and use a Jacobian matrix looks -// like this: +// by the Jacobian API. The typical code to construct and use a +// Jacobian matrix looks like this: // // prototype -// void trverse_Jacobian(const patch_system& ps, Jacobian& Jac); +// // ... calls Jac.zero_matrix() +// // Jac.set_element() +// // Jac.sum_into_element() +// void traverse_Jacobian(const patch_system& ps, Jacobian& Jac); // -// Jacobian_sparsity& JS = new_Jacobian_sparsity(Jac_type, ps); -// traverse_Jacobian(ps, JS); -// Jacobian_matrix& Jac = new_Jacobian_matrix(Jac_type, ps, JS); +// Jacobian& Jac = new_Jacobian(Jac_type, ps); // traverse_Jacobian(ps, Jac); // Jac.solve_linear_system(...); // +// Jac.zero_matrix() +// traverse_Jacobian(ps, Jac); // must specify same sparsity pattern +// // as before +// Jac.solve_linear_system(...); +// +// // // A row/column index of the Jacobian (denoted II/JJ) is a 0-origin grid // point number within the patch system. // +// Since many of the derived classes use Fortran routines, we also use +// 1-origin indices; these have a leading "f", eg fII/fJJ. +// + +// // Note that the APIs here implicitly assume there is only a *single* gridfn // in the Jacobian computation. (If we had N gridfns for this, then the // Jacobian would really be a block matrix with N*N blocks.) This isn't @@ -71,13 +82,13 @@ //****************************************************************************** -// ABC to define Jacobian-traversal interface +// ABC to define Jacobian matrix class Jacobian { public: // basic meta-info patch_system& my_patch_system() const { return ps_; } - int NN() const { return NN_; } + int N_rows() const { return N_rows_; } // convert (patch,irho,isigma) <--> row/column index int II_of_patch_irho_isigma(const patch& p, int irho, int isigma) @@ -87,6 +98,29 @@ public: const { return ps_.patch_irho_isigma_of_gpn(II, irho,isigma); } + + // + // convert C <--> Fortran indices + // + int csub(int f) const { return f-1; } + int fsub(int c) const { return c+1; } + + + // + // routines to access the matrix + // + + // get a matrix element + virtual fp element(int II, int JJ) const = 0; + + // is a given element explicitly stored, or implicitly 0 via sparsity + virtual bool is_explicitly_stored(int II, int JJ) const = 0; + + + // + // routines for setting values in the matrix + // + // zero the entire matrix virtual void zero_matrix() = 0; @@ -96,11 +130,28 @@ public: // sum a value into a matrix element virtual void sum_into_element(int II, int JJ, fp value) = 0; -protected: + + // + // solve linear system J.x = rhs + // ... rhs and x are nominal-grid gridfns + // ... may modify Jacobian matrix (eg for LU decomposition) + // ... returns 0.0 if matrix is numerically singular + // condition number (> 0.0) if known + // -1.0 if condition number is unknown + // ... once this has been called, the sparsity pattern should + // not be changed, i.e. no new nonzeros should be introduced + // into the matrix + // + virtual fp solve_linear_system(int rhs_gfn, int x_gfn) = 0; + + + // // constructor, destructor + // +protected: Jacobian(patch_system& ps) : ps_(ps), - NN_(ps.N_grid_points()) + N_rows_(ps.N_grid_points()) { } public: virtual ~Jacobian() { } @@ -114,66 +165,18 @@ private: protected: patch_system& ps_; - int NN_; // ps_.N_grid_points() - }; - -//****************************************************************************** - -// ABC to accumulate Jacobian sparsity info -class Jacobian_sparsity - : public Jacobian - { -public: - Jacobian_sparsity(patch_system& ps, - bool print_msg_flag = false) - : Jacobian(ps) - { } - virtual ~Jacobian_sparsity() { } - }; - -//****************************************************************************** - -// Jacobian_matrix -- ABC to store/use Jacobian matrix -class Jacobian_matrix - : public Jacobian - { -public: - // access (get) a matrix element - virtual fp element(int II, int JJ) const = 0; - - // is a given element explicitly stored, or implicitly 0 via sparsity - virtual bool is_explicitly_stored(int II, int JJ) const = 0; - - // solve linear system J.x = rhs - // ... rhs and x are nominal-grid gridfns - // ... may modify Jacobian matrix (eg for LU decomposition) - // ... returns 0.0 if matrix is numerically singular - // condition number (> 0.0) if known - // -1.0 if condition number is unknown - virtual fp solve_linear_system(int rhs_gfn, int x_gfn) = 0; - - // constructor, destructor - // ... note Jacobian_sparsity argument of constructor is non-const - // to allow us to take over ownership of data structures - Jacobian_matrix(patch_system& ps, - Jacobian_sparsity& JS, - bool print_msg_flag = false) - : Jacobian(ps) - { } - virtual ~Jacobian_matrix() { } + int N_rows_; }; //****************************************************************************** -//****************************************************************************** -//****************************************************************************** -enum Jacobian_type +enum Jacobian_store_solve_method { -#ifdef HAVE_DENSE_JACOBIAN - Jacobian_type__dense_matrix, +#ifdef HAVE_DENSE_JACOBIAN__LAPACK + Jacobian__dense_matrix__LAPACK, #endif -#ifdef HAVE_ROW_SPARSE_JACOBIAN - Jacobian_type__row_sparse_matrix // no comma on last entry in enum +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG + Jacobian__row_sparse_matrix__ILUCG // no comma on last entry in enum #endif }; @@ -182,17 +185,12 @@ enum Jacobian_type // // decode string into internal enum -enum Jacobian_type - decode_Jacobian_type(const char Jacobian_type_string[]); +enum Jacobian_store_solve_method + decode_Jacobian_store_solve_method + (const char Jacobian_store_solve_method_string[]); // "object factory" routines to construct and return // pointers to new-allocated objects of specified derived types -class Jacobian_sparsity; -class Jacobian_matrix; -Jacobian_sparsity* new_Jacobian_sparsity(enum Jacobian_type Jac_type, - patch_system& ps, - bool print_msg_flag = false); -Jacobian_matrix* new_Jacobian_matrix(enum Jacobian_type Jac_type, - patch_system& ps, - Jacobian_sparsity& JS, - bool print_msg_flag = false); +Jacobian* new_Jacobian(enum Jacobian_store_solve_method Jac_method, + patch_system& ps, + bool print_msg_flag = false); diff --git a/src/elliptic/README b/src/elliptic/README index e20a5ba..09fa662 100644 --- a/src/elliptic/README +++ b/src/elliptic/README @@ -3,6 +3,40 @@ This directory contains code for solving elliptic systems on our The main files are as follows: -Jacobian.{cc,hh} - These classes stores a Jacobian matrix and interface to - linear-solver routines. +Jacobian.{hh,cc} + These define a Jacobian class. This is a generic interface + (a C++ abstract base class) to set up a Jacobian matrix and + solve linear systems with this matrix as the right hand side + matrix. + +dense_Jacobian.{hh,cc} + These define a dense_Jacobian class (to store a Jacobian matrix + in a Fortran dense-matrix format) and a dense_Jacobian__LAPACK + class (to solve linear systems using LAPACK routines). These + classes are derived from (and hence share the generic interface + of) the Jacobian class. + +row_sparse_Jacobian.{hh,cc} + These define a row_sparse_Jacobian class (to store a Jacobian + matrix in a row-oriented sparse-matrix format) and a + row_sparse_Jacobian__ILUCG class (to solve linear systems using + an ILUCG). These classes are derived from (and hence share the + generic interface of) the Jacobian class. + +lapack.h + Header file defining C/C++ prototypes for a few LAPACK routines. +lapack_wrapper.F77 + Wrapper routines around a few LAPACK routines, to avoid problems + with C <--> Fortran passing of character-string arguments. + +ilucg.h + Header file defining C/C++ prototypes for ILUCG routines +ilucg.f77 + ILUCG (Incomplete LU-decomposition / conjugate gradiant) + subroutine. I don't know the original author of this routine; + it was provided to me by Tom Nicol, UBC Computing Center, + in c.1985. +ilucg_wrapper.F77 + Wrapper routines around the ILUCG routines, to avoid problems + with C <--> Fortran passing of Fortran logical return results. + diff --git a/src/elliptic/dense_Jacobian.cc b/src/elliptic/dense_Jacobian.cc index 72dc797..7999513 100644 --- a/src/elliptic/dense_Jacobian.cc +++ b/src/elliptic/dense_Jacobian.cc @@ -1,15 +1,20 @@ -// Jacobian.cc -- data structures for the Jacobian matrix +// dense_Jacobian.cc -- dense-matrix Jacobian // $Header$ // // <<<literal contents of "lapack.hh">>> // #ifdef HAVE_DENSE_JACOBIAN -// dense_Jacobian_matrix::dense_Jacobian_matrix -// dense_Jacobian_matrix::~dense_Jacobian_matrix -// dense_Jacobian_matrix::zero_matrix -// dense_Jacobian_matrix::solve_linear_system +// dense_Jacobian::dense_Jacobian +// dense_Jacobian::zero_matrix #endif +// +#ifdef HAVE_DENSE_JACOBIAN__LAPACK +// dense_Jacobian__LAPACK::dense_Jacobian__LAPACK +// dense_Jacobian__LAPACK::~dense_Jacobian__LAPACK +// dense_Jacobian__LAPACK::solve_linear_system +#endif +// #include <stdio.h> #include <assert.h> @@ -45,8 +50,10 @@ using jtutil::error_exit; //****************************************************************************** // -// FIXME: Cactus's CCTK_FCALL() isn't expanded in .h files (this is a bug), -// so we include the contents of "lapack.h" inline here. +// FIXME: +// Cactus's CCTK_FCALL() isn't expanded in .h files (this is a bug), +// so we include the contents of "lapack.h" inline here. This is a +// kludge! :( :( //#include "lapack.h" // @@ -57,7 +64,7 @@ using jtutil::error_exit; /* * prerequisites: * "cctk.h" - * "config.hh" + * "config.h" */ #ifdef __cplusplus @@ -116,21 +123,17 @@ void CCTK_FCALL #ifdef HAVE_DENSE_JACOBIAN // -// This function constructs a dense_Jacobian_matrix object. +// This function constructs a dense_Jacobian object. // -dense_Jacobian_matrix::dense_Jacobian_matrix(patch_system& ps, - dense_Jacobian_sparsity& JS, - bool print_msg_flag /* = false */) - : Jacobian_matrix(ps, JS), - matrix_(0,NN()-1, 0,NN()-1), - pivot_(new integer[NN()]), - iwork_(new integer[NN()]), - rwork_(new fp[4*NN()]) // no comma +dense_Jacobian::dense_Jacobian(patch_system& ps, + bool print_msg_flag /* = false */) + : Jacobian(ps), + matrix_(0,N_rows_-1, 0,N_rows_-1) { if (print_msg_flag) then CCTK_VInfo(CCTK_THORNSTRING, - "dense_Jacobian_matrix ctor: NN()=%d", - NN()); + "constructing dense_Jacobian: N_rows_=%d", + N_rows_); } #endif // HAVE_DENSE_JACOBIAN @@ -138,55 +141,73 @@ if (print_msg_flag) #ifdef HAVE_DENSE_JACOBIAN // -// THis function destroys a dense_Jacobian_matrix object. +// This function zeros a dense_Jacobian object. +// Bugs: it could be made more efficient... // -dense_Jacobian_matrix::~dense_Jacobian_matrix() +void dense_Jacobian::zero_matrix() { -delete[] iwork_; -delete[] rwork_; -delete[] pivot_; + for (int JJ = 0 ; JJ < N_rows_ ; ++JJ) + { + for (int II = 0 ; II < N_rows_ ; ++II) + { + set_element(II,JJ, 0.0); + } + } } #endif // HAVE_DENSE_JACOBIAN //****************************************************************************** +//****************************************************************************** +//****************************************************************************** -#ifdef HAVE_DENSE_JACOBIAN +#ifdef HAVE_DENSE_JACOBIAN__LAPACK // -// This function zeros a dense_Jacobian_matrix object. -// Bugs: it could be made more efficient... +// This function constructs a dense_Jacobian__LAPACK object. // -void dense_Jacobian_matrix::zero_matrix() +dense_Jacobian__LAPACK::dense_Jacobian__LAPACK(patch_system& ps, + bool print_msg_flag /* = false */) + : dense_Jacobian(ps, print_msg_flag), + pivot_(new integer[ N_rows_]), + iwork_(new integer[ N_rows_]), + rwork_(new fp [4*N_rows_]) { - for (int JJ = 0 ; JJ < NN() ; ++JJ) - { - for (int II = 0 ; II < NN() ; ++II) - { - set_element(II,JJ, 0.0); - } - } } -#endif // HAVE_DENSE_JACOBIAN +#endif // HAVE_DENSE_JACOBIAN__LAPACK //****************************************************************************** -#ifdef HAVE_DENSE_JACOBIAN +#ifdef HAVE_DENSE_JACOBIAN__LAPACK +// +// This function destroys a dense_Jacobian__LAPACK object. +// +dense_Jacobian__LAPACK::~dense_Jacobian__LAPACK() +{ +delete[] rwork_; +delete[] iwork_; +delete[] pivot_; +} +#endif // HAVE_DENSE_JACOBIAN__LAPACK + +//****************************************************************************** + +#ifdef HAVE_DENSE_JACOBIAN__LAPACK // // This function solves the linear system J.x = rhs, with rhs and x // being nominal-grid gridfns, using LAPACK LU-decomposition routines. // It returns the estimated infinity-norm condition number of the linear // system, or 0.0 if the matrix is numerically singular // -fp dense_Jacobian_matrix::solve_linear_system(int rhs_gfn, int x_gfn) +fp dense_Jacobian__LAPACK::solve_linear_system(int rhs_gfn, int x_gfn) { -const fp *rhs = my_patch_system().gridfn_data(rhs_gfn); -fp *x = my_patch_system().gridfn_data(x_gfn); +const fp *rhs = ps_.gridfn_data(rhs_gfn); +fp *x = ps_.gridfn_data(x_gfn); fp *A = matrix_.data_array(); const integer infinity_norm_flag = 0; const integer stride = 1; const integer NRHS = 1; -const integer N = NN(); -const integer N2 = NN() * NN(); +const integer N = N_rows_; +const integer N2 = N_rows_ * N_rows_; // compute the infinity-norm of the matrix A // ... max_posn = 1-origin index of A[] element with largest absolute value @@ -204,7 +225,7 @@ const fp A_infnorm = jtutil::abs(A[max_posn-1]); // ... [sd]gesv() use an "in out" design, where the same argument // is used for both rhs and x ==> we must first copy rhs to x // - for (int II = 0 ; II < NN() ; ++II) + for (int II = 0 ; II < N_rows_ ; ++II) { x[II] = rhs[II]; } @@ -220,8 +241,8 @@ integer info; if (info < 0) then error_exit(ERROR_EXIT, "\n" -"***** dense_Jacobian_matrix::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n" -" error return (bad argument) info=%d from [sd]gesv() LAPACK routine!\n" +"***** dense_Jacobian__LAPACK::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n" +" error return (bad argument) info=%d from [sd]gesv() LAPACK routine!" , rhs_gfn, x_gfn, int(info)); /*NOTREACHED*/ @@ -248,4 +269,4 @@ if (rcond == 0.0) // *** (singular matrix) return 1.0/rcond; } -#endif // HAVE_DENSE_JACOBIAN +#endif // HAVE_DENSE_JACOBIAN__LAPACK diff --git a/src/elliptic/dense_Jacobian.hh b/src/elliptic/dense_Jacobian.hh index 1e269d5..0c85f47 100644 --- a/src/elliptic/dense_Jacobian.hh +++ b/src/elliptic/dense_Jacobian.hh @@ -1,10 +1,12 @@ -// Jacobian.hh -- data structures for the Jacobian matrix +// dense_Jacobian.hh -- dense-matrix Jacobian // $Header$ // #ifdef HAVE_DENSE_JACOBIAN -// dense_Jacobian_sparsity -- (no-op) accumulate dense Jacobian sparsity info -// dense_Jacobian_matrix -- Jacobian stored as a dense matrix +// dense_Jacobian -- Jacobian stored as a dense matrix +#endif +#ifdef HAVE_DENSE_JACOBIAN__LAPACK +// dense_Jacobian__LAPACK -- dense_Jacobian with LAPACK linear solver #endif // @@ -18,44 +20,30 @@ #ifdef HAVE_DENSE_JACOBIAN // -// This class accumulates the sparsity pattern of a dense-matrix Jacobian. -// -// Since a dense-matrix Jacobian doesn't care about the sparsity pattern, -// the member functions of this class are just no-ops. +// This class stores the Jacobian as a dense matrix in Fortran (column) +// order. // -class dense_Jacobian_sparsity - : public Jacobian_sparsity +class dense_Jacobian + : public Jacobian { public: - // record the zeroing of the entire matrix - void zero_matrix() { } + // + // routines to access the matrix + // - // record the setting of a matrix element to a specified value - void set_element(int II, int JJ, fp value) { } + // get a matrix element + fp element(int II, int JJ) const + { return matrix_(JJ,II); } - // record the summing of a value into a matrix element - void sum_into_element(int II, int JJ, fp value) { } + // dense matrix ==> all elements are explicitly stored + bool is_explicitly_stored(int II, int JJ) const + { return true; } - // constructor, destructor - dense_Jacobian_sparsity(patch_system& ps, - bool print_msg_flag = false) - : Jacobian_sparsity(ps, print_msg_flag) - { } - ~dense_Jacobian_sparsity() { } - }; -#endif // HAVE_DENSE_JACOBIAN -//****************************************************************************** + // + // routines for setting values in the matrix + // -#ifdef HAVE_DENSE_JACOBIAN -// -// This class stores the Jacobian as a dense matrix in Fortran (column) -// order. -// -class dense_Jacobian_matrix - : public Jacobian_matrix - { -public: // zero the entire matrix void zero_matrix(); @@ -67,16 +55,30 @@ public: void sum_into_element(int II, int JJ, fp value) { matrix_(JJ,II) += value; } - // access (get) a matrix element - fp element(int II, int JJ) - const - { return matrix_(JJ,II); } + // + // constructor, destructor + // +protected: + dense_Jacobian(patch_system& ps, + bool print_msg_flag = false); + ~dense_Jacobian() { } + +protected: + // Fortran storage order ==> subscripts are (JJ,II) + jtutil::array2d<fp> matrix_; + }; +#endif // HAVE_DENSE_JACOBIAN - // this is a dense matrix ==> all values are stored explicitly - bool is_explicitly_stored(int II, int JJ) - const - { return true; } +//****************************************************************************** +#ifdef HAVE_DENSE_JACOBIAN__LAPACK +// +// This class defines the linear solver routine using LAPACK. +// +class dense_Jacobian__LAPACK + : public dense_Jacobian + { +public: // solve linear system J.x = rhs via LAPACK // ... rhs and x are nominal-grid gridfns // ... overwrites Jacobian matrix with its LU decomposition @@ -87,20 +89,16 @@ public: // constructor, destructor public: - dense_Jacobian_matrix(patch_system& ps, - dense_Jacobian_sparsity& JS, - bool print_msg_flag = false); - ~dense_Jacobian_matrix(); + dense_Jacobian__LAPACK(patch_system& ps, + bool print_msg_flag = false); + ~dense_Jacobian__LAPACK(); private: - // subscripts are (JJ,II) (both 0-origin) - jtutil::array2d<fp> matrix_; - // pivot vector for LAPACK routines - integer *pivot_; + integer *pivot_; // size N_rows_ // work vectors for LAPACK condition number computation - integer *iwork_; - fp *rwork_; + integer *iwork_; // size N_rows_ + fp *rwork_; // size 4*N_rows_ }; -#endif // HAVE_DENSE_JACOBIAN +#endif // HAVE_DENSE_JACOBIAN__LAPACK diff --git a/src/elliptic/ilucg.f77 b/src/elliptic/ilucg.f77 new file mode 100644 index 0000000..68f99f5 --- /dev/null +++ b/src/elliptic/ilucg.f77 @@ -0,0 +1,1055 @@ + LOGICAL FUNCTION SILUCG(N,IA,JA,A,B,X,ITEMP,RTEMP,EPS,ITER,IE) + IMPLICIT REAL (A-H,O-Z) +C +C INCOMPLETE LU DECOMPOSITION-CONJUGATE GRADIENT +C - -- - - +C WHERE: +C |N| IS THE NUMBER OF EQUATIONS. IF N < 0, ITEMP AND +C RTEMP CONTAIN THE ILU FROM A PREVIOUS CALL AND +C B AND X ARE THE NEW RHS AND INITIAL GUESS. +C IA IS AN INTEGER ARRAY DIMENSIONED |N|+1. IA(I) IS THE +C INDEX INTO ARRAYS JA AND A OF THE FIRST NON-ZERO +C ELEMENT IN ROW I. LET MAX=IA(|N|+1)-IA(1). +C JA IS AN INTEGER ARRAY DIMENSIONED MAX. JA(K) GIVES +C THE COLUMN NUMBER OF A(K). +C A IS A REAL ARRAY DIMENSIONED MAX. IT CONTAINS THE +C NONZERO ELEMENTS OF THE MATRIX STORED BY ROW. +C B CONTAINS THE RHS VECTOR. +C X IS A REAL ARRAY DIMENSIONED |N|. ON ENTRY, IT CONTAINS +C AN INITIAL ESTIMATE; ON EXIT, THE SOLUTION. +C ITEMP IS AN INTEGER SCRATCH ARRAY DIMENSIONED 3*(|N|+MAX)+2. +C RTEMP IS AN REAL SCRATCH ARRAY DIMENSIONED 4*|N|+MAX. +C EPS IS THE CONVERGENCE CRITERIA. IT SPECIFIES THE RELATIVE +C ERROR ALLOWED IN THE SOLUTION. TO BE PRECISE, CONVERGENCE +C IS DEEMED TO HAVE OCCURED WHEN THE INFINITY-NORM OF THE +C CHANGE IN THE SOLUTION IN ONE ITERATION IS .LE. EPS * THE +C INFINITY-NORM OF THE CURRENT SOLUTION. HOWEVER, IF EPS +C .LT. 0.0, IT IS INTERNALLY SCALED BY THE MACHINE PRECISION, +C SO THAT, FOR EXAMPLE, EPS = -256.0 WILL ALLOW THE LAST TWO +C HEXADECIMAL DIGITS OF THE SOLUTION TO BE IN ERROR. +C ITER GIVES THE REQUESTED NUMBER OF ITERATIONS, OR IS 0 FOR "NO LIMIT". +C IE IS AN INTEGER VARIABLE. FOR NORMAL RETURN, IT GIVES +C THE NUMBER OF ITERATIONS DONE (NEGATIVE IS NO CONVERGENCE). +C ON ERROR RETURN, THE ROW IN ERROR. +C +C THE FUNCTION RETURNS .FALSE. IF ALL'S WELL, .TRUE. ON AN ERROR RETURN. +C +C (MODIFIED TO RETURN LOGICAL VALUE, J. THORNBURG, 13/MAY/85.) +C (MODIFIED TO ADD CONVERGENCE CRITERIA, J. THORNBURG, 17/MAY/85.) +C (MODIFIED CONVERGENCE CRITERIA, J. THORNBURG, 4/AUG/89.) +C (MODIFIED TO AVOID SKIP RETURNS, J. THORNBURG, 10/SEP/89.) +C +C REFERENCE: +C D.S.KERSHAW,"THE INCOMPLETE CHOLESKY-CONJUGATE GRADIENT +C METHOD FOR INTERATIVE SOLUTION OF LINEAR EQUATIONS", +C J.COMPUT.PHYS. JAN 1978 PP 43-65 +C + DIMENSION IA(*),JA(*),A(*),B(*),X(*),ITEMP(*),RTEMP(*) + LOGICAL SLU0 + NP=IABS(N) + IE=0 + IF (NP.EQ.0) GO TO 20 +C CALCULATE INDICES FOR BREAKING UP TEMPORARY ARRAYS. + N1=NP+1 + MAX=IA(N1)-IA(1) + ILU=1 + JLU=ILU+N1 + ID=JLU+MAX + IC=ID+NP + JC=IC+N1 + JCI=JC+MAX + IR=1 + IP=IR+NP + IS1=IP+NP + IS2=IS1+NP + IALU=IS2+NP + IF (N.LT.0) GO TO 10 +C DO INCOMPLETE LU DECOMPOSITION + IF (SLU0(NP,IA,JA,A,ITEMP(IC),ITEMP(JC),ITEMP(JCI),RTEMP(IALU), + * ITEMP(ILU),ITEMP(JLU),ITEMP(ID),RTEMP(IR),IE)) GOTO 20 +C AND DO CONJUGATE GRADIENT ITERATIONS +C CALL MODIFIED TO ADD ADJUSTABLE CONVERGENCE CRITERIA EPS +C - J. THORNBURG, 17/MAY/85. +10 CALL SNCG0(NP,IA,JA,A,B,X,ITEMP(ILU),ITEMP(JLU),ITEMP(ID), + * RTEMP(IALU),RTEMP(IR),RTEMP(IP),RTEMP(IS1),RTEMP(IS2), + * EPS,ITER,IE) + SILUCG = .FALSE. + RETURN +20 SILUCG = .TRUE. + RETURN + END + LOGICAL FUNCTION SLU0(N,IA,JA,A,IC,JC,JCI,ALU,ILU,JLU,ID,V,IE) + IMPLICIT REAL (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),IC(*),JC(*),JCI(*), + * ALU(*),ILU(*),JLU(*),ID(N),V(N) + LOGICAL NODIAG + COMMON /ICBD00/ ICBAD +C INCOMPLETE LU DECOMPOSITION +C WHERE: +C N,IA,JA, AND A ARE DESCRIBED IN FUNCTION SILUCG +C IC IS AN INTEGER ARRAY DIMENSIONED N+1, IC(J) GIVES THE +C INDEX OF THE FIRST NONZERO ELEMENT IN COLMN J IN +C ARRAY JC. +C JC IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A. +C JC(K) GIVES THE ROW NUMBER OF THE K'TH ELEMENT IN +C THE COLUMN STRUCTURE. +C JCI IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A. +C JCI(K) GIVES THE INDEX INTO ARRAY A OF THE K'TH ELEMENT +C OF THE COLUMN STRUCTURE. +C ALU HAS THE SAME DIMENSION AS A. ON EXIT, IT WILL +C CONTAIN THE INCOMPLETE LU DECOMPOSITION OF A WITH THE +C RECIPROCALS OF THE DIAGONAL ELEMENTS OF U. +C ILU AND JLU CORRESPONDS TO IA AND JA BUT FOR ALU. +C ID IS AN INTEGER ARRAY DIMENSIONED N. IT CONTAINS +C INDICES TO THE DIAGONAL ELEMENTS OF U. +C V IS A REAL SCRATCH VECTOR OF LENGTH N. +C IE GIVES THE ROW NUMBER IN ERROR. +C +C RETURN VALUE = .FALSE. IF ALL IS WELL, .TRUE. IF ERROR. +C +C NOTE: SLU0 SETS ARGUMENTS IC THROUGH V. +C + ICBAD=0 +C ZERO COUNT OF ZERO DIAGONAL ELEMENTS IN U. +C +C FIRST CHECK STRUCTURE OF A AND BUILD COLUMN STRUCTURE + DO 10 I=1,N + IC(I)=0 +10 CONTINUE + DO 30 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + NODIAG=.TRUE. + DO 20 K=KS,KE + J=JA(K) + IF (J.LT.1.OR.J.GT.N) GO TO 210 + IC(J)=IC(J)+1 + IF (J.EQ.I) NODIAG=.FALSE. +20 CONTINUE + IF (NODIAG) GO TO 210 +30 CONTINUE +C MAKE IC INTO INDICES + KOLD=IC(1) + IC(1)=1 + DO 40 I=1,N + KNEW=IC(I+1) + IF (KOLD.EQ.0) GO TO 210 + IC(I+1)=IC(I)+KOLD + KOLD=KNEW +40 CONTINUE +C SET JC AND JCI FOR COLUMN STRUCTURE + DO 60 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + DO 50 K=KS,KE + J=JA(K) + L=IC(J) + IC(J)=L+1 + JC(L)=I + JCI(L)=K +50 CONTINUE +60 CONTINUE +C FIX UP IC + KOLD=IC(1) + IC(1)=1 + DO 70 I=1,N + KNEW=IC(I+1) + IC(I+1)=KOLD + KOLD=KNEW +70 CONTINUE +C FIND SORTED ROW STRUCTURE FROM SORTED COLUMN STRUCTURE + NP=N+1 + DO 80 I=1,NP + ILU(I)=IA(I) +80 CONTINUE +C MOVE ELEMENTS, SET JLU AND ID + DO 100 J=1,N + KS=IC(J) + KE=IC(J+1)-1 + DO 90 K=KS,KE + I=JC(K) + L=ILU(I) + ILU(I)=L+1 + JLU(L)=J + KK=JCI(K) + ALU(L)=A(KK) + IF (I.EQ.J) ID(J)=L +90 CONTINUE +100 CONTINUE +C RESET ILU (COULD JUST USE IA) + DO 110 I=1,NP + ILU(I)=IA(I) +110 CONTINUE +C FINISHED WITH SORTED COLUMN AND ROW STRUCTURE +C +C DO LU DECOMPOSITION USING GAUSSIAN ELIMINATION + DO 120 I=1,N + V(I)=0.0 +120 CONTINUE + DO 200 IROW=1,N + I=ID(IROW) + PIVOT=ALU(I) + IF (PIVOT.NE.0.0) GO TO 140 +C THIS CASE MAKES THE ILU LESS ACCURATE + ICBAD=ICBAD+1 + KS=ILU(IROW) + KE=ILU(IROW+1)-1 + DO 130 K=KS,KE + PIVOT=PIVOT+ABS(ALU(K)) +130 CONTINUE + IF (PIVOT.EQ.0.0) GO TO 220 +140 PIVOT=1.0/PIVOT + ALU(I)=PIVOT + KKS=I+1 + KKE=ILU(IROW+1)-1 + IF (KKS.GT.KKE) GO TO 160 + DO 150 K=KKS,KKE + J=JLU(K) + V(J)=ALU(K) +150 CONTINUE +C FIX L IN COLUMN IROW AND DO PARTIAL LU IN SUBMATRIX +160 KS=IC(IROW) + KE=IC(IROW+1)-1 + DO 190 K=KS,KE + I=JC(K) + IF (I.LE.IROW) GO TO 190 + LS=ILU(I) + LE=ILU(I+1)-1 + DO 180 L=LS,LE + J=JLU(L) + IF (J.LT.IROW) GO TO 180 + IF (J.GT.IROW) GO TO 170 + AMULT=ALU(L)*PIVOT + ALU(L)=AMULT + IF (AMULT.EQ.0.0) GO TO 190 + GO TO 180 +170 IF (V(J).EQ.0.0) GO TO 180 + ALU(L)=ALU(L)-AMULT*V(J) +180 CONTINUE +190 CONTINUE +C RESET V + IF (KKS.GT.KKE) GO TO 200 + DO 195 K=KKS,KKE + J=JLU(K) + V(J)=0.0 +195 CONTINUE +200 CONTINUE +C NORMAL RETURN + SLU0 = .FALSE. + RETURN +C ERROR RETURNS +210 IE=I + SLU0 = .TRUE. + RETURN +C NORMAL RETURN +220 IE=IROW + SLU0 = .TRUE. + RETURN + END + SUBROUTINE SNCG0(N,IA,JA,A,B,X,ILU,JLU,ID,ALU,R,P,S1,S2, + * EPS,ITER,IE) + IMPLICIT REAL (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),B(N),X(N),ILU(*),JLU(*),ALU(*),ID(N), + * R(N),P(N),S1(N),S2(N) +C NONSYMMETRIC CONJUGATE GRADIENT +C WHERE: +C N,IA,JA,A,B, AND X ARE DESCRIBED IN SUBROUTINE SILUCG. +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU. +C JLU GIVES COLUMN NUMBER. +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U. +C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW +C WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U. +C R,P,S1, AND S2 ARE VECTORS OF LENGTH N USED IN THE +C ITERATIONS. +C FOLLOWING PARAMETER ADDED BY J. THORNBURG, 17/MAY/85. +C EPS IS CONVERGENCE CRITERIA. (DESCRIBED IN SUBROUTINE +C SILUCG). +C ITER IS MAX NUMBER OF ITERATIONS, OR 0 FOR "NO LIMIT". +C IE GIVES ACTUAL NUMBER OF ITERATIONS, NEGATIVE IF +C NO CONVERGENCE. +C +C R0=B-A*X0 + CALL SMUL10(N,IA,JA,A,X,R) + DO 10 I=1,N + R(I)=B(I)-R(I) +10 CONTINUE +C P0=(UT*U)(-1)*AT*(L*LT)(-1)*R0 +C FIRST SOLVE L*LT*S1=R0 + CALL SSUBL0(N,ILU,JLU,ID,ALU,R,S1) +C TIMES TRANSPOSE OF A + CALL SMUL20(N,IA,JA,A,S1,S2) +C THEN SOLVE UT*U*P=S2 + CALL SSUBU0(N,ILU,JLU,ID,ALU,S2,P) + IE=0 + RDOT = SGVV(R,S1,N) +C LOOP BEGINS HERE +20 CALL SMUL30(N,ILU,JLU,ID,ALU,P,S2) + PDOT = SGVV(P,S2,N) +C + IF (PDOT.EQ.0.0) RETURN +C + ALPHA=RDOT/PDOT +C EQUATION 9PA ALPHA=(R,LINV*R)/(P,UT*U*P) + XMAX=1.0 + XDIF=0.0 + DO 30 I=1,N + AP=ALPHA*P(I) + X(I)=X(I)+AP +C EQUATION 9PB X=X+ALPHA*P + AP=ABS(AP) + XX=ABS(X(I)) + IF (AP.GT.XDIF) XDIF=AP + IF (XX.GT.XMAX) XMAX=XX +30 CONTINUE + IE=IE+1 +C +C CONVERGENCE TEST (CHANGED BY J. THORNBURG, 17/MAY/85, 4/AUG/89) +C + IF ((EPS .GT. 0.0) .AND. (XDIF .LE. EPS * XMAX)) RETURN + IF ((EPS .LT. 0.0) .AND. (XMAX + XDIF/ABS(EPS) .EQ. XMAX)) + * RETURN +C +C EXCEEDED ITERATION LIMIT? +C + IF ((ITER .NE. 0) .AND. (IE .GE. ITER)) GO TO 60 + CALL SMUL10(N,IA,JA,A,P,S2) + DO 40 I=1,N + R(I)=R(I)-ALPHA*S2(I) +C EQUATION 9PC R=R-ALPHA*A*P +40 CONTINUE + CALL SSUBL0(N,ILU,JLU,ID,ALU,R,S1) +C CALL INPROD(R,S1,EDOT,RRDOT,N) + RRDOT = SGVV(R,S1,N) + BETA=RRDOT/RDOT +C EQUATION 9PD BETA=(R+,LINV*R+)/(R,LINV*R) + RDOT=RRDOT + CALL SMUL20(N,IA,JA,A,S1,S2) + CALL SSUBU0(N,ILU,JLU,ID,ALU,S2,S1) + DO 50 I=1,N + P(I)=S1(I)+BETA*P(I) +C EQUATION 9PE P=(UT*U)(-1)*AT*(L*LT)(-1)*R+BETA*P +50 CONTINUE + GO TO 20 +60 IE=-IE + RETURN + END + SUBROUTINE SMUL10(N,IA,JA,A,B,X) + IMPLICIT REAL (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),B(N),X(N) +C MULTIPLY A TIMES B TO GET X +C WHERE: +C N IS THE ORDER OF THE MATRIX +C IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW +C JA GIVES COLUMN NUMBER +C A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC +C MATRIX STORED BY ROW +C B IS THE VECTOR +C X IS THE PRODUCT (MUST BE DIFFERENT FROM B) +C + DO 20 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + SUM=0.0 + DO 10 K=KS,KE + J=JA(K) + SUM=SUM+A(K)*B(J) +10 CONTINUE + X(I)=SUM +20 CONTINUE + RETURN + END + SUBROUTINE SMUL20(N,IA,JA,A,B,X) + IMPLICIT REAL (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),B(N),X(N) +C MULTIPLY TRANSPOSE OF A TIMES B TO GET X +C WHERE: +C N IS THE ORDER OF THE MATRIX +C IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW +C JA GIVES COLUMN NUMBER +C A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC +C MATRIX STORED BY ROW +C B IS THE VECTOR +C X IS THE PRODUCT (MUST BE DIFFERENT FROM B) +C + DO 10 I=1,N + X(I)=0.0 +10 CONTINUE + DO 30 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + BB=B(I) + DO 20 K=KS,KE + J=JA(K) + X(J)=X(J)+A(K)*BB +20 CONTINUE +30 CONTINUE + RETURN + END + SUBROUTINE SMUL30(N,ILU,JLU,ID,ALU,B,X) + IMPLICIT REAL (A-H,O-Z) + DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N) +C MULTIPLY TRANSPOSE OF U TIMES U TIMES B TO GET X +C WHERE: +C N IS THE ORDER OF THE MATRIX +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU +C JLU GIVES COLUMN NUMBER +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U +C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW +C WITH RECIPROCALS OF DIAGONAL ELEMENTS +C B IS THE VECTOR +C X IS THE PRODUCT UT*U*B (X MUST BE DIFFERENT FROM B) +C + DO 10 I=1,N + X(I)=0.0 +10 CONTINUE + DO 50 I=1,N + KS=ID(I)+1 + KE=ILU(I+1)-1 + DIAG=1.0/ALU(KS-1) + XX=DIAG*B(I) + IF (KS.GT.KE) GO TO 30 + DO 20 K=KS,KE + J=JLU(K) + XX=XX+ALU(K)*B(J) +20 CONTINUE +30 X(I)=X(I)+DIAG*XX + IF (KS.GT.KE) GO TO 50 + DO 40 K=KS,KE + J=JLU(K) + X(J)=X(J)+ALU(K)*XX +40 CONTINUE +50 CONTINUE + RETURN + END + SUBROUTINE SSUBU0(N,ILU,JLU,ID,ALU,B,X) + IMPLICIT REAL (A-H,O-Z) + DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N) +C DO FORWARD AND BACK SUBSTITUTION TO SOLVE UT*U*X=B +C WHERE: +C N IS THE ORDER OF THE MATRIX +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU +C JLU GIVES COLUMN NUMBER +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U +C ALU HAS NONZERO ELMENTS OF LU MATRIX STORED BY ROW +C WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U +C B IS THE RHS VECTOR +C X IS THE SOLUTION VECTOR +C + NP=N+1 + DO 10 I=1,N + X(I)=B(I) +10 CONTINUE +C FORWARD SUBSTITUTION + DO 30 I=1,N + KS=ID(I)+1 + KE=ILU(I+1)-1 + XX=X(I)*ALU(KS-1) + X(I)=XX + IF (KS.GT.KE) GO TO 30 + DO 20 K=KS,KE + J=JLU(K) + X(J)=X(J)-ALU(K)*XX +20 CONTINUE +30 CONTINUE +C BACK SUBSTITUTION + DO 60 II=1,N + I=NP-II + KS=ID(I)+1 + KE=ILU(I+1)-1 + SUM=0.0 + IF (KS.GT.KE) GO TO 50 + DO 40 K=KS,KE + J=JLU(K) + SUM=SUM+ALU(K)*X(J) +40 CONTINUE +50 X(I)=(X(I)-SUM)*ALU(KS-1) +60 CONTINUE + RETURN + END + SUBROUTINE SSUBL0(N,ILU,JLU,ID,ALU,B,X) + IMPLICIT REAL (A-H,O-Z) + DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N) +C DO FORWARD AND BACK SUBSTITUTION TO SOLVE L*LT*X=B +C WHERE: +C N IS THE ORDER OF THE MATRIX +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW LU +C JLU GIVES THE COLUMN NUMBER +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U +C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW +C DIAGONAL ELEMENTS OF L ARE 1.0 AND NOT STORED +C B IS THE RHS VECTOR +C X IS THE SOLUTION VECTOR +C + NP=N+1 + DO 10 I=1,N + X(I)=B(I) +10 CONTINUE +C FORWARD SUBSTITUTION + DO 30 I=1,N + KS=ILU(I) + KE=ID(I)-1 + IF (KS.GT.KE) GO TO 30 + SUM=0.0 + DO 20 K=KS,KE + J=JLU(K) + SUM=SUM+ALU(K)*X(J) +20 CONTINUE + X(I)=X(I)-SUM +30 CONTINUE +C BACK SUBSTITUTION + DO 50 II=1,N + I=NP-II + KS=ILU(I) + KE=ID(I)-1 + IF (KS.GT.KE) GO TO 50 + XX=X(I) + IF (XX.EQ.0.0) GO TO 50 + DO 40 K=KS,KE + J=JLU(K) + X(J)=X(J)-ALU(K)*XX +40 CONTINUE +50 CONTINUE + RETURN + END + REAL FUNCTION SGVV(V,W,N) + IMPLICIT REAL (A-H,O-Z) + DIMENSION V(N),W(N) +C SUBROUTINE TO COMPUTE REAL VECTOR DOT PRODUCT. +C + SUM = 0.0 + DO 10 I = 1,N + SUM = SUM + V(I)*W(I) +10 CONTINUE + SGVV = SUM + RETURN + END + LOGICAL FUNCTION DILUCG(N,IA,JA,A,B,X,ITEMP,RTEMP,EPS,ITER,IE) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) +C +C INCOMPLETE LU DECOMPOSITION-CONJUGATE GRADIENT +C - -- - - +C WHERE: +C |N| IS THE NUMBER OF EQUATIONS. IF N < 0, ITEMP AND +C RTEMP CONTAIN THE ILU FROM A PREVIOUS CALL AND +C B AND X ARE THE NEW RHS AND INITIAL GUESS. +C IA IS AN INTEGER ARRAY DIMENSIONED |N|+1. IA(I) IS THE +C INDEX INTO ARRAYS JA AND A OF THE FIRST NON-ZERO +C ELEMENT IN ROW I. LET MAX=IA(|N|+1)-IA(1). +C JA IS AN INTEGER ARRAY DIMENSIONED MAX. JA(K) GIVES +C THE COLUMN NUMBER OF A(K). +C A IS A DOUBLE PRECISION ARRAY DIMENSIONED MAX. IT CONTAINS THE +C NONZERO ELEMENTS OF THE MATRIX STORED BY ROW. +C B CONTAINS THE RHS VECTOR. +C X IS A DOUBLE PRECISION ARRAY DIMENSIONED |N|. ON ENTRY, IT CONTAINS +C AN INITIAL ESTIMATE; ON EXIT, THE SOLUTION. +C ITEMP IS AN INTEGER SCRATCH ARRAY DIMENSIONED 3*(|N|+MAX)+2. +C RTEMP IS A DOUBLE PRECISION SCRATCH ARRAY DIMENSIONED 4*|N|+MAX. +C EPS IS THE CONVERGENCE CRITERIA. IT SPECIFIES THE RELATIVE +C ERROR ALLOWED IN THE SOLUTION. TO BE PRECISE, CONVERGENCE +C IS DEEMED TO HAVE OCCURED WHEN THE INFINITY-NORM OF THE +C CHANGE IN THE SOLUTION IN ONE ITERATION IS .LE. EPS * THE +C INFINITY-NORM OF THE CURRENT SOLUTION. HOWEVER, IF EPS +C .LT. 0.0D0, IT IS INTERNALLY SCALED BY THE MACHINE PRECISION, +C SO THAT, FOR EXAMPLE, EPS = -256.0 WILL ALLOW THE LAST TWO +C HEXADECIMAL DIGITS OF THE SOLUTION TO BE IN ERROR. +C ITER GIVES THE REQUESTED NUMBER OF ITERATIONS, OR IS 0 FOR "NO LIMIT". +C IE IS AN INTEGER VARIABLE. FOR NORMAL RETURN, IT GIVES +C THE NUMBER OF ITERATIONS DONE (NEGATIVE IS NO CONVERGENCE). +C ON ERROR RETURN, THE ROW IN ERROR. +C +C THE FUNCTION RETURNS .FALSE. IF ALL'S WELL, .TRUE. ON AN ERROR RETURN. +C +C (MODIFIED TO RETURN LOGICAL VALUE, J. THORNBURG, 13/MAY/85.) +C (MODIFIED TO ADD CONVERGENCE CRITERIA, J. THORNBURG, 17/MAY/85.) +C (MODIFIED CONVERGENCE CRITERIA, J. THORNBURG, 4/AUG/89.) +C (MODIFIED TO AVOID SKIP RETURNS, J. THORNBURG, 10/SEP/89.) +C +C REFERENCE: +C D.S.KERSHAW,"THE INCOMPLETE CHOLESKY-CONJUGATE GRADIENT +C METHOD FOR INTERATIVE SOLUTION OF LINEAR EQUATIONS", +C J.COMPUT.PHYS. JAN 1978 PP 43-65 +C + DIMENSION IA(*),JA(*),A(*),B(*),X(*),ITEMP(*),RTEMP(*) + LOGICAL DLU0 + NP=IABS(N) + IE=0 + IF (NP.EQ.0) GO TO 20 +C CALCULATE INDICES FOR BREAKING UP TEMPORARY ARRAYS. + N1=NP+1 + MAX=IA(N1)-IA(1) + ILU=1 + JLU=ILU+N1 + ID=JLU+MAX + IC=ID+NP + JC=IC+N1 + JCI=JC+MAX + IR=1 + IP=IR+NP + IS1=IP+NP + IS2=IS1+NP + IALU=IS2+NP + IF (N.LT.0) GO TO 10 +C DO INCOMPLETE LU DECOMPOSITION + IF (DLU0(NP,IA,JA,A,ITEMP(IC),ITEMP(JC),ITEMP(JCI),RTEMP(IALU), + * ITEMP(ILU),ITEMP(JLU),ITEMP(ID),RTEMP(IR),IE)) GOTO 20 +C AND DO CONJUGATE GRADIENT ITERATIONS +C CALL MODIFIED TO ADD ADJUSTABLE CONVERGENCE CRITERIA EPS +C - J. THORNBURG, 17/MAY/85. +10 CALL DNCG0(NP,IA,JA,A,B,X,ITEMP(ILU),ITEMP(JLU),ITEMP(ID), + * RTEMP(IALU),RTEMP(IR),RTEMP(IP),RTEMP(IS1),RTEMP(IS2), + * EPS,ITER,IE) + DILUCG = .FALSE. + RETURN +20 DILUCG = .TRUE. + RETURN + END + LOGICAL FUNCTION DLU0(N,IA,JA,A,IC,JC,JCI,ALU,ILU,JLU,ID,V,IE) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),IC(*),JC(*),JCI(*), + * ALU(*),ILU(*),JLU(*),ID(N),V(N) + LOGICAL NODIAG + COMMON /ICBD00/ ICBAD +C INCOMPLETE LU DECOMPOSITION +C WHERE: +C N,IA,JA, AND A ARE DESCRIBED IN SUBROUTINE ILUCG +C IC IS AN INTEGER ARRAY DIMENSIONED N+1, IC(J) GIVES THE +C INDEX OF THE FIRST NONZERO ELEMENT IN COLMN J IN +C ARRAY JC. +C JC IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A. +C JC(K) GIVES THE ROW NUMBER OF THE K'TH ELEMENT IN +C THE COLUMN STRUCTURE. +C JCI IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A. +C JCI(K) GIVES THE INDEX INTO ARRAY A OF THE K'TH ELEMENT +C OF THE COLUMN STRUCTURE. +C ALU HAS THE SAME DIMENSION AS A. ON EXIT, IT WILL +C CONTAIN THE INCOMPLETE LU DECOMPOSITION OF A WITH THE +C RECIPROCALS OF THE DIAGONAL ELEMENTS OF U. +C ILU AND JLU CORRESPONDS TO IA AND JA BUT FOR ALU. +C ID IS AN INTEGER ARRAY DIMENSIONED N. IT CONTAINS +C INDICES TO THE DIAGONAL ELEMENTS OF U. +C V IS A REAL SCRATCH VECTOR OF LENGTH N. +C IE GIVES THE ROW NUMBER IN ERROR. +C +C RETURN VALUE = .FALSE. IF ALL IS WELL, .TRUE. IF ERROR. +C +C NOTE: DLU0 SETS ARGUMENTS IC THROUGH V. +C + ICBAD=0 +C ZERO COUNT OF ZERO DIAGONAL ELEMENTS IN U. +C +C FIRST CHECK STRUCTURE OF A AND BUILD COLUMN STRUCTURE + DO 10 I=1,N + IC(I)=0 +10 CONTINUE + DO 30 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + NODIAG=.TRUE. + DO 20 K=KS,KE + J=JA(K) + IF (J.LT.1.OR.J.GT.N) GO TO 210 + IC(J)=IC(J)+1 + IF (J.EQ.I) NODIAG=.FALSE. +20 CONTINUE + IF (NODIAG) GO TO 210 +30 CONTINUE +C MAKE IC INTO INDICES + KOLD=IC(1) + IC(1)=1 + DO 40 I=1,N + KNEW=IC(I+1) + IF (KOLD.EQ.0) GO TO 210 + IC(I+1)=IC(I)+KOLD + KOLD=KNEW +40 CONTINUE +C SET JC AND JCI FOR COLUMN STRUCTURE + DO 60 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + DO 50 K=KS,KE + J=JA(K) + L=IC(J) + IC(J)=L+1 + JC(L)=I + JCI(L)=K +50 CONTINUE +60 CONTINUE +C FIX UP IC + KOLD=IC(1) + IC(1)=1 + DO 70 I=1,N + KNEW=IC(I+1) + IC(I+1)=KOLD + KOLD=KNEW +70 CONTINUE +C FIND SORTED ROW STRUCTURE FROM SORTED COLUMN STRUCTURE + NP=N+1 + DO 80 I=1,NP + ILU(I)=IA(I) +80 CONTINUE +C MOVE ELEMENTS, SET JLU AND ID + DO 100 J=1,N + KS=IC(J) + KE=IC(J+1)-1 + DO 90 K=KS,KE + I=JC(K) + L=ILU(I) + ILU(I)=L+1 + JLU(L)=J + KK=JCI(K) + ALU(L)=A(KK) + IF (I.EQ.J) ID(J)=L +90 CONTINUE +100 CONTINUE +C RESET ILU (COULD JUST USE IA) + DO 110 I=1,NP + ILU(I)=IA(I) +110 CONTINUE +C FINISHED WITH SORTED COLUMN AND ROW STRUCTURE +C +C DO LU DECOMPOSITION USING GAUSSIAN ELIMINATION + DO 120 I=1,N + V(I)=0.0D0 +120 CONTINUE + DO 200 IROW=1,N + I=ID(IROW) + PIVOT=ALU(I) + IF (PIVOT.NE.0.0D0) GO TO 140 +C THIS CASE MAKES THE ILU LESS ACCURATE + ICBAD=ICBAD+1 + KS=ILU(IROW) + KE=ILU(IROW+1)-1 + DO 130 K=KS,KE + PIVOT=PIVOT+DABS(ALU(K)) +130 CONTINUE + IF (PIVOT.EQ.0.0D0) GO TO 220 +140 PIVOT=1.0D0/PIVOT + ALU(I)=PIVOT + KKS=I+1 + KKE=ILU(IROW+1)-1 + IF (KKS.GT.KKE) GO TO 160 + DO 150 K=KKS,KKE + J=JLU(K) + V(J)=ALU(K) +150 CONTINUE +C FIX L IN COLUMN IROW AND DO PARTIAL LU IN SUBMATRIX +160 KS=IC(IROW) + KE=IC(IROW+1)-1 + DO 190 K=KS,KE + I=JC(K) + IF (I.LE.IROW) GO TO 190 + LS=ILU(I) + LE=ILU(I+1)-1 + DO 180 L=LS,LE + J=JLU(L) + IF (J.LT.IROW) GO TO 180 + IF (J.GT.IROW) GO TO 170 + AMULT=ALU(L)*PIVOT + ALU(L)=AMULT + IF (AMULT.EQ.0.0) GO TO 190 + GO TO 180 +170 IF (V(J).EQ.0.0D0) GO TO 180 + ALU(L)=ALU(L)-AMULT*V(J) +180 CONTINUE +190 CONTINUE +C RESET V + IF (KKS.GT.KKE) GO TO 200 + DO 195 K=KKS,KKE + J=JLU(K) + V(J)=0.0D0 +195 CONTINUE +200 CONTINUE +C NORMAL RETURN + DLU0 = .FALSE. + RETURN +C ERROR RETURNS +210 IE=I + DLU0 = .TRUE. + RETURN +220 IE=IROW + DLU0 = .TRUE. + RETURN + END + SUBROUTINE DNCG0(N,IA,JA,A,B,X,ILU,JLU,ID,ALU,R,P,S1,S2, + * EPS,ITER,IE) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),B(N),X(N),ILU(*),JLU(*),ALU(*),ID(N), + * R(N),P(N),S1(N),S2(N) +C NONSYMMETRIC CONJUGATE GRADIENT +C WHERE: +C N,IA,JA,A,B, AND X ARE DESCRIBED IN SUBROUTINE DILUCG. +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU. +C JLU GIVES COLUMN NUMBER. +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U. +C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW +C WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U. +C R,P,S1, AND S2 ARE VECTORS OF LENGTH N USED IN THE +C ITERATIONS. +C FOLLOWING PARAMETER ADDED BY J. THORNBURG, 17/MAY/85. +C EPS IS CONVERGENCE CRITERIA. (DESCRIBED IN SUBROUTINE +C DILUCG). +C ITER IS MAX NUMBER OF ITERATIONS, OR 0 FOR "NO LIMIT". +C IE GIVES ACTUAL NUMBER OF ITERATIONS, NEGATIVE IF +C NO CONVERGENCE. +C +C R0=B-A*X0 + CALL DMUL10(N,IA,JA,A,X,R) + DO 10 I=1,N + R(I)=B(I)-R(I) +10 CONTINUE +C P0=(UT*U)(-1)*AT*(L*LT)(-1)*R0 +C FIRST SOLVE L*LT*S1=R0 + CALL DSUBL0(N,ILU,JLU,ID,ALU,R,S1) +C TIMES TRANSPOSE OF A + CALL DMUL20(N,IA,JA,A,S1,S2) +C THEN SOLVE UT*U*P=S2 + CALL DSUBU0(N,ILU,JLU,ID,ALU,S2,P) + IE=0 +C INPROD IS DOT PRODUCT ROUTINE IN *LIBRARY (UBC MATRIX P 28) +C ALL CALLS ON IT COMMENTED OUT AND REPLACED WITH CALLS TO NEW +C ROUTINE DGVV(...); SOURCE CODE FOR LATTER ADDED TO END OF +C THIS FILE. - J. THORNBURG, 10/MAY/85. +C CALL INPROD(R,S1,EDOT,RDOT,N) + RDOT = DGVV(R,S1,N) +C LOOP BEGINS HERE +20 CALL DMUL30(N,ILU,JLU,ID,ALU,P,S2) +C CALL INPROD(P,S2,EDOT,PDOT,N) + PDOT = DGVV(P,S2,N) +C + IF (PDOT.EQ.0.0D0) RETURN +C + ALPHA=RDOT/PDOT +C EQUATION 9PA ALPHA=(R,LINV*R)/(P,UT*U*P) + XMAX=1.0D0 + XDIF=0.0D0 + DO 30 I=1,N + AP=ALPHA*P(I) + X(I)=X(I)+AP +C EQUATION 9PB X=X+ALPHA*P + AP=DABS(AP) + XX=DABS(X(I)) + IF (AP.GT.XDIF) XDIF=AP + IF (XX.GT.XMAX) XMAX=XX +30 CONTINUE + IE=IE+1 +C +C CONVERGENCE TEST (CHANGED BY J. THORNBURG, 17/MAY/85, 4/AUG/89) +C + IF ((EPS .GT. 0.0D0) .AND. (XDIF .LE. EPS * XMAX)) RETURN + IF ((EPS .LT. 0.0D0) .AND. (XMAX + XDIF/DABS(EPS) .EQ. XMAX)) + * RETURN +C +C EXCEEDED ITERATION LIMIT? +C + IF ((ITER .NE. 0) .AND. (IE .GE. ITER)) GO TO 60 + CALL DMUL10(N,IA,JA,A,P,S2) + DO 40 I=1,N + R(I)=R(I)-ALPHA*S2(I) +C EQUATION 9PC R=R-ALPHA*A*P +40 CONTINUE + CALL DSUBL0(N,ILU,JLU,ID,ALU,R,S1) +C CALL INPROD(R,S1,EDOT,RRDOT,N) + RRDOT = DGVV(R,S1,N) + BETA=RRDOT/RDOT +C EQUATION 9PD BETA=(R+,LINV*R+)/(R,LINV*R) + RDOT=RRDOT + CALL DMUL20(N,IA,JA,A,S1,S2) + CALL DSUBU0(N,ILU,JLU,ID,ALU,S2,S1) + DO 50 I=1,N + P(I)=S1(I)+BETA*P(I) +C EQUATION 9PE P=(UT*U)(-1)*AT*(L*LT)(-1)*R+BETA*P +50 CONTINUE + GO TO 20 +60 IE=-IE + RETURN + END + SUBROUTINE DMUL10(N,IA,JA,A,B,X) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),B(N),X(N) +C MULTIPLY A TIMES B TO GET X +C WHERE: +C N IS THE ORDER OF THE MATRIX +C IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW +C JA GIVES COLUMN NUMBER +C A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC +C MATRIX STORED BY ROW +C B IS THE VECTOR +C X IS THE PRODUCT (MUST BE DIFFERENT FROM B) +C + DO 20 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + SUM=0.0D0 + DO 10 K=KS,KE + J=JA(K) + SUM=SUM+A(K)*B(J) +10 CONTINUE + X(I)=SUM +20 CONTINUE + RETURN + END + SUBROUTINE DMUL20(N,IA,JA,A,B,X) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),B(N),X(N) +C MULTIPLY TRANSPOSE OF A TIMES B TO GET X +C WHERE: +C N IS THE ORDER OF THE MATRIX +C IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW +C JA GIVES COLUMN NUMBER +C A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC +C MATRIX STORED BY ROW +C B IS THE VECTOR +C X IS THE PRODUCT (MUST BE DIFFERENT FROM B) +C + DO 10 I=1,N + X(I)=0.0D0 +10 CONTINUE + DO 30 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + BB=B(I) + DO 20 K=KS,KE + J=JA(K) + X(J)=X(J)+A(K)*BB +20 CONTINUE +30 CONTINUE + RETURN + END + SUBROUTINE DMUL30(N,ILU,JLU,ID,ALU,B,X) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N) +C MULTIPLY TRANSPOSE OF U TIMES U TIMES B TO GET X +C WHERE: +C N IS THE ORDER OF THE MATRIX +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU +C JLU GIVES COLUMN NUMBER +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U +C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW +C WITH RECIPROCALS OF DIAGONAL ELEMENTS +C B IS THE VECTOR +C X IS THE PRODUCT UT*U*B (X MUST BE DIFFERENT FROM B) +C + DO 10 I=1,N + X(I)=0.0D0 +10 CONTINUE + DO 50 I=1,N + KS=ID(I)+1 + KE=ILU(I+1)-1 + DIAG=1.0D0/ALU(KS-1) + XX=DIAG*B(I) + IF (KS.GT.KE) GO TO 30 + DO 20 K=KS,KE + J=JLU(K) + XX=XX+ALU(K)*B(J) +20 CONTINUE +30 X(I)=X(I)+DIAG*XX + IF (KS.GT.KE) GO TO 50 + DO 40 K=KS,KE + J=JLU(K) + X(J)=X(J)+ALU(K)*XX +40 CONTINUE +50 CONTINUE + RETURN + END + SUBROUTINE DSUBU0(N,ILU,JLU,ID,ALU,B,X) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N) +C DO FORWARD AND BACK SUBSTITUTION TO SOLVE UT*U*X=B +C WHERE: +C N IS THE ORDER OF THE MATRIX +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU +C JLU GIVES COLUMN NUMBER +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U +C ALU HAS NONZERO ELMENTS OF LU MATRIX STORED BY ROW +C WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U +C B IS THE RHS VECTOR +C X IS THE SOLUTION VECTOR +C + NP=N+1 + DO 10 I=1,N + X(I)=B(I) +10 CONTINUE +C FORWARD SUBSTITUTION + DO 30 I=1,N + KS=ID(I)+1 + KE=ILU(I+1)-1 + XX=X(I)*ALU(KS-1) + X(I)=XX + IF (KS.GT.KE) GO TO 30 + DO 20 K=KS,KE + J=JLU(K) + X(J)=X(J)-ALU(K)*XX +20 CONTINUE +30 CONTINUE +C BACK SUBSTITUTION + DO 60 II=1,N + I=NP-II + KS=ID(I)+1 + KE=ILU(I+1)-1 + SUM=0.0D0 + IF (KS.GT.KE) GO TO 50 + DO 40 K=KS,KE + J=JLU(K) + SUM=SUM+ALU(K)*X(J) +40 CONTINUE +50 X(I)=(X(I)-SUM)*ALU(KS-1) +60 CONTINUE + RETURN + END + SUBROUTINE DSUBL0(N,ILU,JLU,ID,ALU,B,X) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N) +C DO FORWARD AND BACK SUBSTITUTION TO SOLVE L*LT*X=B +C WHERE: +C N IS THE ORDER OF THE MATRIX +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW LU +C JLU GIVES THE COLUMN NUMBER +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U +C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW +C DIAGONAL ELEMENTS OF L ARE 1.0 AND NOT STORED +C B IS THE RHS VECTOR +C X IS THE SOLUTION VECTOR +C + NP=N+1 + DO 10 I=1,N + X(I)=B(I) +10 CONTINUE +C FORWARD SUBSTITUTION + DO 30 I=1,N + KS=ILU(I) + KE=ID(I)-1 + IF (KS.GT.KE) GO TO 30 + SUM=0.0D0 + DO 20 K=KS,KE + J=JLU(K) + SUM=SUM+ALU(K)*X(J) +20 CONTINUE + X(I)=X(I)-SUM +30 CONTINUE +C BACK SUBSTITUTION + DO 50 II=1,N + I=NP-II + KS=ILU(I) + KE=ID(I)-1 + IF (KS.GT.KE) GO TO 50 + XX=X(I) + IF (XX.EQ.0.0) GO TO 50 + DO 40 K=KS,KE + J=JLU(K) + X(J)=X(J)-ALU(K)*XX +40 CONTINUE +50 CONTINUE + RETURN + END + DOUBLE PRECISION FUNCTION DGVV(V,W,N) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION V(N),W(N) +C SUBROUTINE TO COMPUTE DOUBLE PRECISION VECTOR DOT PRODUCT. +C + SUM = 0.0D0 + DO 10 I = 1,N + SUM = SUM + V(I)*W(I) +10 CONTINUE + DGVV = SUM + RETURN + END diff --git a/src/elliptic/ilucg_wrapper.F77 b/src/elliptic/ilucg_wrapper.F77 new file mode 100644 index 0000000..d1ab803 --- /dev/null +++ b/src/elliptic/ilucg_wrapper.F77 @@ -0,0 +1,72 @@ +c ilucg_wrapper.F77 -- wrapper routines for [sd]ilucg() +c $Header$ + +c +c These subroutines are wrappers around the [sd]ilucg() functions. +c These subroutines take only integer/real/double precision arguments, +c avoiding problems with C/C++ --> Fortran handling of the logical +c result returned by [sd]ilucg(). +c +c Arguments: +c istatus = (out) This is the "IE" return value from [sd]ilucg() +c ierror = (out) This is returned as 0 for a normal return, +c nonzero for an error return. +c + +#include "config.h" + +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG + subroutine silucg_wrapper(n, + $ ia, ja, a, + $ b, x, + $ itemp, rtemp, + $ eps, iter, + $ istatus, ierror) + integer n + integer ia(*), ja(*) + real a(*), b(*), x(*) + integer itemp(*) + real rtemp(*) + real eps + integer iter, istatus, ierror +c + logical silucg +c + ierror = 0 + if (silucg(n, + $ ia, ja, a, + $ b, x, + $ itemp, rtemp, + $ eps, iter, + $ istatus)) ierror = 1 + return + end +#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG + +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG + subroutine dilucg_wrapper(n, + $ ia, ja, a, + $ b, x, + $ itemp, rtemp, + $ eps, iter, + $ istatus, ierror) + integer n + integer ia(*), ja(*) + double precision a(*), b(*), x(*) + integer itemp(*) + double precision rtemp(*) + double precision eps + integer iter, istatus, ierror +c + logical dilucg +c + ierror = 0 + if (dilucg(n, + $ ia, ja, a, + $ b, x, + $ itemp, rtemp, + $ eps, iter, + $ istatus)) ierror = 1 + return + end +#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG diff --git a/src/elliptic/lapack.h b/src/elliptic/lapack.h index 1e86616..0927bff 100644 --- a/src/elliptic/lapack.h +++ b/src/elliptic/lapack.h @@ -4,7 +4,7 @@ /* * prerequisites: * "cctk.h" - * "config.hh" // for "integer" = Fortran integer + * "config.h" // for "integer" = Fortran integer */ #ifdef __cplusplus diff --git a/src/elliptic/lapack_wrapper.F77 b/src/elliptic/lapack_wrapper.F77 new file mode 100644 index 0000000..25796b3 --- /dev/null +++ b/src/elliptic/lapack_wrapper.F77 @@ -0,0 +1,59 @@ +c lapack_wrapper.f -- wrapper routines for LAPACK [sd]gecon() +c $Header$ + +c +c These subroutines are wrappers around the LAPACK [sd]gecon() subroutines. +c These subroutines take only integer/real/double precision arguments, +c avoiding problems with C/C++ --> Fortran passing of the character string +c arguments used by [sd]gecon(). +c +c Arguments: +c norm_int = (in) 0 ==> infinity-norm +c 1 ==> 1-norm +c + +#include "config.h" + +#ifdef HAVE_DENSE_JACOBIAN__LAPACK + subroutine sgecon_wrapper(norm_int, + $ N, A, LDA, anorm, rcond, + $ WORK, IWORK, info) + integer norm_int + integer N, LDA + real A(LDA,N) + real anorm, rcond + real WORK(*) + integer iwork(*) + integer info + if (norm_int .eq. 0) then + call sgecon('I', N,A,LDA, anorm,rcond, WORK,IWORK, info) + else if (norm_int .eq. 1) then + call sgecon('1', N,A,LDA, anorm,rcond, WORK,IWORK, info) + else + info = -1; + end if + return + end +#endif /* HAVE_DENSE_JACOBIAN__LAPACK */ + +#ifdef HAVE_DENSE_JACOBIAN__LAPACK + subroutine dgecon_wrapper(norm_int, + $ N, A, LDA, anorm, rcond, + $ WORK, IWORK, info) + integer norm_int + integer N, LDA + double precision A(LDA,N) + double precision anorm, rcond + double precision WORK(*) + integer iwork(*) + integer info + if (norm_int .eq. 0) then + call dgecon('I', N,A,LDA, anorm,rcond, WORK,IWORK, info) + else if (norm_int .eq. 1) then + call dgecon('1', N,A,LDA, anorm,rcond, WORK,IWORK, info) + else + info = -1; + end if + return + end +#endif /* HAVE_DENSE_JACOBIAN__LAPACK */ diff --git a/src/elliptic/make.code.defn b/src/elliptic/make.code.defn index 28308a0..553d372 100644 --- a/src/elliptic/make.code.defn +++ b/src/elliptic/make.code.defn @@ -3,8 +3,8 @@ # Source files in this directory SRCS = Jacobian.cc \ - dense_Jacobian.cc gecon_wrapper.F77 \ - row_sparse_Jacobian.cc + dense_Jacobian.cc lapack_wrapper.F77 \ + row_sparse_Jacobian.cc ilucg_wrapper.F77 ilucg.f77 # Subdirectories containing source files SUBDIRS = diff --git a/src/elliptic/row_sparse_Jacobian.cc b/src/elliptic/row_sparse_Jacobian.cc index 131655c..7d0be44 100644 --- a/src/elliptic/row_sparse_Jacobian.cc +++ b/src/elliptic/row_sparse_Jacobian.cc @@ -2,22 +2,28 @@ // $Header$ // +// <<<liberal contents of "ilucg.h">>> +// #ifdef HAVE_ROW_SPARSE_JACOBIAN -// row_sparse_Jacobian_sparsity::row_sparse_Jacobian_sparsity -// row_sparse_Jacobian_sparsity::~row_sparse_Jacobian_sparsity -// row_sparse_Jacobian_sparsity::zero_matrix -// row_sparse_Jacobian_sparsity::note_new_row -// row_sparse_Jacobian_sparsity::note_element +// row_sparse_Jacobian::row_sparse_Jacobian +// row_sparse_Jacobian::~row_sparse_Jacobian +// row_sparse_Jacobian::element +// row_sparse_Jacobian::zero_matrix +// row_sparse_Jacobian::set_element +// row_sparse_Jacobian::sum_into_element +/// row_sparse_Jacobian::find_element +/// row_sparse_Jacobian::insert_element + #ifdef DEBUG_ROW_SPARSE_JACOBIAN +/// row_sparse_Jacobian::print_data_structure + #endif #endif // -#ifdef HAVE_ROW_SPARSE_JACOBIAN -// row_sparse_Jacobian_matrix::row_sparse_Jacobian_matrix -// row_sparse_Jacobian_matrix::~row_sparse_Jacobian_matrix -// row_sparse_Jacobian_matrix::zero_matrix -// row_sparse_Jacobian_matrix::find_element -// row_sparse_Jacobian_matrix::start_new_row -// row_sparse_Jacobian_matrix::insert_element +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG +// row_sparse_Jacobian__ILUCG::row_sparse_Jacobian__ILUCG +// row_sparse_Jacobian__ILUCG::~row_sparse_Jacobian__ILUCG +// row_sparse_Jacobian__ILUCG::solve_linear_system #endif +// #include <stdio.h> #include <assert.h> @@ -52,16 +58,102 @@ using jtutil::error_exit; //****************************************************************************** //****************************************************************************** +// +// FIXME: +// Cactus's CCTK_FCALL() isn't expanded in .h files (this is a bug), +// so we include the contents of "lapack.h" inline here. This is a +// kludge! :( :( +//#include "ilucg.h" +// + +//***** begin "ilucg.h" contents ****** +/* ilucg.h -- C/C++ prototypes for [sd]ilucg() routines */ +/* $Header$ */ + +/* + * prerequisites: + * "cctk.h" + * "config.h" // for "integer" = Fortran integer + */ + +#ifdef __cplusplus +extern "C" + { +#endif + +/* + * ***** ILUCG ***** + */ +integer CCTK_FCALL + CCTK_FNAME(silucg)(const integer* N, + const integer ia[], const integer ja[], const float a[], + const float b[], float x[], + integer itemp[], float rtemp[], + const float* eps, const integer* iter, + integer* istatus); +integer CCTK_FCALL + CCTK_FNAME(dilucg)(const integer* N, + const integer ia[], const integer ja[], const double a[], + const double b[], double x[], + integer itemp[], double rtemp[], + const double* eps, const integer* iter, + integer* istatus); + +/* + * ***** wrappers (for returning logical results) ***** + */ +void CCTK_FCALL + CCTK_FNAME(silucg_wrapper) + (const integer* N, + const integer ia[], const integer ja[], const float a[], + const float b[], float x[], + integer itemp[], float rtemp[], + const float* eps, const integer* iter, + integer* istatus, integer* ierror); +void CCTK_FCALL + CCTK_FNAME(dilucg_wrapper) + (const integer* N, + const integer ia[], const integer ja[], const double a[], + const double b[], double x[], + integer itemp[], double rtemp[], + const double* eps, const integer* iter, + integer* istatus, integer* ierror); + +#ifdef __cplusplus + } /* extern "C" */ +#endif +//***** end "ilucg.h" contents ****** + +//****************************************************************************** +//****************************************************************************** +//****************************************************************************** + #ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function constructs a row_sparse_Jacobian_sparsity object. +// This function constructs a row_sparse_Jacobian object. // -row_sparse_Jacobian_sparsity::row_sparse_Jacobian_sparsity - (patch_system& ps, - bool print_msg_flag /* = false */) - : Jacobian_sparsity(ps, print_msg_flag), - JJs_at_this_II_(new int[NN()]) +row_sparse_Jacobian::row_sparse_Jacobian(patch_system& ps, + bool print_msg_flag /* = false */) + : Jacobian(ps), + N_nonzeros_(0), + N_nonzeros_allocated_(FD_GRID__MOL_AREA * N_rows_), + // initial guess, insert_element() + // will grow if/when necessary + current_N_rows_(0), + IA_(new integer[N_rows_+1]), + JA_(new integer[N_nonzeros_allocated_]), + A_(new fp [N_nonzeros_allocated_]) { +if (print_msg_flag) + then { + CCTK_VInfo(CCTK_THORNSTRING, + "constructing row_sparse_Jacobian: N_rows_=%d", + N_rows_); + CCTK_VInfo(CCTK_THORNSTRING, + " initial N_nonzeros_allocated_=%d", + N_nonzeros_allocated_); + } + zero_matrix(); } #endif // HAVE_ROW_SPARSE_JACOBIAN @@ -70,11 +162,13 @@ zero_matrix(); #ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function destroys a row_sparse_Jacobian_sparsity object. +// This function destroys a row_sparse_Jacobian object. // -row_sparse_Jacobian_sparsity::~row_sparse_Jacobian_sparsity() +row_sparse_Jacobian::~row_sparse_Jacobian() { -delete[] JJs_at_this_II_; +delete[] A_; +delete[] JA_; +delete[] IA_; } #endif // HAVE_ROW_SPARSE_JACOBIAN @@ -82,13 +176,13 @@ delete[] JJs_at_this_II_; #ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function notes the zeroing of the entire matrix +// This function gets a matrix element. // -void row_sparse_Jacobian_sparsity::zero_matrix() +fp row_sparse_Jacobian::element(int II, int JJ) + const { -N_nonzeros_ = 0; -current_II_ = 0; -N_JJs_at_this_II_ = 0; +const int fposn = find_element(II,JJ); +return (fposn > 0) ? fA(fposn) : 0.0; } #endif // HAVE_ROW_SPARSE_JACOBIAN @@ -96,12 +190,16 @@ N_JJs_at_this_II_ = 0; #ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function notes that we're starting a new row in the matrix traversal. +// This function zeros a row_sparse_Jacobian object. // -void row_sparse_Jacobian_sparsity::note_new_row(int II) +void row_sparse_Jacobian::zero_matrix() { -current_II_ = II; -N_JJs_at_this_II_ = 0; +#ifdef DEBUG_ROW_SPARSE_JACOBIAN +printf("row_sparse_Jacobian::zero_matrix()\n"); +#endif +N_nonzeros_ = 0; +current_N_rows_= 0; +fIA(1) = 1; } #endif // HAVE_ROW_SPARSE_JACOBIAN @@ -109,120 +207,269 @@ N_JJs_at_this_II_ = 0; #ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function notes that the (II,JJ) matrix element needs to be stored. +// This function sets a matrix element to a specified value. // -void row_sparse_Jacobian_sparsity::note_element(int II, int JJ) +void row_sparse_Jacobian::set_element(int II, int JJ, fp value) { -if (II != current_II_) - then { - N_nonzeros_ += N_JJs_at_this_II_; // update totals for old row - note_new_row(II); - } - -// have we already seen this JJ in this row? - for (int jindex = 0 ; jindex < N_JJs_at_this_II_ ; ++jindex) - { - if (JJs_at_this_II_[jindex] == JJ) - then { - // yes, we've already seen it - // ==> no-op here - return; - } - } - -// get to here ==> no, we haven't seen this JJ before in this roW -// ==> add it to this row -if (N_JJs_at_this_II_ >= NN()) - then error_exit(ERROR_EXIT, -"***** row_sparse_Jacobian_sparsity():\n" -" row buffer overflowed (this should never happen!)\n" -" before insertion N_JJs_at_this_II_=%d >= NN()=%d\n" -" N_nonzeros_=%d current_II_=%d\n" -" II=%d JJ=%d\n" - , - N_JJs_at_this_II_, NN(), - N_nonzeros_, current_II_, - II, JJ); /*NOTREACHED*/ -JJs_at_this_II_[N_JJs_at_this_II_++] = JJ; +const int fposn = find_element(II,JJ); +if (fposn > 0) + then fA(fposn) = value; + else insert_element(II,JJ, value); } #endif // HAVE_ROW_SPARSE_JACOBIAN //****************************************************************************** -//****************************************************************************** -//****************************************************************************** +#ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function constructs a row_sparse_Jacobian_matrix object. +// This function sums a value into a matrix element. // -row_sparse_Jacobian_matrix::row_sparse_Jacobian_matrix - (patch_system& ps, - row_sparse_Jacobian_sparsity& JS, - bool print_msg_flag /* = false */) - : Jacobian_matrix(ps, JS), - N_nonzeros_(JS.N_nonzeros()), - A_(new fp[N_nonzeros_]), - IA_(new integer[NN()+1]), - JA_(new integer[N_nonzeros_]) +void row_sparse_Jacobian::sum_into_element(int II, int JJ, fp value) { -if (print_msg_flag) - then CCTK_VInfo(CCTK_THORNSTRING, - "row_sparse_Jacobian_matrix ctor: N_nonzeros_=%d", - N_nonzeros_); - -zero_matrix(); +const int fposn = find_element(II,JJ); +if (fposn > 0) + then fA(fposn) += value; + else insert_element(II,JJ, value); } +#endif // HAVE_ROW_SPARSE_JACOBIAN //****************************************************************************** +#ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function destroys a row_sparse_Jacobian_matrix object. +// This function searches our data structures for element (II,JJ). +// If found, it returns the position (1-origin) in A_ and JA_. +// If not found, it returns 0. // -row_sparse_Jacobian_matrix::~row_sparse_Jacobian_matrix() +int row_sparse_Jacobian::find_element(int II, int JJ) + const { -delete[] JA_; -delete[] IA_; -delete[] A_; +const int fII = fsub(II); +const int fJJ = fsub(JJ); + +if (fII > current_N_rows_) + then return 0; // this row not defined yet + +const int fstart = fIA(fII); +const int fstop = fIA(fII + 1); + for (int fposn = fstart ; fposn < fstop ; ++fposn) + { + if (fJA(fposn) == fJJ) + then return fposn; // found + } + +return 0; // not found } +#endif // HAVE_ROW_SPARSE_JACOBIAN //****************************************************************************** +#ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function zeros a row_sparse_Jacobian_matrix object. +// This function inserts a new element in the matrix, starting a new +// row and/or growing the arrays if necessary. It should only be called +// if JJ isn't already in this row. // -void row_sparse_Jacobian_matrix::zero_matrix() +int row_sparse_Jacobian::insert_element(int II, int JJ, fp value) { -current_N_nonzeros_ = 0; -current_II_ = 0; -JA_[current_II_] = fsub(0); +const int fII = fsub(II); +const int fJJ = fsub(JJ); + +#ifdef DEBUG_ROW_SPARSE_JACOBIAN +printf( + "row_sparse_Jacobian::insert_element(): fII=%d fJJ=%d\n", + fII, fJJ); +#endif + +if (fII != current_N_rows_) + then { + #ifdef DEBUG_ROW_SPARSE_JACOBIAN + printf(" starting new row\n"); + #endif + assert(fII == current_N_rows_+1); + assert(current_N_rows_ < N_rows_); + ++current_N_rows_; + fIA(current_N_rows_+1) = fIA(current_N_rows_); + #ifdef DEBUG_ROW_SPARSE_JACOBIAN + print_data_structure(); + #endif + } + +if (fIA(fII+1) > N_nonzeros_allocated_) + then { + #ifdef DEBUG_ROW_SPARSE_JACOBIAN + printf(" growing arrays from N_nonzeros_allocated_=%d", + N_nonzeros_allocated_); + #endif + N_nonzeros_allocated_ += (N_nonzeros_allocated_ >> 1); + #ifdef DEBUG_ROW_SPARSE_JACOBIAN + printf(" to %d\n", N_nonzeros_allocated_); + #endif + integer* const new_JA = new integer[N_nonzeros_allocated_]; + fp * const new_A = new fp [N_nonzeros_allocated_]; + for (int posn = 0 ; posn < N_nonzeros_ ; ++posn) + { + new_JA[posn] = JA_[posn]; + new_A[posn] = A_[posn]; + } + delete[] A_; + delete[] JA_; + JA_ = new_JA; + A_ = new_A; + } + +++N_nonzeros_; +const int fposn = fIA(fII+1)++; +fJA(fposn) = fJJ; + fA(fposn) = value; +#ifdef DEBUG_ROW_SPARSE_JACOBIAN +printf(" storing at fposn=%d (new N_nonzeros_=%d)\n", fposn, N_nonzeros_); +#endif +return fposn; } +#endif // HAVE_ROW_SPARSE_JACOBIAN //****************************************************************************** +#ifdef HAVE_ROW_SPARSE_JACOBIAN +#ifdef DEBUG_ROW_SPARSE_JACOBIAN // -// This function searches our data structures for element (II,JJ). -// If found, it returns the position (0-origin) in A_ and JA_. -// If not found, it returns -1. +// This function prints the sparse-matrix data structure. // -int row_sparse_Jacobian_matrix::find_element(int II, int JJ) +void row_sparse_Jacobian::print_data_structure() const { -const int start = IA(II); -const int stop = IA(II+1); - for (int posn = start ; posn < stop ; ++posn) +printf("--- begin Jacobian printout\n"); + for (int fII = 1 ; fII <= current_N_rows_ ; ++fII) { - if (JA(posn) == JJ) - then return posn; // found + printf("fII=%d", fII); + const int fstart = fIA(fII); + const int fstop = fIA(fII + 1); + printf("\tfposn=[%d,%d):", fstart, fstop); + printf("\tfJJ ="); + for (int fposn = fstart ; fposn < fstop ; ++fposn) + { + printf(" %d", fJA(fposn)); + } + printf("\n"); } +printf("--- end Jacobian printout\n"); +} +#endif // DEBUG_ROW_SPARSE_JACOBIAN +#endif // HAVE_ROW_SPARSE_JACOBIAN + +//****************************************************************************** +//****************************************************************************** +//****************************************************************************** + +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG +// +// This function constructs a row_sparse_Jacobian__ILUCG object. +// +row_sparse_Jacobian__ILUCG::row_sparse_Jacobian__ILUCG + (patch_system& ps, + bool print_msg_flag /* = false */) + : row_sparse_Jacobian(ps, print_msg_flag), + print_msg_flag_(print_msg_flag), + itemp_(NULL), + rtemp_(NULL) +{ } +#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG -return -1; // not found +//****************************************************************************** + +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG +// +// This function destroys a row_sparse_Jacobian__ILUCG object. +// +row_sparse_Jacobian__ILUCG::~row_sparse_Jacobian__ILUCG() +{ +delete[] rtemp_; +delete[] itemp_; } +#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG //****************************************************************************** +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG // -// This function starts a new row in the matrix. +// This function solves the linear system J.x = rhs, with rhs and x +// being nominal-grid gridfns, using an ILUCG subroutine originally +// provided to me in 1985 (!) by Tom Nicol of the UBC Computing Center. +// It returns -1.0 (no condition number estimate). // -void row_sparse_Jacobian_matrix::start_new_row(int II) +fp row_sparse_Jacobian__ILUCG::solve_linear_system(int rhs_gfn, int x_gfn) { -// FIXME FIXME +assert(current_N_rows_ == N_rows_); + +// +// if this is our first call, allocate the scratch arrays +// +if (itemp_ == NULL) + then { + if (print_msg_flag_) + then { + CCTK_VInfo(CCTK_THORNSTRING, + "row_sparse_Jacobian__ILUCG::solve_linear_system()"); + CCTK_VInfo(CCTK_THORNSTRING, + " N_rows_=%d N_nonzeros_=%d", + N_rows_, N_nonzeros_); + CCTK_VInfo(CCTK_THORNSTRING, + " N_nonzeros_allocated_=%d", + N_nonzeros_allocated_); + } + itemp_ = new integer[3*N_rows_ + 3*N_nonzeros_ + 2]; + rtemp_ = new fp [4*N_rows_ + N_nonzeros_ ]; + } + +// +// set up the ILUCG subroutine +// + +// initial guess = all zeros +fp *x = ps_.gridfn_data(x_gfn); + for (int II = 0 ; II < N_rows_ ; ++II) + { + x[II] = 0.0; + } + +const integer N = N_rows_; +const fp *rhs = ps_.gridfn_data(rhs_gfn); +const fp eps = -1000.0; // solve to 1000 * machine epsilon +const integer max_iterations = 0; // no limit on iterations +integer istatus, ierror; + +// the actual linear solution +#if defined(FP_IS_FLOAT) + CCTK_FNAME(silucg_wrapper)(&N, + IA_, JA_, A_, + rhs, x, + itemp_, rtemp_, + &eps, &max_iterations, + &istatus, &ierror); +#elif defined(FP_IS_DOUBLE) + CCTK_FNAME(dilucg_wrapper)(&N, + IA_, JA_, A_, + rhs, x, + itemp_, rtemp_, + &eps, &max_iterations, + &istatus, &ierror); +#else + #error "don't know fp datatype!" +#endif + +if (ierror != 0) + then CCTK_VWarn(-1, __LINE__, __FILE__, CCTK_THORNSTRING, +"***** row_sparse_Jacobian__ILUCG::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n" +" error return istatus=%d from [sd]ilucg() routine!" + , + rhs_gfn, x_gfn, + int(istatus)); /*NOTREACHED*/ +if (print_msg_flag_) + then CCTK_VInfo(CCTK_THORNSTRING, + " solution found in %d CG iterations", + istatus); + +return -1.0; // no condition number estimate available } +#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG diff --git a/src/elliptic/row_sparse_Jacobian.hh b/src/elliptic/row_sparse_Jacobian.hh index 6e4d962..2eb6f6d 100644 --- a/src/elliptic/row_sparse_Jacobian.hh +++ b/src/elliptic/row_sparse_Jacobian.hh @@ -3,8 +3,10 @@ // #ifdef HAVE_ROW_SPARSE_JACOBIAN -// row_sparse_Jacobian_sparsity -- (no-op) accumulate ... sparsity info -// row_sparse_Jacobian_matrix -- Jacobian stored as row-oriented sparse matrix +// row_sparse_Jacobian -- Jacobian stored as row-oriented sparse matrix +#endif +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG +// row_sparse_Jacobian__ILUCG -- row_sparse_Jacobian with ILUCG linear solver #endif // @@ -14,183 +16,187 @@ // "Jacobian.hh" // +#undef DEBUG_ROW_SPARSE_JACOBIAN // defined this for *very* detailed + // printing of data structures + //****************************************************************************** +#ifdef HAVE_ROW_SPARSE_JACOBIAN +// +// This class stores the Jacobian as a row-oriented sparse matrix. // -// The row-oriented sparse matrix format is widely used by sparse matrix -// libraries. In this format, the matrix is represented by 3 arrays: -// A[N_nonzeros] = nonzero matrix elements, ordered by rows; +// This sparse matrix format is widely used by sparse matrix libraries. +// The format is as follows: The matrix is represented by 3 arrays +// (where () denote Fortran 1-origin subscripting): +// IA(N_rows+1) = Fortran (1-origin) indices in A of the start +// of each row's nonzeros; IA(N_rows+1) = N_nonzeros + 1 +// JA(N_nonzeros) = Fortran (1-origin) column numbers of the +// corresponding entries in A +// A(N_nonzeros) = nonzero matrix elements, ordered by rows; // within a row the matrix elements may be in // any order -// IA[N_rows+1] = Fortran (1-origin) indices in A of the start -// of each row's nonzeros -// JA[N_nonzeros] = Fortran (1-origin) column numbers of the -// corresponding entries in A // IA and JA are arrays of integer so as to be storage-compatible with // Fortran. // - -//****************************************************************************** - -#ifdef HAVE_ROW_SPARSE_JACOBIAN -// -// This class accumulates the sparsity pattern of a row-oriented -// sparse-matrix Jacobian. We assume that the Jacobian is traversed -// by rows. +// For example, the matrix +// [ 11.0 12.0 13.0 16.0 ] +// [ 21.0 22.0 24.0 25.0 ] +// [ 32.0 34.0 ] +// [ 42.0 44.0 46.0 ] +// [ 53.0 55.0 ] +// [ 61.0 64.0 66.0 ] +// could be represented by the arrays (Fortran indexing) +// IA( 1) = 1 +// JA( 1) = 1 JA( 2) = 2 JA( 3) = 3 JA( 4) = 6 +// A( 1) = 11.0 A( 2) = 12.0 A( 3) = 13.0 A( 4) = 16.0 +// IA( 2) = 5 +// JA( 5) = 1 JA( 6) = 2 JA( 7) = 4 JA( 8) = 5 +// A( 5) = 21.0 A( 6) = 22.0 A( 7) = 24.0 A( 8) = 25.0 +// IA( 3) = 9 +// JA( 9) = 2 JA(10) = 4 +// A( 9) = 32.0 A(10) = 34.0 +// IA( 4) = 11 +// JA(11) = 2 JA(12) = 4 JA(13) = 6 +// A(11) = 42.0 A(12) = 44.0 A(13) = 46.0 +// IA( 5) = 14 +// JA(14) = 3 JA(15) = 5 +// A(14) = 53.0 A(15) = 55.0 +// IA( 6) = 16 +// JA(16) = 1 JA(17) = 4 JA(18) = 6 +// A(16) = 61.0 A(17) = 64.0 A(18) = 66.0 +// IA( 7) = 19 // -class row_sparse_Jacobian_sparsity - : public Jacobian_sparsity +class row_sparse_Jacobian + : public Jacobian { public: - // record the zeroing of the entire matrix + // + // routines to access the matrix + // + + // get a matrix element + fp element(int II, int JJ) const; + + // is the matrix element (II,JJ) stored explicitly? + bool is_explicitly_stored(int II, int JJ) const + { return find_element(II,JJ) > 0; } + + + // + // routines for setting values in the matrix + // + + // zero the entire matrix void zero_matrix(); - // record the setting of a matrix element to a specified value - void set_element(int II, int JJ, fp value) - { note_element(II, JJ); } + // set a matrix element to a specified value + void set_element(int II, int JJ, fp value); - // record the summing of a value into a matrix element - void sum_into_element(int II, int JJ, fp value) - { note_element(II, JJ); } + // sum a value into a matrix element + void sum_into_element(int II, int JJ, fp value); - // read out total number of nonzeros - int N_nonzeros() const - { return N_nonzeros_; } + // // constructor, destructor - row_sparse_Jacobian_sparsity(patch_system& ps, - bool print_msg_flag = false); - ~row_sparse_Jacobian_sparsity(); - -private: - // note start of a new row in the matrix traversal - void note_new_row(int II); + // +protected: + row_sparse_Jacobian(patch_system& ps, + bool print_msg_flag = false); + ~row_sparse_Jacobian(); - // note that the (II,JJ) matrix element needs to be stored - void note_element(int II, int JJ); -private: // - // Our goal is to count the total number of distinct matrix - // elements stored. The problem is, there's no easy way to - // know what size buffers to allocate to keep track of them. - // We could use growing buffers, but instead we keep track of - // the set of distinct columns (JJ) seen within each row (II). - // (Here a buffer of size NN() is guaranteed to be sufficient.) - // Then at the end of each row we can update our running total. - // - int N_nonzeros_; - int current_II_; - int N_JJs_at_this_II_; - - // --> new[]-allocated array of JJ values for nonzeros in this row - // only values [0, N_JJs_at_this_II) used - int* JJs_at_this_II_; - }; -#endif // HAVE_ROW_SPARSE_JACOBIAN + // internal routines + // +protected: + // return position (1-origin) of (II,JJ) in A_ and JA_ + // return 0 if not found + int find_element(int II, int JJ) const; -//****************************************************************************** + // insert new array element (II,JJ) in matrix, + // starting new row and/or growing arrays if necessary + // return position (1-origin) in A_ and JA_ + int insert_element(int II, int JJ, fp value); -#ifdef HAVE_ROW_SPARSE_JACOBIAN -// -// This class stores the Jacobian as a row-oriented sparse matrix. -// -class row_sparse_Jacobian_matrix - : public Jacobian_matrix - { -public: - // zero the entire matrix - void zero_matrix(); +#ifdef DEBUG_ROW_SPARSE_JACOBIAN + // print data structure (IA_ and JA_ arrays) + void print_data_structure() const; +#endif - // set a matrix element to a specified value - void set_element(int II, int JJ, fp value) + // access IA[] and JA[] and A[] using 1-origin Fortran indices + int& fIA(int fII) const { - const int posn = find_element(II, JJ); - A_[posn] = value; + assert(fII >= 1); + assert(fII <= current_N_rows_+1); + return IA_[csub(fII)]; } - - // sum a value into a matrix element - void sum_into_element(int II, int JJ, fp value) + int& fJA(int fposn) const { - const int posn = find_element(II, JJ); - A_[posn] += value; + assert(fposn >= 1); + assert(fposn <= N_nonzeros_); + return JA_[csub(fposn)]; } - - // access (get) a matrix element - fp element(int II, int JJ) const + fp& fA(int fposn) const { - const int posn = find_element(II, JJ); - return (posn >= 0) ? A_[posn] : 0.0; + assert(fposn >= 1); + assert(fposn <= N_nonzeros_); + return A_[csub(fposn)]; } - // this is a dense matrix ==> all values are stored explicitly - bool is_explicitly_stored(int II, int JJ) const - { return find_element(II,JJ) >= 0; } +protected: + // actual size, size allocated for JA_ and A_ + int N_nonzeros_, N_nonzeros_allocated_; + + // at present rows 1 <= fii <= current_N_rows_ are valid + int current_N_rows_; - // solve linear system J.x = rhs via LAPACK + // --> new[]-allocated arrays + // ***** due to constructor initialization list ordering, + // ***** these must be declared *AFTER* N_nonzeros_allocated_ + integer* IA_; // size N_rows_+1 + // valid for Fortran indices + // 1 <= fII <= current_N_rows_+1 + // ... these are grown (reallocated) as necessary by insert_element() + // ... using STL vector would be much cleaner here, but alas + // there are too many broken compilers/linkers in the world + // world don't fully grok vector :( :( + integer* JA_; // size N_nonzeros_allocated_ + // valid for Fortran indices + // 1 <= fposn < IA(current_N_rows_) + fp* A_; // ditto + }; +#endif // HAVE_ROW_SPARSE_JACOBIAN + +//****************************************************************************** + +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG +// +// This class defines the linear solver routine using ILUCG. +// +class row_sparse_Jacobian__ILUCG + : public row_sparse_Jacobian + { +public: + // solve linear system J.x = rhs via ILUCG // ... rhs and x are nominal-grid gridfns // ... overwrites Jacobian matrix with its LU decomposition // ... returns condition number if known, // 0.0 if matrix is numerically singular, // -1.0 if condition number is unknown - fp solve_linear_system(int rhs_gfn, int x_gfn) - { return -1.0; }; // FIXME + fp solve_linear_system(int rhs_gfn, int x_gfn); // constructor, destructor public: - row_sparse_Jacobian_matrix(patch_system& ps, - row_sparse_Jacobian_sparsity& JS, + row_sparse_Jacobian__ILUCG(patch_system& ps, bool print_msg_flag = false); - ~row_sparse_Jacobian_matrix(); - -private: - // return posn (0-origin) of (II,JJ) in A_ and JA_, or -1 if not found - int find_element(int II, int JJ) const; - - // start a new row in the matrix - void start_new_row(int II); - - // insert new array element (II,JJ) in current row - // return posn (0-origin) in A_ and JA_ - void insert_element(int II, int JJ, fp value); + ~row_sparse_Jacobian__ILUCG(); private: - // IA[] and JA[], but returning 0-origin indices - int IA(int II) const - { - assert(II >= 0); - assert(II < NN()+1); - return IA_[II] - 1; - } - int JA(int posn) const - { - assert(posn >= 0); - assert(posn < N_nonzeros_); - return JA_[posn] - 1; - } + bool print_msg_flag_; - // values in IA_[] and JA_[] are semantically Fortran subscripts - // this function converts a C subscript to a Fortran subscript - int fsub(int csub) const - { return csub + 1; } - -private: - int N_nonzeros_; - - // - // invariants while we're traversing the matrix: - // A_[0,current_N_nonzeros_) is valid - // IA_[0,current_II] is valid - // JA_[0,current_N_nonzeros_) is valid - // - int current_N_nonzeros_; - int current_II_; - - // --> new[]-allocated arrays - // ***** due to constructor initialization list ordering, - // ***** these must be declared *AFTER* N_nonzeros_ - fp* A_; // size N_nonzeros_; - integer* IA_; // size NN() + 1 - integer* JA_; // size N_nonzeros_; + // work vectors for ILUCG subroutine + // ... allocated by solve_linear_system() on first call + integer* itemp_; + fp* rtemp_; }; -#endif // HAVE_ROW_SPARSE_JACOBIAN +#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG |