diff options
author | jthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5> | 2003-01-22 12:27:46 +0000 |
---|---|---|
committer | jthorn <jthorn@f88db872-0e4f-0410-b76b-b9085cfa78c5> | 2003-01-22 12:27:46 +0000 |
commit | 8b0a74a327fbecb12f3d32ecff1fa28a6a6c2a15 (patch) | |
tree | 434af140b12333ff085ea5c5983d46387cbe6267 | |
parent | 82e2251230c9dc90d1b4650bafc85e75bad61ad7 (diff) |
* add support for sparse-matrix Jacobians ==> works!
* change default in param.ccl to use this
* change default in src/include/config.h to default to no longer
link in LAPACK routines
* update documentation
git-svn-id: http://svn.einsteintoolkit.org/cactus/EinsteinAnalysis/AHFinderDirect/trunk@931 f88db872-0e4f-0410-b76b-b9085cfa78c5
33 files changed, 2213 insertions, 770 deletions
@@ -69,55 +69,12 @@ Most of this thorn's relativity code is machine-generated using Maple (version 7), but you don't need Maple unless you want to modify the relativity code. -This thorn uses the LAPACK library (which in turn uses the BLAS library). -Thus you need to configure your Cactus with LAPACK=yes. -[If you don't have LAPACK/BLAS installed on your system already, you -can get Fortran 77 source code and/or binaries for various architectures, -from http://www.netlib.org/lapack/ and http://www.netlib.org/blas/ -respectively. But many systems have these installed already -- try -'locate liblapack' and 'locate libblas' on your system.] - - -Library Configuration -===================== - -When configuring your Cactus configuration, you need to set - LAPACK=yes . -If all goes well, Cactus will find the LAPACK and BLAS libraries and give -during the configure process saying that it found them. Otherwise, the -configure process will abort with a message saying that you need to set -the configure variable LAPACK_DIR to a directory (or blank-separated list -of directories) containing the LAPACK and BLAS libraries. - -If the LAPACK and/or BLAS libraries were compiled with a (Fortran) -compiler which is *not* used to compile any part of this Cactus' -configuration, then you may also need to set the configure variables - LAPACK_EXTRA_LIBS = the name (or blank-separated list of names) of - that (Fortran) compiler's run-time support - library(ies) - LAPACK_EXTRA_LIB_DIRS = the directory (or blank-separated list of - directories) containing that library(ies) - -For example, if your LAPACK and/or BLAS were compiled with the GNU g77 -compiler (as is common on GNU/Linux and *BSD systems), and you are *not* -using g77 to compile any part of your Cactus configuration (maybe because -you're using a different Fortran compiler), then you may need to set the -environment variables to point to the g77 support library g2c: - LAPACK_EXTRA_LIBS = g2c - LAPACK_EXTRA_LIBDIRS = `g77 --print-file-name=libg2c.a | xargs basename` - -For example, on the AEI xeons the settings would be - LAPACK_EXTRA_LIBS = g2c - LAPACK_EXTRA_LIBDIRS = `g77 --print-file-name=libg2c.a | xargs dirname` - -All of these settings are of configure variables, i.e. you set them either -on the command line when configuring, - % gmake my-configuration LAPACK=yes -or as assignments in your ~/.cactus/config file, - % cat ~/.cactus/config - LAPACK=yes - LAPACK_EXTRA_LIBS=g2c - LAPACK_EXTRA_LIB_DIRS=/usr/lib/gcc-lib/i386-redhat-linux/2.96 +By default, this thorn doesn't use any external libraries. However, +if HAVE_DENSE_JACOBIAN__LAPACK is defined in src/include/configure.h, +then this thorn uses the LAPACK library (which in turn uses the BLAS +library), so you will need to configure your Cactus to use LAPACK. +Instructions on doing this are in the file README.library in this +directory. Code Notes @@ -138,8 +95,8 @@ In particular: alas none of the more modern forms (<cstdio> and namespace std::) seem to be supported on as wide a range of systems as the pre-namespaces form.) -* To avoid various portability problems, the C++ standard template - library (STL) isn't used. +* To avoid various portability problems, none of the C++ standard + template library (STL) is used. Compiler Notes diff --git a/README.library b/README.library new file mode 100644 index 0000000..2b7fad0 --- /dev/null +++ b/README.library @@ -0,0 +1,56 @@ +Library Configuration +===================== +$Header$ + +By default this thorn doesn't use any external libraries, and you +can ignore the instructions in this file. + +However, if this thorn is configured to use the LAPACK and BLAS +libraries (see README for details), then you need to configure Cactus +to use these libraries; this file describes how to do this. + +Basically, you just need to set + LAPACK=yes +when configuring your Cactus configuration. + +If all goes well, Cactus will find the LAPACK and BLAS libraries and give +during the configure process saying that it found them. Otherwise, the +configure process will abort with a message saying that you need to set +the configure variable LAPACK_DIR to a directory (or blank-separated list +of directories) containing the LAPACK and BLAS libraries. + +[If you don't have LAPACK/BLAS installed on your system already, you +can get Fortran 77 source code and/or binaries for various architectures, +from http://www.netlib.org/lapack/ and http://www.netlib.org/blas/ +respectively. But many systems have these installed already -- try +'locate liblapack' and 'locate libblas' on your system.] + +If the LAPACK and/or BLAS libraries were compiled with a (Fortran) +compiler which is *not* used to compile any part of this Cactus' +configuration, then you may also need to set the configure variables + LAPACK_EXTRA_LIBS = the name (or blank-separated list of names) of + that (Fortran) compiler's run-time support + library(ies) + LAPACK_EXTRA_LIB_DIRS = the directory (or blank-separated list of + directories) containing that library(ies) + +For example, if your LAPACK and/or BLAS were compiled with the GNU g77 +compiler (as is common on GNU/Linux and *BSD systems), and you are *not* +using g77 to compile any part of your Cactus configuration (maybe because +you're using a different Fortran compiler), then you may need to set the +environment variables to point to the g77 support library g2c: + LAPACK_EXTRA_LIBS = g2c + LAPACK_EXTRA_LIBDIRS = `g77 --print-file-name=libg2c.a | xargs basename` + +For example, on the AEI xeons the settings would be + LAPACK_EXTRA_LIBS = g2c + LAPACK_EXTRA_LIBDIRS = `g77 --print-file-name=libg2c.a | xargs dirname` + +All of these settings are of configure variables, i.e. you set them either +on the command line when configuring, + % gmake my-configuration LAPACK=yes +or as assignments in your ~/.cactus/config file, + % cat ~/.cactus/config + LAPACK=yes + LAPACK_EXTRA_LIBS=g2c + LAPACK_EXTRA_LIB_DIRS=/usr/lib/gcc-lib/i386-redhat-linux/2.96 @@ -4,8 +4,6 @@ small things medium things compute Gaussian curvature, cf AHFinder/src/AHFinder_gau.F - lib/make/extras/LAPACK/ shell scripts to replace environment variables - LAPACK_DIR, LAPACK_EXTRA_LIBS, and LAPACK_EXTRA_LIBDIRS there should be a Cactus test suite HDF5 data files (simple format and/or Werner Benger's fancy one) move origin point to track moving BHs @@ -29,7 +27,6 @@ medium things large things multi-processor (interpolator, overall control) - sparse matrix elliptic solver handle quadrant/octant grids with rotating BCs (needs Jacobian of one ghost zone to cover two different ghost zones) "isolated horizons" computation of BH mass and spin diff --git a/doc/documentation.tex b/doc/documentation.tex index ba0439c..bc7e415 100644 --- a/doc/documentation.tex +++ b/doc/documentation.tex @@ -315,16 +315,12 @@ with various compilers. See the ``Code Notes'' and ``Compiler Notes'' sections in the top-level \code{README} file for details and lists of compilers currently known to be ok or not. -\thorn{AHFinderDirect} uses the LAPACK library for solving linear -equations, so you need to configure Cactus with \verb|LAPACK=yes|. -Cactus can usually find the LAPACK libraries itself; if not it will -tell you (at configure time) that you also need to set the Cactus -configuration variable \verb|LAPACK_DIR| to tell it where the -libraries live. If the LAPACK libraries were compiled by a different -Fortran compiler than you're using to compile Cactus, then you may also -have to set the Cactus configuration variables \verb|LAPACK_EXTRA_LIBS| -and \verb|LAPACK_EXTRA_LIB_DIRS|. See the top-level \verb|README| -file for details and examples. +By default \thorn{AHFinderDirect} doesn't use any external libraries. +However, if \code{HAVE\_DENSE\_JACOBIAN\_\_LAPACK} is defined in +\code{src/include/config.h}, then \thorn{AHFinderDirect} uses the +LAPACK library for solving linear equations. In this case you need +to configure Cactus with \code{LAPACK=yes}. See the top-level +\code{README} and \code{README.library} files for details on this. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -108,7 +108,7 @@ boolean print_timing_stats \ # ***** parameters for the Jacobian matrix ***** # -keyword Jacobian_method "how do we compute the Jacobian matrix?" +keyword Jacobian_compute_method "how do we compute the Jacobian matrix?" { # for debugging only" "numerical perturbation" :: "n.b. this is *very* slow" @@ -123,17 +123,22 @@ keyword Jacobian_method "how do we compute the Jacobian matrix?" } "symbolic differentiation with finite diff d/dr" # -# This parameter lists all known storage methods. See +# This parameter lists all known methods for storing the Jacobian and +# solving linear systems with the Jacobian as the LHS matrix. See # "src/include/config.hh" for which of these are actually compiled in at # the moment. N.b. each compiled-in method requires linking with the # corresponding linear-solver library; see "src/make.configuration.defn" # for details on these libraries. # -keyword Jacobian_storage_method "how do we store the Jacobian matrix?" +keyword Jacobian_store_solve_method \ + "how do we store/linear-solve the Jacobian matrix?" { -"dense matrix" :: "dense matrix (inefficient at high angular resolution)" -"row-oriented sparse matrix" :: "sparse matrix, row-oriented storage format" -} "dense matrix" +"dense matrix/LAPACK" :: \ + "store as (Fortran) dense matrix, solve with LAPACK routines" +"row-oriented sparse matrix/ILUCG" :: \ + "store as sparse matrix (row-oriented storage format), \ + solve with ILUCG (incomplete LU decomposition - conjugate gradient) method" +} "row-oriented sparse matrix/ILUCG" # # This parameter controls two different sorts of one-sided finite @@ -156,7 +161,7 @@ real Jacobian_perturbation_amplitude \ # if AHFinderDirect::method = "test Jacobian", should we test all # known methods for computing the Jacobian, or just the numerical perturbation # method (the latter may be useful of some other methods are broken) -boolean test_all_Jacobian_methods \ +boolean test_all_Jacobian_compute_methods \ "should we test all Jacobian computation methods, or just NP?" { } "true" @@ -441,35 +446,7 @@ keyword patch_system_type[5] "what type of patch system should we use?" } "match Cactus grid symmetry" # -# This parameter sets the width of the interpatch ghost zones in the -# patch system. Note that this thorn uses the terminology "ghost zone" -# for any of what Cactus in general now calls a "boundary zone" or a -# "symmetry zone" or a "patch zone". -# -# This parameter must be at least -# ... 2 if FINITE_DIFF_ORDER is set to 4 in "src/include/config.hh" -# ... 1 if FINITE_DIFF_ORDER is set to 2 in "src/include/config.hh" -# The code checks for this being too small, and reports a fatal error if so. -# -int ghost_zone_width "number of ghost zones on each side of a patch" -{ -0:* :: "any integer >= 0" -} 2 - -# -# Our code that computes surface integrals over patches (used for -# computing BH diagnostics like centroids, areas, masses, etc) silently -# assumes that this parameter is == 1, so you should probably leave -# it at that setting. -# -int patch_overlap_width \ - "number of grid points that nominally-just-touching patches should overlap" -{ -1:*:2 :: "any integer >= 0; current implementation requires that it be odd" -} 1 - -# -# This parameter sets the angular resolution of all the patch systems: +# This parameter sets the angular resolution of each patch systems: # the angular grid spacing in degrees is 90.0/N_zones_per_right_angle. # # In practice the error in the horizon position is usually dominated @@ -496,10 +473,38 @@ int patch_overlap_width \ # the 4th/6th power of this parameter! For example, doubling the resolution # takes 16 times as much memory, and 64 times as long to run! # -int N_zones_per_right_angle "sets angular resolution of patch systems" +int N_zones_per_right_angle[5] "sets angular resolution of patch systems" { 1:* :: "any integer >= 1; must be even for patch systems other than full-sphere" -} 12 +} 18 + +# +# This parameter sets the width of the interpatch ghost zones in the +# patch system. Note that this thorn uses the terminology "ghost zone" +# for any of what Cactus in general now calls a "boundary zone" or a +# "symmetry zone" or a "patch zone". +# +# This parameter must be at least +# ... 2 if FINITE_DIFF_ORDER is set to 4 in "src/include/config.hh" +# ... 1 if FINITE_DIFF_ORDER is set to 2 in "src/include/config.hh" +# The code checks for this being too small, and reports a fatal error if so. +# +int ghost_zone_width "number of ghost zones on each side of a patch" +{ +0:* :: "any integer >= 0" +} 2 + +# +# Our code that computes surface integrals over patches (used for +# computing BH diagnostics like centroids, areas, masses, etc) silently +# assumes that this parameter is == 1, so you should probably leave +# it at that setting. +# +int patch_overlap_width \ + "number of grid points that nominally-just-touching patches should overlap" +{ +1:*:2 :: "any integer >= 0; current implementation requires that it be odd" +} 1 ################################################################################ diff --git a/run/test-ahfinderdirect/misc/Kerr.par b/run/test-ahfinderdirect/misc/Kerr.par index 003c5ad..abcc41c 100644 --- a/run/test-ahfinderdirect/misc/Kerr.par +++ b/run/test-ahfinderdirect/misc/Kerr.par @@ -26,7 +26,6 @@ ADMBase::initial_data = "exact" ADMBase::lapse_evolution_method = "static" ADMBase::shift_evolution_method = "static" ADMBase::metric_type = "physical" - Exact::exact_model = "Kerr/Kerr-Schild" Exact::Kerr_KerrSchild__mass = 1.0 Exact::Kerr_KerrSchild__spin = 0.6 @@ -49,7 +48,7 @@ AHFinderDirect::origin_x[1] = 0.5 AHFinderDirect::origin_y[1] = 0.7 AHFinderDirect::origin_z[1] = 0.0 -AHFinderDirect::initial_guess_method = "coordinate sphere" +AHFinderDirect::initial_guess_method[1] = "coordinate sphere" AHFinderDirect::initial_guess__coord_sphere__x_center[1] = -0.2 AHFinderDirect::initial_guess__coord_sphere__y_center[1] = 0.3 AHFinderDirect::initial_guess__coord_sphere__z_center[1] = 0.0 diff --git a/run/test-ahfinderdirect/misc/try-10.par b/run/test-ahfinderdirect/misc/try-10.par index cee27b3..38f1d8c 100644 --- a/run/test-ahfinderdirect/misc/try-10.par +++ b/run/test-ahfinderdirect/misc/try-10.par @@ -42,7 +42,7 @@ AHFinderDirect::N_horizons = 1 AHFinderDirect::origin_x[1] = 0.5 AHFinderDirect::origin_y[1] = 0.7 AHFinderDirect::origin_z[1] = 0.0 -AHFinderDirect::N_zones_per_right_angle = 10 +AHFinderDirect::N_zones_per_right_angle[1] = 10 AHFinderDirect::initial_guess_method = "coordinate sphere" AHFinderDirect::initial_guess__coord_sphere__x_center[1] = -0.2 diff --git a/run/test-ahfinderdirect/misc/try-12-debug.par b/run/test-ahfinderdirect/misc/try-12-debug.par index 0cbe3d6..9ae2f0e 100644 --- a/run/test-ahfinderdirect/misc/try-12-debug.par +++ b/run/test-ahfinderdirect/misc/try-12-debug.par @@ -44,7 +44,7 @@ AHFinderDirect::N_horizons = 1 AHFinderDirect::origin_x[1] = 0.5 AHFinderDirect::origin_y[1] = 0.7 AHFinderDirect::origin_z[1] = 0.0 -AHFinderDirect::N_zones_per_right_angle = 12 +AHFinderDirect::N_zones_per_right_angle[1] = 12 AHFinderDirect::initial_guess_method = "coordinate sphere" AHFinderDirect::initial_guess__coord_sphere__x_center[1] = -0.2 diff --git a/run/test-ahfinderdirect/misc/try-12-horizon.par b/run/test-ahfinderdirect/misc/try-12-horizon.par index 67a32e2..aced946 100644 --- a/run/test-ahfinderdirect/misc/try-12-horizon.par +++ b/run/test-ahfinderdirect/misc/try-12-horizon.par @@ -45,7 +45,7 @@ AHFinderDirect::N_horizons = 1 AHFinderDirect::origin_x[1] = 0.5 AHFinderDirect::origin_y[1] = 0.7 AHFinderDirect::origin_z[1] = 0.0 -AHFinderDirect::N_zones_per_right_angle = 12 +AHFinderDirect::N_zones_per_right_angle[1] = 12 AHFinderDirect::initial_guess_method = "Kerr/Kerr-Schild" AHFinderDirect::initial_guess__Kerr_KerrSchild__x_posn[1] = 0.0 diff --git a/run/test-ahfinderdirect/misc/try-12.par b/run/test-ahfinderdirect/misc/try-12.par index f5b67ca..390242a 100644 --- a/run/test-ahfinderdirect/misc/try-12.par +++ b/run/test-ahfinderdirect/misc/try-12.par @@ -42,7 +42,7 @@ AHFinderDirect::N_horizons = 1 AHFinderDirect::origin_x[1] = 0.5 AHFinderDirect::origin_y[1] = 0.7 AHFinderDirect::origin_z[1] = 0.0 -AHFinderDirect::N_zones_per_right_angle = 12 +AHFinderDirect::N_zones_per_right_angle[1] = 12 AHFinderDirect::initial_guess_method = "coordinate sphere" AHFinderDirect::initial_guess__coord_sphere__x_center[1] = -0.2 diff --git a/run/test-ahfinderdirect/misc/try-18.par b/run/test-ahfinderdirect/misc/try-18.par index 39d2a8a..b88eaed 100644 --- a/run/test-ahfinderdirect/misc/try-18.par +++ b/run/test-ahfinderdirect/misc/try-18.par @@ -41,7 +41,7 @@ AHFinderDirect::N_horizons = 1 AHFinderDirect::origin_x[1] = 0.5 AHFinderDirect::origin_y[1] = 0.7 AHFinderDirect::origin_z[1] = 0.0 -AHFinderDirect::N_zones_per_right_angle = 18 +AHFinderDirect::N_zones_per_right_angle[1] = 18 AHFinderDirect::initial_guess_method = "coordinate sphere" AHFinderDirect::initial_guess__coord_sphere__x_center[1] = -0.2 diff --git a/src/driver/Newton.cc b/src/driver/Newton.cc index 622bece..ed80cf1 100644 --- a/src/driver/Newton.cc +++ b/src/driver/Newton.cc @@ -60,7 +60,7 @@ using jtutil::error_exit; // otherwise it's false. // bool Newton_solve(patch_system& ps, - Jacobian_matrix& Jac, + Jacobian& Jac, const struct cactus_grid_info& cgi, const struct geometry_info& gi, const struct Jacobian_info& Jacobian_info, @@ -135,7 +135,7 @@ const int max_Newton_iterations if (verbose_info.print_algorithm_details) then CCTK_VInfo(CCTK_THORNSTRING, "solving linear system for Delta_h (%d unknowns)", - Jac.NN()); + Jac.N_rows()); const fp rcond = Jac.solve_linear_system(gfns::gfn__Theta, gfns::gfn__Delta_h); if (rcond == 0.0) diff --git a/src/driver/driver.hh b/src/driver/driver.hh index 9747002..bbee5aa 100644 --- a/src/driver/driver.hh +++ b/src/driver/driver.hh @@ -181,7 +181,7 @@ struct BH_diagnostics struct AH_info { patch_system* ps_ptr; - Jacobian_matrix* Jac_ptr; + Jacobian* Jac_ptr; struct initial_guess_info initial_guess_info; @@ -245,7 +245,7 @@ enum initial_guess_method // Newton.cc // returns true for success, false for failure to converge bool Newton_solve(patch_system& ps, - Jacobian_matrix& Jac, + Jacobian& Jac, const struct cactus_grid_info& cgi, const struct geometry_info& gi, const struct Jacobian_info& Jacobian_info, @@ -263,8 +263,8 @@ void output_gridfn(patch_system& ps, int unknown_gfn, const struct IO_info& IO_info, const char base_file_name[], int hn, bool print_msg_flag, int AHF_iteration = 0); void output_Jacobians(const patch_system& ps, - const Jacobian_matrix* Jac_NP_ptr, - const Jacobian_matrix* Jac_SD_FDdr_ptr, + const Jacobian* Jac_NP_ptr, + const Jacobian* Jac_SD_FDdr_ptr, const struct IO_info& IO_info, const char base_file_name[], int hn, bool print_msg_flag, int AHF_iteration = 0); FILE* setup_BH_diagnostics_output_file(const struct IO_info& IO_info, diff --git a/src/driver/find_horizons.cc b/src/driver/find_horizons.cc index 45710dd..0d1365f 100644 --- a/src/driver/find_horizons.cc +++ b/src/driver/find_horizons.cc @@ -77,10 +77,10 @@ bool do_evaluate_expansion bool do_test_expansion_Jacobian (const struct verbose_info& verbose_info, int timer_handle, struct IO_info& IO_info, - bool test_all_Jacobian_methods, + bool test_all_Jacobian_compute_methods, struct Jacobian_info& Jac_info, struct cactus_grid_info& cgi, struct geometry_info& gi, - patch_system& ps, Jacobian_matrix& Jac, + patch_system& ps, Jacobian& Jac, bool active_flag, int hn, int N_horizons); bool do_find_horizon (const struct verbose_info& verbose_info, int timer_handle, @@ -88,7 +88,7 @@ bool do_find_horizon const struct solver_info& solver_info, bool initial_find_flag, const struct Jacobian_info& Jac_info, struct cactus_grid_info& cgi, struct geometry_info& gi, - patch_system& ps, Jacobian_matrix& Jac, + patch_system& ps, Jacobian& Jac, bool active_flag, int hn, int N_horizons); void compute_BH_diagnostics @@ -179,15 +179,33 @@ if (state.gi.geometry_method == geometry__local_interp_from_Cactus_grid) .print_algorithm_highlights); } - // if we're going to need a Jacobian - // and it doesn't exist, create it. - if ( (state.method != method__evaluate_expansion) - && (AH_info.Jac_ptr == NULL) ) - then AH_info.Jac_ptr - = create_Jacobian_matrix(ps, - state.cgi, state.gi, - state.Jac_info, - verbose_info.print_algorithm_details); + // + // create the Jacobian matrix if we're going to need it + // and it doesn't already exist + // + switch (state.method) + { + case method__evaluate_expansion: + // no Jacobian needed ==> no-op here + break; + case method__test_expansion_Jacobian: + case method__find_horizon: + if (AH_info.Jac_ptr == NULL) + then AH_info.Jac_ptr + = new_Jacobian(state.Jac_info + .Jacobian_store_solve_method, + ps, + verbose_info.print_algorithm_details); + break; + default: + CCTK_VWarn(-1, __LINE__, __FILE__, CCTK_THORNSTRING, + "\n" + " find_horizons(): unknown method=(int)%d!\n" + " in create-Jacobian switch\n" + " (this should never happen!)" + , + int(state.method)); /*NOTREACHED*/ + } AH_info.AH_found = false; switch (state.method) @@ -203,7 +221,7 @@ if (state.gi.geometry_method == geometry__local_interp_from_Cactus_grid) case method__test_expansion_Jacobian: do_test_expansion_Jacobian(verbose_info, state.timer_handle, IO_info, - test_all_Jacobian_methods, + (test_all_Jacobian_compute_methods != 0), state.Jac_info, state.cgi, state.gi, ps, *AH_info.Jac_ptr, active_flag, hn, state.N_horizons); @@ -276,6 +294,7 @@ if (active_flag && IO_info.output_BH_diagnostics) CCTK_VWarn(-1, __LINE__, __FILE__, CCTK_THORNSTRING, "\n" " find_horizons(): unknown method=(int)%d!\n" + " in main switch\n" " (this should never happen!)" , int(state.method)); /*NOTREACHED*/ @@ -441,14 +460,13 @@ return true; // *** NORMAL RETURN *** // timer_handle = a valid Cactus timer handle if we want to time the // apparent horizon process, or -ve to skip this // (we only time the computation, not the file I/O) -// test_all_Jacobian_methods = true ==> Test all known methods of computing -// the Jacobian matrix, and print all -// the resulting Jacobian matrices -// and their differences. -// false ==> Just test/print the numerical -// perturbation calculation. (This -// may be useful if one or more of -// the other methods is broken.) +// test_all_Jacobian_compute_methods +// = true ==> Test all known methods of computing the Jacobian +// matrix, and print all the resulting Jacobian matrices +// and their differences. +// false ==> Just test/print the numerical perturbation calculation. +// (This may be useful if one or more of the other methods +// is broken.) // active_flag = Controls whether this processor is "active": // true ==> Normal operation. // false ==> We compute the Jacobian(s) as usual, but don't @@ -465,34 +483,32 @@ namespace { bool do_test_expansion_Jacobian (const struct verbose_info& verbose_info, int timer_handle, struct IO_info& IO_info, - bool test_all_Jacobian_methods, + bool test_all_Jacobian_compute_methods, struct Jacobian_info& Jac_info, struct cactus_grid_info& cgi, struct geometry_info& gi, - patch_system& ps, Jacobian_matrix& Jac, + patch_system& ps, Jacobian& Jac, bool active_flag, int hn, int N_horizons) { // numerical perturbation -Jacobian_matrix* Jac_NP_ptr = & Jac; +Jacobian* Jac_NP_ptr = & Jac; if (! expansion(ps, cgi, gi, true)) then return false; // *** ERROR RETURN *** -Jac_info.Jacobian_method = Jacobian_method__numerical_perturb; +Jac_info.Jacobian_compute_method = Jacobian__numerical_perturbation; if (! expansion_Jacobian(ps, *Jac_NP_ptr, cgi, gi, Jac_info, true)) // print msgs then return false; // *** ERROR RETURN *** -Jacobian_matrix* Jac_SD_FDdr_ptr = NULL; -if (test_all_Jacobian_methods) +Jacobian* Jac_SD_FDdr_ptr = NULL; +if (test_all_Jacobian_compute_methods) then { // symbolic differentiation with finite diff d/dr - Jac_SD_FDdr_ptr - = create_Jacobian_matrix(ps, - cgi, gi, - Jac_info, - verbose_info.print_algorithm_details); + Jac_SD_FDdr_ptr = new_Jacobian(Jac_info.Jacobian_store_solve_method, + ps, + verbose_info.print_algorithm_details); if (! expansion(ps, cgi, gi, true)) then return false; // *** ERROR RETURN *** - Jac_info.Jacobian_method = Jacobian_method__symbolic_diff_with_FD_dr; + Jac_info.Jacobian_compute_method = Jacobian__symbolic_diff_with_FD_dr; if (! expansion_Jacobian(ps, *Jac_SD_FDdr_ptr, cgi, gi, Jac_info, true)) // print msgs @@ -548,7 +564,7 @@ bool do_find_horizon const struct solver_info& solver_info, bool initial_find_flag, const struct Jacobian_info& Jac_info, struct cactus_grid_info& cgi, struct geometry_info& gi, - patch_system& ps, Jacobian_matrix& Jac, + patch_system& ps, Jacobian& Jac, bool active_flag, int hn, int N_horizons) { if (verbose_info.print_algorithm_highlights) diff --git a/src/driver/io.cc b/src/driver/io.cc index 23c6fa2..defbe0f 100644 --- a/src/driver/io.cc +++ b/src/driver/io.cc @@ -187,8 +187,8 @@ default: // A null Jacobian pointer means to skip that Jacobian. // void output_Jacobians(const patch_system& ps, - const Jacobian_matrix* Jac_NP_ptr, - const Jacobian_matrix* Jac_SD_FDdr_ptr, + const Jacobian* Jac_NP_ptr, + const Jacobian* Jac_SD_FDdr_ptr, const struct IO_info& IO_info, const char base_file_name[], int hn, bool print_msg_flag, int AHF_iteration /* = 0 */) { diff --git a/src/driver/setup.cc b/src/driver/setup.cc index ad4e971..cc9798a 100644 --- a/src/driver/setup.cc +++ b/src/driver/setup.cc @@ -135,9 +135,10 @@ IO_info.time_iteration = 0; IO_info.time = 0.0; struct Jacobian_info& Jac_info = state.Jac_info; -Jac_info.Jacobian_method = decode_Jacobian_method(Jacobian_method); -Jac_info.Jacobian_storage_method - = decode_Jacobian_type(Jacobian_storage_method); +Jac_info.Jacobian_compute_method + = decode_Jacobian_compute_method(Jacobian_compute_method); +Jac_info.Jacobian_store_solve_method + = decode_Jacobian_store_solve_method(Jacobian_store_solve_method); Jac_info.perturbation_amplitude = Jacobian_perturbation_amplitude; struct solver_info& solver_info = state.solver_info; @@ -302,7 +303,7 @@ state.AH_info_array = new AH_info[N_horizons+1]; patch_system(origin_x[hn], origin_y[hn], origin_z[hn], ps_type, ghost_zone_width, patch_overlap_width, - N_zones_per_right_angle, + N_zones_per_right_angle[hn], gfns::nominal_min_gfn, gfns::nominal_max_gfn, gfns::ghosted_min_gfn, gfns::ghosted_max_gfn, interp_handle, interp_param_table_handle, @@ -310,7 +311,7 @@ state.AH_info_array = new AH_info[N_horizons+1]; AH_info.ps_ptr = &ps; ps.set_gridfn_to_constant(1.0, gfns::gfn__one); - // create the Jacobian matrix + // Jacobian matrix will be created later AH_info.Jac_ptr = NULL; if (verbose_info.print_algorithm_details) diff --git a/src/elliptic/Jacobian.cc b/src/elliptic/Jacobian.cc index 048c6ac..08d8ecc 100644 --- a/src/elliptic/Jacobian.cc +++ b/src/elliptic/Jacobian.cc @@ -4,9 +4,8 @@ // // <<<literal contents of "lapack.hh">>> // -// decode_Jacobian_type -// new_Jacobian_sparsity -// new_Jacobian_matrix +// decode_Jacobian_store_solve_method -- decode string into internal enum +// new_Jacobian -- object factory for Jacobian objects // #include <stdio.h> @@ -44,111 +43,76 @@ using jtutil::error_exit; //****************************************************************************** // -// This function decodes a character string specifying a type (derived class) -// of Jacobian, into an internal enum. +// This function decodes a character string specifying a specific type +// (derived class) of Jacobian, into an internal enum. // -enum Jacobian_type - decode_Jacobian_type(const char Jacobian_type_string[]) +enum Jacobian_store_solve_method + decode_Jacobian_store_solve_method + (const char Jacobian_store_solve_method_string[]) { -if (STRING_EQUAL(Jacobian_type_string, "dense matrix")) +if (STRING_EQUAL(Jacobian_store_solve_method_string, + "dense matrix/LAPACK")) then { - #ifdef HAVE_DENSE_JACOBIAN - return Jacobian_type__dense_matrix; + #ifdef HAVE_DENSE_JACOBIAN__LAPACK + return Jacobian__dense_matrix__LAPACK; #endif } -else if (STRING_EQUAL(Jacobian_type_string, "row-oriented sparse matrix")) +else if (STRING_EQUAL(Jacobian_store_solve_method_string, + "row-oriented sparse matrix/ILUCG")) then { - #ifdef HAVE_ROW_SPARSE_JACOBIAN - return Jacobian_type__row_sparse_matrix; + #ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG + return Jacobian__row_sparse_matrix__ILUCG; #endif } else error_exit(ERROR_EXIT, -"decode_Jacobian_type(): unknown Jacobian_type_string=\"%s\"!\n", - Jacobian_type_string); /*NOTREACHED*/ +"decode_Jacobian_store_solve_method():\n" +" unknown Jacobian_store_solve_method_string=\"%s\"!\n", + Jacobian_store_solve_method_string); /*NOTREACHED*/ -// fall through to here ==> we recognize the matrix type, +// fall through to here ==> we recognize the matrix store_solve_method, // but it's not configured error_exit(ERROR_EXIT, "\n" -" decode_Jacobian_type():\n" -" Jacobian type \"%s\" is not configured in this binary;\n" -" see \"src/include/config.hh\" for details on what Jacobian types\n" -" are configured and how to change this\n" +" decode_Jacobian_store_solve_method():\n" +" Jacobian store_solve_method=\"%s\"\n" +" is not configured in this binary see \"src/include/config.hh\"\n" +" for details on what methods are configured and how to change this\n" , - Jacobian_type_string); /*NOTREACHED*/ + Jacobian_store_solve_method_string); /*NOTREACHED*/ } //****************************************************************************** // -// This function is an "object factory" for Jacobian_sparsity objects: -// it constructs and returns a pointer to a new-allocated Jacobian_sparsity -// object of the specified derived type. +// This function is an "object factory" for Jacobian objects: it constructs +// and returns a pointer to a new-allocated Jacobian object of the +// specified derived type. // // FIXME: the patch system shouldn't really have to be non-const, but // the Jacobian constructors all require this to allow the // linear solvers to directly update gridfns // -Jacobian_sparsity* new_Jacobian_sparsity(enum Jacobian_type Jac_type, - patch_system& ps, - bool print_msg_flag /* = false */) +Jacobian* new_Jacobian(enum Jacobian_store_solve_method Jac_method, + patch_system& ps, + bool print_msg_flag /* = false */) { -switch (Jac_type) +switch (Jac_method) { -#ifdef HAVE_DENSE_JACOBIAN - case Jacobian_type__dense_matrix: - return new dense_Jacobian_sparsity(ps, print_msg_flag); +#ifdef HAVE_DENSE_JACOBIAN__LAPACK + case Jacobian__dense_matrix__LAPACK: + return new dense_Jacobian__LAPACK(ps, print_msg_flag); #endif -#ifdef HAVE_ROW_SPARSE_JACOBIAN - case Jacobian_type__row_sparse_matrix: - return new row_sparse_Jacobian_sparsity(ps, print_msg_flag); +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG + case Jacobian__row_sparse_matrix__ILUCG: + return new row_sparse_Jacobian__ILUCG(ps, print_msg_flag); #endif default: error_exit(ERROR_EXIT, -"new_Jacobian_sparsity(): unknown Jacobian_type=(int)%d!\n", - Jac_type); /*NOTREACHED*/ - } -} - -//****************************************************************************** - -// -// This function is an "object factory" for Jacobian_matrix objects: -// it constructs and returns a pointer to a new-allocated Jacobian_matrix -// object of the specified derived type. -// -// FIXME: the patch system shouldn't really have to be non-const, but -// the Jacobian constructors all require this to allow the -// linear solvers to directly update gridfns -// -Jacobian_matrix* new_Jacobian_matrix(enum Jacobian_type Jac_type, - patch_system& ps, - Jacobian_sparsity& JS, - bool print_msg_flag /* = false */) -{ -switch (Jac_type) - { -#ifdef HAVE_DENSE_JACOBIAN - case Jacobian_type__dense_matrix: - return new dense_Jacobian_matrix - (ps, static_cast<dense_Jacobian_sparsity&>(JS), - print_msg_flag); -#endif - -#ifdef HAVE_ROW_SPARSE_JACOBIAN - case Jacobian_type__row_sparse_matrix: - return new row_sparse_Jacobian_matrix - (ps, static_cast<row_sparse_Jacobian_sparsity&>(JS), - print_msg_flag); -#endif - - default: - error_exit(ERROR_EXIT, -"new_Jacobian_matrix(): unknown Jacobian_type=(int)%d!\n", - Jac_type); /*NOTREACHED*/ + "new_Jacobian(): unknown method=(int)%d!\n", + int(Jac_method)); /*NOTREACHED*/ } } diff --git a/src/elliptic/Jacobian.hh b/src/elliptic/Jacobian.hh index d4b90ab..64ca182 100644 --- a/src/elliptic/Jacobian.hh +++ b/src/elliptic/Jacobian.hh @@ -3,10 +3,7 @@ // // Jacobian -- ABC to describe Jacobian matrix -// Jacobian_sparsity -- ABC to accumulate Jacobian sparsity info -// Jacobian_matrix -- ABC to store/use Jacobian matrix -// -// decode_Jacobian_type - decode string into internal enum +// decode_Jacobian_store_solve_method - decode string into internal enum // new_Jacobian - factory method // @@ -16,53 +13,67 @@ // //****************************************************************************** -//****************************************************************************** -//****************************************************************************** // // These classes are used to store and manipulate Jacobian matrices // for a patch system. // -// The inheritance diagram for the Jacobian classes looks like this: +// Jacobian is an abstract base class (ABC) that defines the generic +// Jacobian API. We derive classes from it for each type of sparsity +// pattern, then we derive classes from them for each linear solver. +// +// At present the inheritance graph looks like this: // Jacobian -// Jacobian_sparsity -// dense_Jacobian_sparsity #ifdef HAVE_DENSE_JACOBIAN -// row_sparse_Jacobian_sparsity #ifdef HAVE_ROW_SPARSE_JACOBIAN -// Jacobian_matrix -// dense_Jacobian_matrix #ifdef HAVE_DENSE_JACOBIAN -// row_sparse_Jacobian_matrix #ifdef HAVE_ROW_SPARSE_JACOBIAN -// where the most-derived classes are all conditional on the noted #ifdef -// symbols. -// -// The Jacobian_sparsity objects are used to accumulate information -// on the Jacobian's sparsity pattern; this is then used in constructing -// the corresponding Jacobian_matrix object (which actually stores -// the Jacobian, and contains member functions to solve elliptic systems -// using the Jacobian as the LHS matrix). +// dense_Jacobian +// dense_Jacobian__LAPACK +// row_sparse_Jacobian +// row_sparse_Jacobian__ILUCG +// each derived class is inside a corresponding #ifdef. The #ifdef +// symbols for linear solvers are specified in "../include/config.h"; +// those for sparsity patterns are specified below based on the ones +// for linear solvers. +// + +#ifdef HAVE_DENSE_JACOBIAN__LAPACK + #define HAVE_DENSE_JACOBIAN +#endif + +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG + #define HAVE_ROW_SPARSE_JACOBIAN +#endif + +//****************************************************************************** + // // We assume a "traversal" interface for computing Jacobians, defined -// by the Jacobian API. Because this API is shared by (all) the -// Jacobian_sparsity and Jacobian_matrix classes, the same traversal -// routines (prototyped to update Jacobian objects) can be used both -// to record sparsity patterns (ignoring the actual floating-point values) -// and to store the Jacobian matrix. -// -// Thus the typical code to construct and use a Jacobian matrix looks -// like this: +// by the Jacobian API. The typical code to construct and use a +// Jacobian matrix looks like this: // // prototype -// void trverse_Jacobian(const patch_system& ps, Jacobian& Jac); +// // ... calls Jac.zero_matrix() +// // Jac.set_element() +// // Jac.sum_into_element() +// void traverse_Jacobian(const patch_system& ps, Jacobian& Jac); // -// Jacobian_sparsity& JS = new_Jacobian_sparsity(Jac_type, ps); -// traverse_Jacobian(ps, JS); -// Jacobian_matrix& Jac = new_Jacobian_matrix(Jac_type, ps, JS); +// Jacobian& Jac = new_Jacobian(Jac_type, ps); // traverse_Jacobian(ps, Jac); // Jac.solve_linear_system(...); // +// Jac.zero_matrix() +// traverse_Jacobian(ps, Jac); // must specify same sparsity pattern +// // as before +// Jac.solve_linear_system(...); +// +// // // A row/column index of the Jacobian (denoted II/JJ) is a 0-origin grid // point number within the patch system. // +// Since many of the derived classes use Fortran routines, we also use +// 1-origin indices; these have a leading "f", eg fII/fJJ. +// + +// // Note that the APIs here implicitly assume there is only a *single* gridfn // in the Jacobian computation. (If we had N gridfns for this, then the // Jacobian would really be a block matrix with N*N blocks.) This isn't @@ -71,13 +82,13 @@ //****************************************************************************** -// ABC to define Jacobian-traversal interface +// ABC to define Jacobian matrix class Jacobian { public: // basic meta-info patch_system& my_patch_system() const { return ps_; } - int NN() const { return NN_; } + int N_rows() const { return N_rows_; } // convert (patch,irho,isigma) <--> row/column index int II_of_patch_irho_isigma(const patch& p, int irho, int isigma) @@ -87,6 +98,29 @@ public: const { return ps_.patch_irho_isigma_of_gpn(II, irho,isigma); } + + // + // convert C <--> Fortran indices + // + int csub(int f) const { return f-1; } + int fsub(int c) const { return c+1; } + + + // + // routines to access the matrix + // + + // get a matrix element + virtual fp element(int II, int JJ) const = 0; + + // is a given element explicitly stored, or implicitly 0 via sparsity + virtual bool is_explicitly_stored(int II, int JJ) const = 0; + + + // + // routines for setting values in the matrix + // + // zero the entire matrix virtual void zero_matrix() = 0; @@ -96,11 +130,28 @@ public: // sum a value into a matrix element virtual void sum_into_element(int II, int JJ, fp value) = 0; -protected: + + // + // solve linear system J.x = rhs + // ... rhs and x are nominal-grid gridfns + // ... may modify Jacobian matrix (eg for LU decomposition) + // ... returns 0.0 if matrix is numerically singular + // condition number (> 0.0) if known + // -1.0 if condition number is unknown + // ... once this has been called, the sparsity pattern should + // not be changed, i.e. no new nonzeros should be introduced + // into the matrix + // + virtual fp solve_linear_system(int rhs_gfn, int x_gfn) = 0; + + + // // constructor, destructor + // +protected: Jacobian(patch_system& ps) : ps_(ps), - NN_(ps.N_grid_points()) + N_rows_(ps.N_grid_points()) { } public: virtual ~Jacobian() { } @@ -114,66 +165,18 @@ private: protected: patch_system& ps_; - int NN_; // ps_.N_grid_points() - }; - -//****************************************************************************** - -// ABC to accumulate Jacobian sparsity info -class Jacobian_sparsity - : public Jacobian - { -public: - Jacobian_sparsity(patch_system& ps, - bool print_msg_flag = false) - : Jacobian(ps) - { } - virtual ~Jacobian_sparsity() { } - }; - -//****************************************************************************** - -// Jacobian_matrix -- ABC to store/use Jacobian matrix -class Jacobian_matrix - : public Jacobian - { -public: - // access (get) a matrix element - virtual fp element(int II, int JJ) const = 0; - - // is a given element explicitly stored, or implicitly 0 via sparsity - virtual bool is_explicitly_stored(int II, int JJ) const = 0; - - // solve linear system J.x = rhs - // ... rhs and x are nominal-grid gridfns - // ... may modify Jacobian matrix (eg for LU decomposition) - // ... returns 0.0 if matrix is numerically singular - // condition number (> 0.0) if known - // -1.0 if condition number is unknown - virtual fp solve_linear_system(int rhs_gfn, int x_gfn) = 0; - - // constructor, destructor - // ... note Jacobian_sparsity argument of constructor is non-const - // to allow us to take over ownership of data structures - Jacobian_matrix(patch_system& ps, - Jacobian_sparsity& JS, - bool print_msg_flag = false) - : Jacobian(ps) - { } - virtual ~Jacobian_matrix() { } + int N_rows_; }; //****************************************************************************** -//****************************************************************************** -//****************************************************************************** -enum Jacobian_type +enum Jacobian_store_solve_method { -#ifdef HAVE_DENSE_JACOBIAN - Jacobian_type__dense_matrix, +#ifdef HAVE_DENSE_JACOBIAN__LAPACK + Jacobian__dense_matrix__LAPACK, #endif -#ifdef HAVE_ROW_SPARSE_JACOBIAN - Jacobian_type__row_sparse_matrix // no comma on last entry in enum +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG + Jacobian__row_sparse_matrix__ILUCG // no comma on last entry in enum #endif }; @@ -182,17 +185,12 @@ enum Jacobian_type // // decode string into internal enum -enum Jacobian_type - decode_Jacobian_type(const char Jacobian_type_string[]); +enum Jacobian_store_solve_method + decode_Jacobian_store_solve_method + (const char Jacobian_store_solve_method_string[]); // "object factory" routines to construct and return // pointers to new-allocated objects of specified derived types -class Jacobian_sparsity; -class Jacobian_matrix; -Jacobian_sparsity* new_Jacobian_sparsity(enum Jacobian_type Jac_type, - patch_system& ps, - bool print_msg_flag = false); -Jacobian_matrix* new_Jacobian_matrix(enum Jacobian_type Jac_type, - patch_system& ps, - Jacobian_sparsity& JS, - bool print_msg_flag = false); +Jacobian* new_Jacobian(enum Jacobian_store_solve_method Jac_method, + patch_system& ps, + bool print_msg_flag = false); diff --git a/src/elliptic/README b/src/elliptic/README index e20a5ba..09fa662 100644 --- a/src/elliptic/README +++ b/src/elliptic/README @@ -3,6 +3,40 @@ This directory contains code for solving elliptic systems on our The main files are as follows: -Jacobian.{cc,hh} - These classes stores a Jacobian matrix and interface to - linear-solver routines. +Jacobian.{hh,cc} + These define a Jacobian class. This is a generic interface + (a C++ abstract base class) to set up a Jacobian matrix and + solve linear systems with this matrix as the right hand side + matrix. + +dense_Jacobian.{hh,cc} + These define a dense_Jacobian class (to store a Jacobian matrix + in a Fortran dense-matrix format) and a dense_Jacobian__LAPACK + class (to solve linear systems using LAPACK routines). These + classes are derived from (and hence share the generic interface + of) the Jacobian class. + +row_sparse_Jacobian.{hh,cc} + These define a row_sparse_Jacobian class (to store a Jacobian + matrix in a row-oriented sparse-matrix format) and a + row_sparse_Jacobian__ILUCG class (to solve linear systems using + an ILUCG). These classes are derived from (and hence share the + generic interface of) the Jacobian class. + +lapack.h + Header file defining C/C++ prototypes for a few LAPACK routines. +lapack_wrapper.F77 + Wrapper routines around a few LAPACK routines, to avoid problems + with C <--> Fortran passing of character-string arguments. + +ilucg.h + Header file defining C/C++ prototypes for ILUCG routines +ilucg.f77 + ILUCG (Incomplete LU-decomposition / conjugate gradiant) + subroutine. I don't know the original author of this routine; + it was provided to me by Tom Nicol, UBC Computing Center, + in c.1985. +ilucg_wrapper.F77 + Wrapper routines around the ILUCG routines, to avoid problems + with C <--> Fortran passing of Fortran logical return results. + diff --git a/src/elliptic/dense_Jacobian.cc b/src/elliptic/dense_Jacobian.cc index 72dc797..7999513 100644 --- a/src/elliptic/dense_Jacobian.cc +++ b/src/elliptic/dense_Jacobian.cc @@ -1,15 +1,20 @@ -// Jacobian.cc -- data structures for the Jacobian matrix +// dense_Jacobian.cc -- dense-matrix Jacobian // $Header$ // // <<<literal contents of "lapack.hh">>> // #ifdef HAVE_DENSE_JACOBIAN -// dense_Jacobian_matrix::dense_Jacobian_matrix -// dense_Jacobian_matrix::~dense_Jacobian_matrix -// dense_Jacobian_matrix::zero_matrix -// dense_Jacobian_matrix::solve_linear_system +// dense_Jacobian::dense_Jacobian +// dense_Jacobian::zero_matrix #endif +// +#ifdef HAVE_DENSE_JACOBIAN__LAPACK +// dense_Jacobian__LAPACK::dense_Jacobian__LAPACK +// dense_Jacobian__LAPACK::~dense_Jacobian__LAPACK +// dense_Jacobian__LAPACK::solve_linear_system +#endif +// #include <stdio.h> #include <assert.h> @@ -45,8 +50,10 @@ using jtutil::error_exit; //****************************************************************************** // -// FIXME: Cactus's CCTK_FCALL() isn't expanded in .h files (this is a bug), -// so we include the contents of "lapack.h" inline here. +// FIXME: +// Cactus's CCTK_FCALL() isn't expanded in .h files (this is a bug), +// so we include the contents of "lapack.h" inline here. This is a +// kludge! :( :( //#include "lapack.h" // @@ -57,7 +64,7 @@ using jtutil::error_exit; /* * prerequisites: * "cctk.h" - * "config.hh" + * "config.h" */ #ifdef __cplusplus @@ -116,21 +123,17 @@ void CCTK_FCALL #ifdef HAVE_DENSE_JACOBIAN // -// This function constructs a dense_Jacobian_matrix object. +// This function constructs a dense_Jacobian object. // -dense_Jacobian_matrix::dense_Jacobian_matrix(patch_system& ps, - dense_Jacobian_sparsity& JS, - bool print_msg_flag /* = false */) - : Jacobian_matrix(ps, JS), - matrix_(0,NN()-1, 0,NN()-1), - pivot_(new integer[NN()]), - iwork_(new integer[NN()]), - rwork_(new fp[4*NN()]) // no comma +dense_Jacobian::dense_Jacobian(patch_system& ps, + bool print_msg_flag /* = false */) + : Jacobian(ps), + matrix_(0,N_rows_-1, 0,N_rows_-1) { if (print_msg_flag) then CCTK_VInfo(CCTK_THORNSTRING, - "dense_Jacobian_matrix ctor: NN()=%d", - NN()); + "constructing dense_Jacobian: N_rows_=%d", + N_rows_); } #endif // HAVE_DENSE_JACOBIAN @@ -138,55 +141,73 @@ if (print_msg_flag) #ifdef HAVE_DENSE_JACOBIAN // -// THis function destroys a dense_Jacobian_matrix object. +// This function zeros a dense_Jacobian object. +// Bugs: it could be made more efficient... // -dense_Jacobian_matrix::~dense_Jacobian_matrix() +void dense_Jacobian::zero_matrix() { -delete[] iwork_; -delete[] rwork_; -delete[] pivot_; + for (int JJ = 0 ; JJ < N_rows_ ; ++JJ) + { + for (int II = 0 ; II < N_rows_ ; ++II) + { + set_element(II,JJ, 0.0); + } + } } #endif // HAVE_DENSE_JACOBIAN //****************************************************************************** +//****************************************************************************** +//****************************************************************************** -#ifdef HAVE_DENSE_JACOBIAN +#ifdef HAVE_DENSE_JACOBIAN__LAPACK // -// This function zeros a dense_Jacobian_matrix object. -// Bugs: it could be made more efficient... +// This function constructs a dense_Jacobian__LAPACK object. // -void dense_Jacobian_matrix::zero_matrix() +dense_Jacobian__LAPACK::dense_Jacobian__LAPACK(patch_system& ps, + bool print_msg_flag /* = false */) + : dense_Jacobian(ps, print_msg_flag), + pivot_(new integer[ N_rows_]), + iwork_(new integer[ N_rows_]), + rwork_(new fp [4*N_rows_]) { - for (int JJ = 0 ; JJ < NN() ; ++JJ) - { - for (int II = 0 ; II < NN() ; ++II) - { - set_element(II,JJ, 0.0); - } - } } -#endif // HAVE_DENSE_JACOBIAN +#endif // HAVE_DENSE_JACOBIAN__LAPACK //****************************************************************************** -#ifdef HAVE_DENSE_JACOBIAN +#ifdef HAVE_DENSE_JACOBIAN__LAPACK +// +// This function destroys a dense_Jacobian__LAPACK object. +// +dense_Jacobian__LAPACK::~dense_Jacobian__LAPACK() +{ +delete[] rwork_; +delete[] iwork_; +delete[] pivot_; +} +#endif // HAVE_DENSE_JACOBIAN__LAPACK + +//****************************************************************************** + +#ifdef HAVE_DENSE_JACOBIAN__LAPACK // // This function solves the linear system J.x = rhs, with rhs and x // being nominal-grid gridfns, using LAPACK LU-decomposition routines. // It returns the estimated infinity-norm condition number of the linear // system, or 0.0 if the matrix is numerically singular // -fp dense_Jacobian_matrix::solve_linear_system(int rhs_gfn, int x_gfn) +fp dense_Jacobian__LAPACK::solve_linear_system(int rhs_gfn, int x_gfn) { -const fp *rhs = my_patch_system().gridfn_data(rhs_gfn); -fp *x = my_patch_system().gridfn_data(x_gfn); +const fp *rhs = ps_.gridfn_data(rhs_gfn); +fp *x = ps_.gridfn_data(x_gfn); fp *A = matrix_.data_array(); const integer infinity_norm_flag = 0; const integer stride = 1; const integer NRHS = 1; -const integer N = NN(); -const integer N2 = NN() * NN(); +const integer N = N_rows_; +const integer N2 = N_rows_ * N_rows_; // compute the infinity-norm of the matrix A // ... max_posn = 1-origin index of A[] element with largest absolute value @@ -204,7 +225,7 @@ const fp A_infnorm = jtutil::abs(A[max_posn-1]); // ... [sd]gesv() use an "in out" design, where the same argument // is used for both rhs and x ==> we must first copy rhs to x // - for (int II = 0 ; II < NN() ; ++II) + for (int II = 0 ; II < N_rows_ ; ++II) { x[II] = rhs[II]; } @@ -220,8 +241,8 @@ integer info; if (info < 0) then error_exit(ERROR_EXIT, "\n" -"***** dense_Jacobian_matrix::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n" -" error return (bad argument) info=%d from [sd]gesv() LAPACK routine!\n" +"***** dense_Jacobian__LAPACK::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n" +" error return (bad argument) info=%d from [sd]gesv() LAPACK routine!" , rhs_gfn, x_gfn, int(info)); /*NOTREACHED*/ @@ -248,4 +269,4 @@ if (rcond == 0.0) // *** (singular matrix) return 1.0/rcond; } -#endif // HAVE_DENSE_JACOBIAN +#endif // HAVE_DENSE_JACOBIAN__LAPACK diff --git a/src/elliptic/dense_Jacobian.hh b/src/elliptic/dense_Jacobian.hh index 1e269d5..0c85f47 100644 --- a/src/elliptic/dense_Jacobian.hh +++ b/src/elliptic/dense_Jacobian.hh @@ -1,10 +1,12 @@ -// Jacobian.hh -- data structures for the Jacobian matrix +// dense_Jacobian.hh -- dense-matrix Jacobian // $Header$ // #ifdef HAVE_DENSE_JACOBIAN -// dense_Jacobian_sparsity -- (no-op) accumulate dense Jacobian sparsity info -// dense_Jacobian_matrix -- Jacobian stored as a dense matrix +// dense_Jacobian -- Jacobian stored as a dense matrix +#endif +#ifdef HAVE_DENSE_JACOBIAN__LAPACK +// dense_Jacobian__LAPACK -- dense_Jacobian with LAPACK linear solver #endif // @@ -18,44 +20,30 @@ #ifdef HAVE_DENSE_JACOBIAN // -// This class accumulates the sparsity pattern of a dense-matrix Jacobian. -// -// Since a dense-matrix Jacobian doesn't care about the sparsity pattern, -// the member functions of this class are just no-ops. +// This class stores the Jacobian as a dense matrix in Fortran (column) +// order. // -class dense_Jacobian_sparsity - : public Jacobian_sparsity +class dense_Jacobian + : public Jacobian { public: - // record the zeroing of the entire matrix - void zero_matrix() { } + // + // routines to access the matrix + // - // record the setting of a matrix element to a specified value - void set_element(int II, int JJ, fp value) { } + // get a matrix element + fp element(int II, int JJ) const + { return matrix_(JJ,II); } - // record the summing of a value into a matrix element - void sum_into_element(int II, int JJ, fp value) { } + // dense matrix ==> all elements are explicitly stored + bool is_explicitly_stored(int II, int JJ) const + { return true; } - // constructor, destructor - dense_Jacobian_sparsity(patch_system& ps, - bool print_msg_flag = false) - : Jacobian_sparsity(ps, print_msg_flag) - { } - ~dense_Jacobian_sparsity() { } - }; -#endif // HAVE_DENSE_JACOBIAN -//****************************************************************************** + // + // routines for setting values in the matrix + // -#ifdef HAVE_DENSE_JACOBIAN -// -// This class stores the Jacobian as a dense matrix in Fortran (column) -// order. -// -class dense_Jacobian_matrix - : public Jacobian_matrix - { -public: // zero the entire matrix void zero_matrix(); @@ -67,16 +55,30 @@ public: void sum_into_element(int II, int JJ, fp value) { matrix_(JJ,II) += value; } - // access (get) a matrix element - fp element(int II, int JJ) - const - { return matrix_(JJ,II); } + // + // constructor, destructor + // +protected: + dense_Jacobian(patch_system& ps, + bool print_msg_flag = false); + ~dense_Jacobian() { } + +protected: + // Fortran storage order ==> subscripts are (JJ,II) + jtutil::array2d<fp> matrix_; + }; +#endif // HAVE_DENSE_JACOBIAN - // this is a dense matrix ==> all values are stored explicitly - bool is_explicitly_stored(int II, int JJ) - const - { return true; } +//****************************************************************************** +#ifdef HAVE_DENSE_JACOBIAN__LAPACK +// +// This class defines the linear solver routine using LAPACK. +// +class dense_Jacobian__LAPACK + : public dense_Jacobian + { +public: // solve linear system J.x = rhs via LAPACK // ... rhs and x are nominal-grid gridfns // ... overwrites Jacobian matrix with its LU decomposition @@ -87,20 +89,16 @@ public: // constructor, destructor public: - dense_Jacobian_matrix(patch_system& ps, - dense_Jacobian_sparsity& JS, - bool print_msg_flag = false); - ~dense_Jacobian_matrix(); + dense_Jacobian__LAPACK(patch_system& ps, + bool print_msg_flag = false); + ~dense_Jacobian__LAPACK(); private: - // subscripts are (JJ,II) (both 0-origin) - jtutil::array2d<fp> matrix_; - // pivot vector for LAPACK routines - integer *pivot_; + integer *pivot_; // size N_rows_ // work vectors for LAPACK condition number computation - integer *iwork_; - fp *rwork_; + integer *iwork_; // size N_rows_ + fp *rwork_; // size 4*N_rows_ }; -#endif // HAVE_DENSE_JACOBIAN +#endif // HAVE_DENSE_JACOBIAN__LAPACK diff --git a/src/elliptic/ilucg.f77 b/src/elliptic/ilucg.f77 new file mode 100644 index 0000000..68f99f5 --- /dev/null +++ b/src/elliptic/ilucg.f77 @@ -0,0 +1,1055 @@ + LOGICAL FUNCTION SILUCG(N,IA,JA,A,B,X,ITEMP,RTEMP,EPS,ITER,IE) + IMPLICIT REAL (A-H,O-Z) +C +C INCOMPLETE LU DECOMPOSITION-CONJUGATE GRADIENT +C - -- - - +C WHERE: +C |N| IS THE NUMBER OF EQUATIONS. IF N < 0, ITEMP AND +C RTEMP CONTAIN THE ILU FROM A PREVIOUS CALL AND +C B AND X ARE THE NEW RHS AND INITIAL GUESS. +C IA IS AN INTEGER ARRAY DIMENSIONED |N|+1. IA(I) IS THE +C INDEX INTO ARRAYS JA AND A OF THE FIRST NON-ZERO +C ELEMENT IN ROW I. LET MAX=IA(|N|+1)-IA(1). +C JA IS AN INTEGER ARRAY DIMENSIONED MAX. JA(K) GIVES +C THE COLUMN NUMBER OF A(K). +C A IS A REAL ARRAY DIMENSIONED MAX. IT CONTAINS THE +C NONZERO ELEMENTS OF THE MATRIX STORED BY ROW. +C B CONTAINS THE RHS VECTOR. +C X IS A REAL ARRAY DIMENSIONED |N|. ON ENTRY, IT CONTAINS +C AN INITIAL ESTIMATE; ON EXIT, THE SOLUTION. +C ITEMP IS AN INTEGER SCRATCH ARRAY DIMENSIONED 3*(|N|+MAX)+2. +C RTEMP IS AN REAL SCRATCH ARRAY DIMENSIONED 4*|N|+MAX. +C EPS IS THE CONVERGENCE CRITERIA. IT SPECIFIES THE RELATIVE +C ERROR ALLOWED IN THE SOLUTION. TO BE PRECISE, CONVERGENCE +C IS DEEMED TO HAVE OCCURED WHEN THE INFINITY-NORM OF THE +C CHANGE IN THE SOLUTION IN ONE ITERATION IS .LE. EPS * THE +C INFINITY-NORM OF THE CURRENT SOLUTION. HOWEVER, IF EPS +C .LT. 0.0, IT IS INTERNALLY SCALED BY THE MACHINE PRECISION, +C SO THAT, FOR EXAMPLE, EPS = -256.0 WILL ALLOW THE LAST TWO +C HEXADECIMAL DIGITS OF THE SOLUTION TO BE IN ERROR. +C ITER GIVES THE REQUESTED NUMBER OF ITERATIONS, OR IS 0 FOR "NO LIMIT". +C IE IS AN INTEGER VARIABLE. FOR NORMAL RETURN, IT GIVES +C THE NUMBER OF ITERATIONS DONE (NEGATIVE IS NO CONVERGENCE). +C ON ERROR RETURN, THE ROW IN ERROR. +C +C THE FUNCTION RETURNS .FALSE. IF ALL'S WELL, .TRUE. ON AN ERROR RETURN. +C +C (MODIFIED TO RETURN LOGICAL VALUE, J. THORNBURG, 13/MAY/85.) +C (MODIFIED TO ADD CONVERGENCE CRITERIA, J. THORNBURG, 17/MAY/85.) +C (MODIFIED CONVERGENCE CRITERIA, J. THORNBURG, 4/AUG/89.) +C (MODIFIED TO AVOID SKIP RETURNS, J. THORNBURG, 10/SEP/89.) +C +C REFERENCE: +C D.S.KERSHAW,"THE INCOMPLETE CHOLESKY-CONJUGATE GRADIENT +C METHOD FOR INTERATIVE SOLUTION OF LINEAR EQUATIONS", +C J.COMPUT.PHYS. JAN 1978 PP 43-65 +C + DIMENSION IA(*),JA(*),A(*),B(*),X(*),ITEMP(*),RTEMP(*) + LOGICAL SLU0 + NP=IABS(N) + IE=0 + IF (NP.EQ.0) GO TO 20 +C CALCULATE INDICES FOR BREAKING UP TEMPORARY ARRAYS. + N1=NP+1 + MAX=IA(N1)-IA(1) + ILU=1 + JLU=ILU+N1 + ID=JLU+MAX + IC=ID+NP + JC=IC+N1 + JCI=JC+MAX + IR=1 + IP=IR+NP + IS1=IP+NP + IS2=IS1+NP + IALU=IS2+NP + IF (N.LT.0) GO TO 10 +C DO INCOMPLETE LU DECOMPOSITION + IF (SLU0(NP,IA,JA,A,ITEMP(IC),ITEMP(JC),ITEMP(JCI),RTEMP(IALU), + * ITEMP(ILU),ITEMP(JLU),ITEMP(ID),RTEMP(IR),IE)) GOTO 20 +C AND DO CONJUGATE GRADIENT ITERATIONS +C CALL MODIFIED TO ADD ADJUSTABLE CONVERGENCE CRITERIA EPS +C - J. THORNBURG, 17/MAY/85. +10 CALL SNCG0(NP,IA,JA,A,B,X,ITEMP(ILU),ITEMP(JLU),ITEMP(ID), + * RTEMP(IALU),RTEMP(IR),RTEMP(IP),RTEMP(IS1),RTEMP(IS2), + * EPS,ITER,IE) + SILUCG = .FALSE. + RETURN +20 SILUCG = .TRUE. + RETURN + END + LOGICAL FUNCTION SLU0(N,IA,JA,A,IC,JC,JCI,ALU,ILU,JLU,ID,V,IE) + IMPLICIT REAL (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),IC(*),JC(*),JCI(*), + * ALU(*),ILU(*),JLU(*),ID(N),V(N) + LOGICAL NODIAG + COMMON /ICBD00/ ICBAD +C INCOMPLETE LU DECOMPOSITION +C WHERE: +C N,IA,JA, AND A ARE DESCRIBED IN FUNCTION SILUCG +C IC IS AN INTEGER ARRAY DIMENSIONED N+1, IC(J) GIVES THE +C INDEX OF THE FIRST NONZERO ELEMENT IN COLMN J IN +C ARRAY JC. +C JC IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A. +C JC(K) GIVES THE ROW NUMBER OF THE K'TH ELEMENT IN +C THE COLUMN STRUCTURE. +C JCI IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A. +C JCI(K) GIVES THE INDEX INTO ARRAY A OF THE K'TH ELEMENT +C OF THE COLUMN STRUCTURE. +C ALU HAS THE SAME DIMENSION AS A. ON EXIT, IT WILL +C CONTAIN THE INCOMPLETE LU DECOMPOSITION OF A WITH THE +C RECIPROCALS OF THE DIAGONAL ELEMENTS OF U. +C ILU AND JLU CORRESPONDS TO IA AND JA BUT FOR ALU. +C ID IS AN INTEGER ARRAY DIMENSIONED N. IT CONTAINS +C INDICES TO THE DIAGONAL ELEMENTS OF U. +C V IS A REAL SCRATCH VECTOR OF LENGTH N. +C IE GIVES THE ROW NUMBER IN ERROR. +C +C RETURN VALUE = .FALSE. IF ALL IS WELL, .TRUE. IF ERROR. +C +C NOTE: SLU0 SETS ARGUMENTS IC THROUGH V. +C + ICBAD=0 +C ZERO COUNT OF ZERO DIAGONAL ELEMENTS IN U. +C +C FIRST CHECK STRUCTURE OF A AND BUILD COLUMN STRUCTURE + DO 10 I=1,N + IC(I)=0 +10 CONTINUE + DO 30 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + NODIAG=.TRUE. + DO 20 K=KS,KE + J=JA(K) + IF (J.LT.1.OR.J.GT.N) GO TO 210 + IC(J)=IC(J)+1 + IF (J.EQ.I) NODIAG=.FALSE. +20 CONTINUE + IF (NODIAG) GO TO 210 +30 CONTINUE +C MAKE IC INTO INDICES + KOLD=IC(1) + IC(1)=1 + DO 40 I=1,N + KNEW=IC(I+1) + IF (KOLD.EQ.0) GO TO 210 + IC(I+1)=IC(I)+KOLD + KOLD=KNEW +40 CONTINUE +C SET JC AND JCI FOR COLUMN STRUCTURE + DO 60 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + DO 50 K=KS,KE + J=JA(K) + L=IC(J) + IC(J)=L+1 + JC(L)=I + JCI(L)=K +50 CONTINUE +60 CONTINUE +C FIX UP IC + KOLD=IC(1) + IC(1)=1 + DO 70 I=1,N + KNEW=IC(I+1) + IC(I+1)=KOLD + KOLD=KNEW +70 CONTINUE +C FIND SORTED ROW STRUCTURE FROM SORTED COLUMN STRUCTURE + NP=N+1 + DO 80 I=1,NP + ILU(I)=IA(I) +80 CONTINUE +C MOVE ELEMENTS, SET JLU AND ID + DO 100 J=1,N + KS=IC(J) + KE=IC(J+1)-1 + DO 90 K=KS,KE + I=JC(K) + L=ILU(I) + ILU(I)=L+1 + JLU(L)=J + KK=JCI(K) + ALU(L)=A(KK) + IF (I.EQ.J) ID(J)=L +90 CONTINUE +100 CONTINUE +C RESET ILU (COULD JUST USE IA) + DO 110 I=1,NP + ILU(I)=IA(I) +110 CONTINUE +C FINISHED WITH SORTED COLUMN AND ROW STRUCTURE +C +C DO LU DECOMPOSITION USING GAUSSIAN ELIMINATION + DO 120 I=1,N + V(I)=0.0 +120 CONTINUE + DO 200 IROW=1,N + I=ID(IROW) + PIVOT=ALU(I) + IF (PIVOT.NE.0.0) GO TO 140 +C THIS CASE MAKES THE ILU LESS ACCURATE + ICBAD=ICBAD+1 + KS=ILU(IROW) + KE=ILU(IROW+1)-1 + DO 130 K=KS,KE + PIVOT=PIVOT+ABS(ALU(K)) +130 CONTINUE + IF (PIVOT.EQ.0.0) GO TO 220 +140 PIVOT=1.0/PIVOT + ALU(I)=PIVOT + KKS=I+1 + KKE=ILU(IROW+1)-1 + IF (KKS.GT.KKE) GO TO 160 + DO 150 K=KKS,KKE + J=JLU(K) + V(J)=ALU(K) +150 CONTINUE +C FIX L IN COLUMN IROW AND DO PARTIAL LU IN SUBMATRIX +160 KS=IC(IROW) + KE=IC(IROW+1)-1 + DO 190 K=KS,KE + I=JC(K) + IF (I.LE.IROW) GO TO 190 + LS=ILU(I) + LE=ILU(I+1)-1 + DO 180 L=LS,LE + J=JLU(L) + IF (J.LT.IROW) GO TO 180 + IF (J.GT.IROW) GO TO 170 + AMULT=ALU(L)*PIVOT + ALU(L)=AMULT + IF (AMULT.EQ.0.0) GO TO 190 + GO TO 180 +170 IF (V(J).EQ.0.0) GO TO 180 + ALU(L)=ALU(L)-AMULT*V(J) +180 CONTINUE +190 CONTINUE +C RESET V + IF (KKS.GT.KKE) GO TO 200 + DO 195 K=KKS,KKE + J=JLU(K) + V(J)=0.0 +195 CONTINUE +200 CONTINUE +C NORMAL RETURN + SLU0 = .FALSE. + RETURN +C ERROR RETURNS +210 IE=I + SLU0 = .TRUE. + RETURN +C NORMAL RETURN +220 IE=IROW + SLU0 = .TRUE. + RETURN + END + SUBROUTINE SNCG0(N,IA,JA,A,B,X,ILU,JLU,ID,ALU,R,P,S1,S2, + * EPS,ITER,IE) + IMPLICIT REAL (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),B(N),X(N),ILU(*),JLU(*),ALU(*),ID(N), + * R(N),P(N),S1(N),S2(N) +C NONSYMMETRIC CONJUGATE GRADIENT +C WHERE: +C N,IA,JA,A,B, AND X ARE DESCRIBED IN SUBROUTINE SILUCG. +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU. +C JLU GIVES COLUMN NUMBER. +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U. +C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW +C WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U. +C R,P,S1, AND S2 ARE VECTORS OF LENGTH N USED IN THE +C ITERATIONS. +C FOLLOWING PARAMETER ADDED BY J. THORNBURG, 17/MAY/85. +C EPS IS CONVERGENCE CRITERIA. (DESCRIBED IN SUBROUTINE +C SILUCG). +C ITER IS MAX NUMBER OF ITERATIONS, OR 0 FOR "NO LIMIT". +C IE GIVES ACTUAL NUMBER OF ITERATIONS, NEGATIVE IF +C NO CONVERGENCE. +C +C R0=B-A*X0 + CALL SMUL10(N,IA,JA,A,X,R) + DO 10 I=1,N + R(I)=B(I)-R(I) +10 CONTINUE +C P0=(UT*U)(-1)*AT*(L*LT)(-1)*R0 +C FIRST SOLVE L*LT*S1=R0 + CALL SSUBL0(N,ILU,JLU,ID,ALU,R,S1) +C TIMES TRANSPOSE OF A + CALL SMUL20(N,IA,JA,A,S1,S2) +C THEN SOLVE UT*U*P=S2 + CALL SSUBU0(N,ILU,JLU,ID,ALU,S2,P) + IE=0 + RDOT = SGVV(R,S1,N) +C LOOP BEGINS HERE +20 CALL SMUL30(N,ILU,JLU,ID,ALU,P,S2) + PDOT = SGVV(P,S2,N) +C + IF (PDOT.EQ.0.0) RETURN +C + ALPHA=RDOT/PDOT +C EQUATION 9PA ALPHA=(R,LINV*R)/(P,UT*U*P) + XMAX=1.0 + XDIF=0.0 + DO 30 I=1,N + AP=ALPHA*P(I) + X(I)=X(I)+AP +C EQUATION 9PB X=X+ALPHA*P + AP=ABS(AP) + XX=ABS(X(I)) + IF (AP.GT.XDIF) XDIF=AP + IF (XX.GT.XMAX) XMAX=XX +30 CONTINUE + IE=IE+1 +C +C CONVERGENCE TEST (CHANGED BY J. THORNBURG, 17/MAY/85, 4/AUG/89) +C + IF ((EPS .GT. 0.0) .AND. (XDIF .LE. EPS * XMAX)) RETURN + IF ((EPS .LT. 0.0) .AND. (XMAX + XDIF/ABS(EPS) .EQ. XMAX)) + * RETURN +C +C EXCEEDED ITERATION LIMIT? +C + IF ((ITER .NE. 0) .AND. (IE .GE. ITER)) GO TO 60 + CALL SMUL10(N,IA,JA,A,P,S2) + DO 40 I=1,N + R(I)=R(I)-ALPHA*S2(I) +C EQUATION 9PC R=R-ALPHA*A*P +40 CONTINUE + CALL SSUBL0(N,ILU,JLU,ID,ALU,R,S1) +C CALL INPROD(R,S1,EDOT,RRDOT,N) + RRDOT = SGVV(R,S1,N) + BETA=RRDOT/RDOT +C EQUATION 9PD BETA=(R+,LINV*R+)/(R,LINV*R) + RDOT=RRDOT + CALL SMUL20(N,IA,JA,A,S1,S2) + CALL SSUBU0(N,ILU,JLU,ID,ALU,S2,S1) + DO 50 I=1,N + P(I)=S1(I)+BETA*P(I) +C EQUATION 9PE P=(UT*U)(-1)*AT*(L*LT)(-1)*R+BETA*P +50 CONTINUE + GO TO 20 +60 IE=-IE + RETURN + END + SUBROUTINE SMUL10(N,IA,JA,A,B,X) + IMPLICIT REAL (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),B(N),X(N) +C MULTIPLY A TIMES B TO GET X +C WHERE: +C N IS THE ORDER OF THE MATRIX +C IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW +C JA GIVES COLUMN NUMBER +C A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC +C MATRIX STORED BY ROW +C B IS THE VECTOR +C X IS THE PRODUCT (MUST BE DIFFERENT FROM B) +C + DO 20 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + SUM=0.0 + DO 10 K=KS,KE + J=JA(K) + SUM=SUM+A(K)*B(J) +10 CONTINUE + X(I)=SUM +20 CONTINUE + RETURN + END + SUBROUTINE SMUL20(N,IA,JA,A,B,X) + IMPLICIT REAL (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),B(N),X(N) +C MULTIPLY TRANSPOSE OF A TIMES B TO GET X +C WHERE: +C N IS THE ORDER OF THE MATRIX +C IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW +C JA GIVES COLUMN NUMBER +C A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC +C MATRIX STORED BY ROW +C B IS THE VECTOR +C X IS THE PRODUCT (MUST BE DIFFERENT FROM B) +C + DO 10 I=1,N + X(I)=0.0 +10 CONTINUE + DO 30 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + BB=B(I) + DO 20 K=KS,KE + J=JA(K) + X(J)=X(J)+A(K)*BB +20 CONTINUE +30 CONTINUE + RETURN + END + SUBROUTINE SMUL30(N,ILU,JLU,ID,ALU,B,X) + IMPLICIT REAL (A-H,O-Z) + DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N) +C MULTIPLY TRANSPOSE OF U TIMES U TIMES B TO GET X +C WHERE: +C N IS THE ORDER OF THE MATRIX +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU +C JLU GIVES COLUMN NUMBER +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U +C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW +C WITH RECIPROCALS OF DIAGONAL ELEMENTS +C B IS THE VECTOR +C X IS THE PRODUCT UT*U*B (X MUST BE DIFFERENT FROM B) +C + DO 10 I=1,N + X(I)=0.0 +10 CONTINUE + DO 50 I=1,N + KS=ID(I)+1 + KE=ILU(I+1)-1 + DIAG=1.0/ALU(KS-1) + XX=DIAG*B(I) + IF (KS.GT.KE) GO TO 30 + DO 20 K=KS,KE + J=JLU(K) + XX=XX+ALU(K)*B(J) +20 CONTINUE +30 X(I)=X(I)+DIAG*XX + IF (KS.GT.KE) GO TO 50 + DO 40 K=KS,KE + J=JLU(K) + X(J)=X(J)+ALU(K)*XX +40 CONTINUE +50 CONTINUE + RETURN + END + SUBROUTINE SSUBU0(N,ILU,JLU,ID,ALU,B,X) + IMPLICIT REAL (A-H,O-Z) + DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N) +C DO FORWARD AND BACK SUBSTITUTION TO SOLVE UT*U*X=B +C WHERE: +C N IS THE ORDER OF THE MATRIX +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU +C JLU GIVES COLUMN NUMBER +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U +C ALU HAS NONZERO ELMENTS OF LU MATRIX STORED BY ROW +C WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U +C B IS THE RHS VECTOR +C X IS THE SOLUTION VECTOR +C + NP=N+1 + DO 10 I=1,N + X(I)=B(I) +10 CONTINUE +C FORWARD SUBSTITUTION + DO 30 I=1,N + KS=ID(I)+1 + KE=ILU(I+1)-1 + XX=X(I)*ALU(KS-1) + X(I)=XX + IF (KS.GT.KE) GO TO 30 + DO 20 K=KS,KE + J=JLU(K) + X(J)=X(J)-ALU(K)*XX +20 CONTINUE +30 CONTINUE +C BACK SUBSTITUTION + DO 60 II=1,N + I=NP-II + KS=ID(I)+1 + KE=ILU(I+1)-1 + SUM=0.0 + IF (KS.GT.KE) GO TO 50 + DO 40 K=KS,KE + J=JLU(K) + SUM=SUM+ALU(K)*X(J) +40 CONTINUE +50 X(I)=(X(I)-SUM)*ALU(KS-1) +60 CONTINUE + RETURN + END + SUBROUTINE SSUBL0(N,ILU,JLU,ID,ALU,B,X) + IMPLICIT REAL (A-H,O-Z) + DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N) +C DO FORWARD AND BACK SUBSTITUTION TO SOLVE L*LT*X=B +C WHERE: +C N IS THE ORDER OF THE MATRIX +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW LU +C JLU GIVES THE COLUMN NUMBER +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U +C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW +C DIAGONAL ELEMENTS OF L ARE 1.0 AND NOT STORED +C B IS THE RHS VECTOR +C X IS THE SOLUTION VECTOR +C + NP=N+1 + DO 10 I=1,N + X(I)=B(I) +10 CONTINUE +C FORWARD SUBSTITUTION + DO 30 I=1,N + KS=ILU(I) + KE=ID(I)-1 + IF (KS.GT.KE) GO TO 30 + SUM=0.0 + DO 20 K=KS,KE + J=JLU(K) + SUM=SUM+ALU(K)*X(J) +20 CONTINUE + X(I)=X(I)-SUM +30 CONTINUE +C BACK SUBSTITUTION + DO 50 II=1,N + I=NP-II + KS=ILU(I) + KE=ID(I)-1 + IF (KS.GT.KE) GO TO 50 + XX=X(I) + IF (XX.EQ.0.0) GO TO 50 + DO 40 K=KS,KE + J=JLU(K) + X(J)=X(J)-ALU(K)*XX +40 CONTINUE +50 CONTINUE + RETURN + END + REAL FUNCTION SGVV(V,W,N) + IMPLICIT REAL (A-H,O-Z) + DIMENSION V(N),W(N) +C SUBROUTINE TO COMPUTE REAL VECTOR DOT PRODUCT. +C + SUM = 0.0 + DO 10 I = 1,N + SUM = SUM + V(I)*W(I) +10 CONTINUE + SGVV = SUM + RETURN + END + LOGICAL FUNCTION DILUCG(N,IA,JA,A,B,X,ITEMP,RTEMP,EPS,ITER,IE) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) +C +C INCOMPLETE LU DECOMPOSITION-CONJUGATE GRADIENT +C - -- - - +C WHERE: +C |N| IS THE NUMBER OF EQUATIONS. IF N < 0, ITEMP AND +C RTEMP CONTAIN THE ILU FROM A PREVIOUS CALL AND +C B AND X ARE THE NEW RHS AND INITIAL GUESS. +C IA IS AN INTEGER ARRAY DIMENSIONED |N|+1. IA(I) IS THE +C INDEX INTO ARRAYS JA AND A OF THE FIRST NON-ZERO +C ELEMENT IN ROW I. LET MAX=IA(|N|+1)-IA(1). +C JA IS AN INTEGER ARRAY DIMENSIONED MAX. JA(K) GIVES +C THE COLUMN NUMBER OF A(K). +C A IS A DOUBLE PRECISION ARRAY DIMENSIONED MAX. IT CONTAINS THE +C NONZERO ELEMENTS OF THE MATRIX STORED BY ROW. +C B CONTAINS THE RHS VECTOR. +C X IS A DOUBLE PRECISION ARRAY DIMENSIONED |N|. ON ENTRY, IT CONTAINS +C AN INITIAL ESTIMATE; ON EXIT, THE SOLUTION. +C ITEMP IS AN INTEGER SCRATCH ARRAY DIMENSIONED 3*(|N|+MAX)+2. +C RTEMP IS A DOUBLE PRECISION SCRATCH ARRAY DIMENSIONED 4*|N|+MAX. +C EPS IS THE CONVERGENCE CRITERIA. IT SPECIFIES THE RELATIVE +C ERROR ALLOWED IN THE SOLUTION. TO BE PRECISE, CONVERGENCE +C IS DEEMED TO HAVE OCCURED WHEN THE INFINITY-NORM OF THE +C CHANGE IN THE SOLUTION IN ONE ITERATION IS .LE. EPS * THE +C INFINITY-NORM OF THE CURRENT SOLUTION. HOWEVER, IF EPS +C .LT. 0.0D0, IT IS INTERNALLY SCALED BY THE MACHINE PRECISION, +C SO THAT, FOR EXAMPLE, EPS = -256.0 WILL ALLOW THE LAST TWO +C HEXADECIMAL DIGITS OF THE SOLUTION TO BE IN ERROR. +C ITER GIVES THE REQUESTED NUMBER OF ITERATIONS, OR IS 0 FOR "NO LIMIT". +C IE IS AN INTEGER VARIABLE. FOR NORMAL RETURN, IT GIVES +C THE NUMBER OF ITERATIONS DONE (NEGATIVE IS NO CONVERGENCE). +C ON ERROR RETURN, THE ROW IN ERROR. +C +C THE FUNCTION RETURNS .FALSE. IF ALL'S WELL, .TRUE. ON AN ERROR RETURN. +C +C (MODIFIED TO RETURN LOGICAL VALUE, J. THORNBURG, 13/MAY/85.) +C (MODIFIED TO ADD CONVERGENCE CRITERIA, J. THORNBURG, 17/MAY/85.) +C (MODIFIED CONVERGENCE CRITERIA, J. THORNBURG, 4/AUG/89.) +C (MODIFIED TO AVOID SKIP RETURNS, J. THORNBURG, 10/SEP/89.) +C +C REFERENCE: +C D.S.KERSHAW,"THE INCOMPLETE CHOLESKY-CONJUGATE GRADIENT +C METHOD FOR INTERATIVE SOLUTION OF LINEAR EQUATIONS", +C J.COMPUT.PHYS. JAN 1978 PP 43-65 +C + DIMENSION IA(*),JA(*),A(*),B(*),X(*),ITEMP(*),RTEMP(*) + LOGICAL DLU0 + NP=IABS(N) + IE=0 + IF (NP.EQ.0) GO TO 20 +C CALCULATE INDICES FOR BREAKING UP TEMPORARY ARRAYS. + N1=NP+1 + MAX=IA(N1)-IA(1) + ILU=1 + JLU=ILU+N1 + ID=JLU+MAX + IC=ID+NP + JC=IC+N1 + JCI=JC+MAX + IR=1 + IP=IR+NP + IS1=IP+NP + IS2=IS1+NP + IALU=IS2+NP + IF (N.LT.0) GO TO 10 +C DO INCOMPLETE LU DECOMPOSITION + IF (DLU0(NP,IA,JA,A,ITEMP(IC),ITEMP(JC),ITEMP(JCI),RTEMP(IALU), + * ITEMP(ILU),ITEMP(JLU),ITEMP(ID),RTEMP(IR),IE)) GOTO 20 +C AND DO CONJUGATE GRADIENT ITERATIONS +C CALL MODIFIED TO ADD ADJUSTABLE CONVERGENCE CRITERIA EPS +C - J. THORNBURG, 17/MAY/85. +10 CALL DNCG0(NP,IA,JA,A,B,X,ITEMP(ILU),ITEMP(JLU),ITEMP(ID), + * RTEMP(IALU),RTEMP(IR),RTEMP(IP),RTEMP(IS1),RTEMP(IS2), + * EPS,ITER,IE) + DILUCG = .FALSE. + RETURN +20 DILUCG = .TRUE. + RETURN + END + LOGICAL FUNCTION DLU0(N,IA,JA,A,IC,JC,JCI,ALU,ILU,JLU,ID,V,IE) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),IC(*),JC(*),JCI(*), + * ALU(*),ILU(*),JLU(*),ID(N),V(N) + LOGICAL NODIAG + COMMON /ICBD00/ ICBAD +C INCOMPLETE LU DECOMPOSITION +C WHERE: +C N,IA,JA, AND A ARE DESCRIBED IN SUBROUTINE ILUCG +C IC IS AN INTEGER ARRAY DIMENSIONED N+1, IC(J) GIVES THE +C INDEX OF THE FIRST NONZERO ELEMENT IN COLMN J IN +C ARRAY JC. +C JC IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A. +C JC(K) GIVES THE ROW NUMBER OF THE K'TH ELEMENT IN +C THE COLUMN STRUCTURE. +C JCI IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A. +C JCI(K) GIVES THE INDEX INTO ARRAY A OF THE K'TH ELEMENT +C OF THE COLUMN STRUCTURE. +C ALU HAS THE SAME DIMENSION AS A. ON EXIT, IT WILL +C CONTAIN THE INCOMPLETE LU DECOMPOSITION OF A WITH THE +C RECIPROCALS OF THE DIAGONAL ELEMENTS OF U. +C ILU AND JLU CORRESPONDS TO IA AND JA BUT FOR ALU. +C ID IS AN INTEGER ARRAY DIMENSIONED N. IT CONTAINS +C INDICES TO THE DIAGONAL ELEMENTS OF U. +C V IS A REAL SCRATCH VECTOR OF LENGTH N. +C IE GIVES THE ROW NUMBER IN ERROR. +C +C RETURN VALUE = .FALSE. IF ALL IS WELL, .TRUE. IF ERROR. +C +C NOTE: DLU0 SETS ARGUMENTS IC THROUGH V. +C + ICBAD=0 +C ZERO COUNT OF ZERO DIAGONAL ELEMENTS IN U. +C +C FIRST CHECK STRUCTURE OF A AND BUILD COLUMN STRUCTURE + DO 10 I=1,N + IC(I)=0 +10 CONTINUE + DO 30 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + NODIAG=.TRUE. + DO 20 K=KS,KE + J=JA(K) + IF (J.LT.1.OR.J.GT.N) GO TO 210 + IC(J)=IC(J)+1 + IF (J.EQ.I) NODIAG=.FALSE. +20 CONTINUE + IF (NODIAG) GO TO 210 +30 CONTINUE +C MAKE IC INTO INDICES + KOLD=IC(1) + IC(1)=1 + DO 40 I=1,N + KNEW=IC(I+1) + IF (KOLD.EQ.0) GO TO 210 + IC(I+1)=IC(I)+KOLD + KOLD=KNEW +40 CONTINUE +C SET JC AND JCI FOR COLUMN STRUCTURE + DO 60 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + DO 50 K=KS,KE + J=JA(K) + L=IC(J) + IC(J)=L+1 + JC(L)=I + JCI(L)=K +50 CONTINUE +60 CONTINUE +C FIX UP IC + KOLD=IC(1) + IC(1)=1 + DO 70 I=1,N + KNEW=IC(I+1) + IC(I+1)=KOLD + KOLD=KNEW +70 CONTINUE +C FIND SORTED ROW STRUCTURE FROM SORTED COLUMN STRUCTURE + NP=N+1 + DO 80 I=1,NP + ILU(I)=IA(I) +80 CONTINUE +C MOVE ELEMENTS, SET JLU AND ID + DO 100 J=1,N + KS=IC(J) + KE=IC(J+1)-1 + DO 90 K=KS,KE + I=JC(K) + L=ILU(I) + ILU(I)=L+1 + JLU(L)=J + KK=JCI(K) + ALU(L)=A(KK) + IF (I.EQ.J) ID(J)=L +90 CONTINUE +100 CONTINUE +C RESET ILU (COULD JUST USE IA) + DO 110 I=1,NP + ILU(I)=IA(I) +110 CONTINUE +C FINISHED WITH SORTED COLUMN AND ROW STRUCTURE +C +C DO LU DECOMPOSITION USING GAUSSIAN ELIMINATION + DO 120 I=1,N + V(I)=0.0D0 +120 CONTINUE + DO 200 IROW=1,N + I=ID(IROW) + PIVOT=ALU(I) + IF (PIVOT.NE.0.0D0) GO TO 140 +C THIS CASE MAKES THE ILU LESS ACCURATE + ICBAD=ICBAD+1 + KS=ILU(IROW) + KE=ILU(IROW+1)-1 + DO 130 K=KS,KE + PIVOT=PIVOT+DABS(ALU(K)) +130 CONTINUE + IF (PIVOT.EQ.0.0D0) GO TO 220 +140 PIVOT=1.0D0/PIVOT + ALU(I)=PIVOT + KKS=I+1 + KKE=ILU(IROW+1)-1 + IF (KKS.GT.KKE) GO TO 160 + DO 150 K=KKS,KKE + J=JLU(K) + V(J)=ALU(K) +150 CONTINUE +C FIX L IN COLUMN IROW AND DO PARTIAL LU IN SUBMATRIX +160 KS=IC(IROW) + KE=IC(IROW+1)-1 + DO 190 K=KS,KE + I=JC(K) + IF (I.LE.IROW) GO TO 190 + LS=ILU(I) + LE=ILU(I+1)-1 + DO 180 L=LS,LE + J=JLU(L) + IF (J.LT.IROW) GO TO 180 + IF (J.GT.IROW) GO TO 170 + AMULT=ALU(L)*PIVOT + ALU(L)=AMULT + IF (AMULT.EQ.0.0) GO TO 190 + GO TO 180 +170 IF (V(J).EQ.0.0D0) GO TO 180 + ALU(L)=ALU(L)-AMULT*V(J) +180 CONTINUE +190 CONTINUE +C RESET V + IF (KKS.GT.KKE) GO TO 200 + DO 195 K=KKS,KKE + J=JLU(K) + V(J)=0.0D0 +195 CONTINUE +200 CONTINUE +C NORMAL RETURN + DLU0 = .FALSE. + RETURN +C ERROR RETURNS +210 IE=I + DLU0 = .TRUE. + RETURN +220 IE=IROW + DLU0 = .TRUE. + RETURN + END + SUBROUTINE DNCG0(N,IA,JA,A,B,X,ILU,JLU,ID,ALU,R,P,S1,S2, + * EPS,ITER,IE) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),B(N),X(N),ILU(*),JLU(*),ALU(*),ID(N), + * R(N),P(N),S1(N),S2(N) +C NONSYMMETRIC CONJUGATE GRADIENT +C WHERE: +C N,IA,JA,A,B, AND X ARE DESCRIBED IN SUBROUTINE DILUCG. +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU. +C JLU GIVES COLUMN NUMBER. +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U. +C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW +C WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U. +C R,P,S1, AND S2 ARE VECTORS OF LENGTH N USED IN THE +C ITERATIONS. +C FOLLOWING PARAMETER ADDED BY J. THORNBURG, 17/MAY/85. +C EPS IS CONVERGENCE CRITERIA. (DESCRIBED IN SUBROUTINE +C DILUCG). +C ITER IS MAX NUMBER OF ITERATIONS, OR 0 FOR "NO LIMIT". +C IE GIVES ACTUAL NUMBER OF ITERATIONS, NEGATIVE IF +C NO CONVERGENCE. +C +C R0=B-A*X0 + CALL DMUL10(N,IA,JA,A,X,R) + DO 10 I=1,N + R(I)=B(I)-R(I) +10 CONTINUE +C P0=(UT*U)(-1)*AT*(L*LT)(-1)*R0 +C FIRST SOLVE L*LT*S1=R0 + CALL DSUBL0(N,ILU,JLU,ID,ALU,R,S1) +C TIMES TRANSPOSE OF A + CALL DMUL20(N,IA,JA,A,S1,S2) +C THEN SOLVE UT*U*P=S2 + CALL DSUBU0(N,ILU,JLU,ID,ALU,S2,P) + IE=0 +C INPROD IS DOT PRODUCT ROUTINE IN *LIBRARY (UBC MATRIX P 28) +C ALL CALLS ON IT COMMENTED OUT AND REPLACED WITH CALLS TO NEW +C ROUTINE DGVV(...); SOURCE CODE FOR LATTER ADDED TO END OF +C THIS FILE. - J. THORNBURG, 10/MAY/85. +C CALL INPROD(R,S1,EDOT,RDOT,N) + RDOT = DGVV(R,S1,N) +C LOOP BEGINS HERE +20 CALL DMUL30(N,ILU,JLU,ID,ALU,P,S2) +C CALL INPROD(P,S2,EDOT,PDOT,N) + PDOT = DGVV(P,S2,N) +C + IF (PDOT.EQ.0.0D0) RETURN +C + ALPHA=RDOT/PDOT +C EQUATION 9PA ALPHA=(R,LINV*R)/(P,UT*U*P) + XMAX=1.0D0 + XDIF=0.0D0 + DO 30 I=1,N + AP=ALPHA*P(I) + X(I)=X(I)+AP +C EQUATION 9PB X=X+ALPHA*P + AP=DABS(AP) + XX=DABS(X(I)) + IF (AP.GT.XDIF) XDIF=AP + IF (XX.GT.XMAX) XMAX=XX +30 CONTINUE + IE=IE+1 +C +C CONVERGENCE TEST (CHANGED BY J. THORNBURG, 17/MAY/85, 4/AUG/89) +C + IF ((EPS .GT. 0.0D0) .AND. (XDIF .LE. EPS * XMAX)) RETURN + IF ((EPS .LT. 0.0D0) .AND. (XMAX + XDIF/DABS(EPS) .EQ. XMAX)) + * RETURN +C +C EXCEEDED ITERATION LIMIT? +C + IF ((ITER .NE. 0) .AND. (IE .GE. ITER)) GO TO 60 + CALL DMUL10(N,IA,JA,A,P,S2) + DO 40 I=1,N + R(I)=R(I)-ALPHA*S2(I) +C EQUATION 9PC R=R-ALPHA*A*P +40 CONTINUE + CALL DSUBL0(N,ILU,JLU,ID,ALU,R,S1) +C CALL INPROD(R,S1,EDOT,RRDOT,N) + RRDOT = DGVV(R,S1,N) + BETA=RRDOT/RDOT +C EQUATION 9PD BETA=(R+,LINV*R+)/(R,LINV*R) + RDOT=RRDOT + CALL DMUL20(N,IA,JA,A,S1,S2) + CALL DSUBU0(N,ILU,JLU,ID,ALU,S2,S1) + DO 50 I=1,N + P(I)=S1(I)+BETA*P(I) +C EQUATION 9PE P=(UT*U)(-1)*AT*(L*LT)(-1)*R+BETA*P +50 CONTINUE + GO TO 20 +60 IE=-IE + RETURN + END + SUBROUTINE DMUL10(N,IA,JA,A,B,X) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),B(N),X(N) +C MULTIPLY A TIMES B TO GET X +C WHERE: +C N IS THE ORDER OF THE MATRIX +C IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW +C JA GIVES COLUMN NUMBER +C A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC +C MATRIX STORED BY ROW +C B IS THE VECTOR +C X IS THE PRODUCT (MUST BE DIFFERENT FROM B) +C + DO 20 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + SUM=0.0D0 + DO 10 K=KS,KE + J=JA(K) + SUM=SUM+A(K)*B(J) +10 CONTINUE + X(I)=SUM +20 CONTINUE + RETURN + END + SUBROUTINE DMUL20(N,IA,JA,A,B,X) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION IA(*),JA(*),A(*),B(N),X(N) +C MULTIPLY TRANSPOSE OF A TIMES B TO GET X +C WHERE: +C N IS THE ORDER OF THE MATRIX +C IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW +C JA GIVES COLUMN NUMBER +C A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC +C MATRIX STORED BY ROW +C B IS THE VECTOR +C X IS THE PRODUCT (MUST BE DIFFERENT FROM B) +C + DO 10 I=1,N + X(I)=0.0D0 +10 CONTINUE + DO 30 I=1,N + KS=IA(I) + KE=IA(I+1)-1 + BB=B(I) + DO 20 K=KS,KE + J=JA(K) + X(J)=X(J)+A(K)*BB +20 CONTINUE +30 CONTINUE + RETURN + END + SUBROUTINE DMUL30(N,ILU,JLU,ID,ALU,B,X) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N) +C MULTIPLY TRANSPOSE OF U TIMES U TIMES B TO GET X +C WHERE: +C N IS THE ORDER OF THE MATRIX +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU +C JLU GIVES COLUMN NUMBER +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U +C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW +C WITH RECIPROCALS OF DIAGONAL ELEMENTS +C B IS THE VECTOR +C X IS THE PRODUCT UT*U*B (X MUST BE DIFFERENT FROM B) +C + DO 10 I=1,N + X(I)=0.0D0 +10 CONTINUE + DO 50 I=1,N + KS=ID(I)+1 + KE=ILU(I+1)-1 + DIAG=1.0D0/ALU(KS-1) + XX=DIAG*B(I) + IF (KS.GT.KE) GO TO 30 + DO 20 K=KS,KE + J=JLU(K) + XX=XX+ALU(K)*B(J) +20 CONTINUE +30 X(I)=X(I)+DIAG*XX + IF (KS.GT.KE) GO TO 50 + DO 40 K=KS,KE + J=JLU(K) + X(J)=X(J)+ALU(K)*XX +40 CONTINUE +50 CONTINUE + RETURN + END + SUBROUTINE DSUBU0(N,ILU,JLU,ID,ALU,B,X) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N) +C DO FORWARD AND BACK SUBSTITUTION TO SOLVE UT*U*X=B +C WHERE: +C N IS THE ORDER OF THE MATRIX +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU +C JLU GIVES COLUMN NUMBER +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U +C ALU HAS NONZERO ELMENTS OF LU MATRIX STORED BY ROW +C WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U +C B IS THE RHS VECTOR +C X IS THE SOLUTION VECTOR +C + NP=N+1 + DO 10 I=1,N + X(I)=B(I) +10 CONTINUE +C FORWARD SUBSTITUTION + DO 30 I=1,N + KS=ID(I)+1 + KE=ILU(I+1)-1 + XX=X(I)*ALU(KS-1) + X(I)=XX + IF (KS.GT.KE) GO TO 30 + DO 20 K=KS,KE + J=JLU(K) + X(J)=X(J)-ALU(K)*XX +20 CONTINUE +30 CONTINUE +C BACK SUBSTITUTION + DO 60 II=1,N + I=NP-II + KS=ID(I)+1 + KE=ILU(I+1)-1 + SUM=0.0D0 + IF (KS.GT.KE) GO TO 50 + DO 40 K=KS,KE + J=JLU(K) + SUM=SUM+ALU(K)*X(J) +40 CONTINUE +50 X(I)=(X(I)-SUM)*ALU(KS-1) +60 CONTINUE + RETURN + END + SUBROUTINE DSUBL0(N,ILU,JLU,ID,ALU,B,X) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N) +C DO FORWARD AND BACK SUBSTITUTION TO SOLVE L*LT*X=B +C WHERE: +C N IS THE ORDER OF THE MATRIX +C ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW LU +C JLU GIVES THE COLUMN NUMBER +C ID GIVES INDEX OF DIAGONAL ELEMENT OF U +C ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW +C DIAGONAL ELEMENTS OF L ARE 1.0 AND NOT STORED +C B IS THE RHS VECTOR +C X IS THE SOLUTION VECTOR +C + NP=N+1 + DO 10 I=1,N + X(I)=B(I) +10 CONTINUE +C FORWARD SUBSTITUTION + DO 30 I=1,N + KS=ILU(I) + KE=ID(I)-1 + IF (KS.GT.KE) GO TO 30 + SUM=0.0D0 + DO 20 K=KS,KE + J=JLU(K) + SUM=SUM+ALU(K)*X(J) +20 CONTINUE + X(I)=X(I)-SUM +30 CONTINUE +C BACK SUBSTITUTION + DO 50 II=1,N + I=NP-II + KS=ILU(I) + KE=ID(I)-1 + IF (KS.GT.KE) GO TO 50 + XX=X(I) + IF (XX.EQ.0.0) GO TO 50 + DO 40 K=KS,KE + J=JLU(K) + X(J)=X(J)-ALU(K)*XX +40 CONTINUE +50 CONTINUE + RETURN + END + DOUBLE PRECISION FUNCTION DGVV(V,W,N) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION V(N),W(N) +C SUBROUTINE TO COMPUTE DOUBLE PRECISION VECTOR DOT PRODUCT. +C + SUM = 0.0D0 + DO 10 I = 1,N + SUM = SUM + V(I)*W(I) +10 CONTINUE + DGVV = SUM + RETURN + END diff --git a/src/elliptic/ilucg_wrapper.F77 b/src/elliptic/ilucg_wrapper.F77 new file mode 100644 index 0000000..d1ab803 --- /dev/null +++ b/src/elliptic/ilucg_wrapper.F77 @@ -0,0 +1,72 @@ +c ilucg_wrapper.F77 -- wrapper routines for [sd]ilucg() +c $Header$ + +c +c These subroutines are wrappers around the [sd]ilucg() functions. +c These subroutines take only integer/real/double precision arguments, +c avoiding problems with C/C++ --> Fortran handling of the logical +c result returned by [sd]ilucg(). +c +c Arguments: +c istatus = (out) This is the "IE" return value from [sd]ilucg() +c ierror = (out) This is returned as 0 for a normal return, +c nonzero for an error return. +c + +#include "config.h" + +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG + subroutine silucg_wrapper(n, + $ ia, ja, a, + $ b, x, + $ itemp, rtemp, + $ eps, iter, + $ istatus, ierror) + integer n + integer ia(*), ja(*) + real a(*), b(*), x(*) + integer itemp(*) + real rtemp(*) + real eps + integer iter, istatus, ierror +c + logical silucg +c + ierror = 0 + if (silucg(n, + $ ia, ja, a, + $ b, x, + $ itemp, rtemp, + $ eps, iter, + $ istatus)) ierror = 1 + return + end +#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG + +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG + subroutine dilucg_wrapper(n, + $ ia, ja, a, + $ b, x, + $ itemp, rtemp, + $ eps, iter, + $ istatus, ierror) + integer n + integer ia(*), ja(*) + double precision a(*), b(*), x(*) + integer itemp(*) + double precision rtemp(*) + double precision eps + integer iter, istatus, ierror +c + logical dilucg +c + ierror = 0 + if (dilucg(n, + $ ia, ja, a, + $ b, x, + $ itemp, rtemp, + $ eps, iter, + $ istatus)) ierror = 1 + return + end +#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG diff --git a/src/elliptic/lapack.h b/src/elliptic/lapack.h index 1e86616..0927bff 100644 --- a/src/elliptic/lapack.h +++ b/src/elliptic/lapack.h @@ -4,7 +4,7 @@ /* * prerequisites: * "cctk.h" - * "config.hh" // for "integer" = Fortran integer + * "config.h" // for "integer" = Fortran integer */ #ifdef __cplusplus diff --git a/src/elliptic/lapack_wrapper.F77 b/src/elliptic/lapack_wrapper.F77 new file mode 100644 index 0000000..25796b3 --- /dev/null +++ b/src/elliptic/lapack_wrapper.F77 @@ -0,0 +1,59 @@ +c lapack_wrapper.f -- wrapper routines for LAPACK [sd]gecon() +c $Header$ + +c +c These subroutines are wrappers around the LAPACK [sd]gecon() subroutines. +c These subroutines take only integer/real/double precision arguments, +c avoiding problems with C/C++ --> Fortran passing of the character string +c arguments used by [sd]gecon(). +c +c Arguments: +c norm_int = (in) 0 ==> infinity-norm +c 1 ==> 1-norm +c + +#include "config.h" + +#ifdef HAVE_DENSE_JACOBIAN__LAPACK + subroutine sgecon_wrapper(norm_int, + $ N, A, LDA, anorm, rcond, + $ WORK, IWORK, info) + integer norm_int + integer N, LDA + real A(LDA,N) + real anorm, rcond + real WORK(*) + integer iwork(*) + integer info + if (norm_int .eq. 0) then + call sgecon('I', N,A,LDA, anorm,rcond, WORK,IWORK, info) + else if (norm_int .eq. 1) then + call sgecon('1', N,A,LDA, anorm,rcond, WORK,IWORK, info) + else + info = -1; + end if + return + end +#endif /* HAVE_DENSE_JACOBIAN__LAPACK */ + +#ifdef HAVE_DENSE_JACOBIAN__LAPACK + subroutine dgecon_wrapper(norm_int, + $ N, A, LDA, anorm, rcond, + $ WORK, IWORK, info) + integer norm_int + integer N, LDA + double precision A(LDA,N) + double precision anorm, rcond + double precision WORK(*) + integer iwork(*) + integer info + if (norm_int .eq. 0) then + call dgecon('I', N,A,LDA, anorm,rcond, WORK,IWORK, info) + else if (norm_int .eq. 1) then + call dgecon('1', N,A,LDA, anorm,rcond, WORK,IWORK, info) + else + info = -1; + end if + return + end +#endif /* HAVE_DENSE_JACOBIAN__LAPACK */ diff --git a/src/elliptic/make.code.defn b/src/elliptic/make.code.defn index 28308a0..553d372 100644 --- a/src/elliptic/make.code.defn +++ b/src/elliptic/make.code.defn @@ -3,8 +3,8 @@ # Source files in this directory SRCS = Jacobian.cc \ - dense_Jacobian.cc gecon_wrapper.F77 \ - row_sparse_Jacobian.cc + dense_Jacobian.cc lapack_wrapper.F77 \ + row_sparse_Jacobian.cc ilucg_wrapper.F77 ilucg.f77 # Subdirectories containing source files SUBDIRS = diff --git a/src/elliptic/row_sparse_Jacobian.cc b/src/elliptic/row_sparse_Jacobian.cc index 131655c..7d0be44 100644 --- a/src/elliptic/row_sparse_Jacobian.cc +++ b/src/elliptic/row_sparse_Jacobian.cc @@ -2,22 +2,28 @@ // $Header$ // +// <<<liberal contents of "ilucg.h">>> +// #ifdef HAVE_ROW_SPARSE_JACOBIAN -// row_sparse_Jacobian_sparsity::row_sparse_Jacobian_sparsity -// row_sparse_Jacobian_sparsity::~row_sparse_Jacobian_sparsity -// row_sparse_Jacobian_sparsity::zero_matrix -// row_sparse_Jacobian_sparsity::note_new_row -// row_sparse_Jacobian_sparsity::note_element +// row_sparse_Jacobian::row_sparse_Jacobian +// row_sparse_Jacobian::~row_sparse_Jacobian +// row_sparse_Jacobian::element +// row_sparse_Jacobian::zero_matrix +// row_sparse_Jacobian::set_element +// row_sparse_Jacobian::sum_into_element +/// row_sparse_Jacobian::find_element +/// row_sparse_Jacobian::insert_element + #ifdef DEBUG_ROW_SPARSE_JACOBIAN +/// row_sparse_Jacobian::print_data_structure + #endif #endif // -#ifdef HAVE_ROW_SPARSE_JACOBIAN -// row_sparse_Jacobian_matrix::row_sparse_Jacobian_matrix -// row_sparse_Jacobian_matrix::~row_sparse_Jacobian_matrix -// row_sparse_Jacobian_matrix::zero_matrix -// row_sparse_Jacobian_matrix::find_element -// row_sparse_Jacobian_matrix::start_new_row -// row_sparse_Jacobian_matrix::insert_element +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG +// row_sparse_Jacobian__ILUCG::row_sparse_Jacobian__ILUCG +// row_sparse_Jacobian__ILUCG::~row_sparse_Jacobian__ILUCG +// row_sparse_Jacobian__ILUCG::solve_linear_system #endif +// #include <stdio.h> #include <assert.h> @@ -52,16 +58,102 @@ using jtutil::error_exit; //****************************************************************************** //****************************************************************************** +// +// FIXME: +// Cactus's CCTK_FCALL() isn't expanded in .h files (this is a bug), +// so we include the contents of "lapack.h" inline here. This is a +// kludge! :( :( +//#include "ilucg.h" +// + +//***** begin "ilucg.h" contents ****** +/* ilucg.h -- C/C++ prototypes for [sd]ilucg() routines */ +/* $Header$ */ + +/* + * prerequisites: + * "cctk.h" + * "config.h" // for "integer" = Fortran integer + */ + +#ifdef __cplusplus +extern "C" + { +#endif + +/* + * ***** ILUCG ***** + */ +integer CCTK_FCALL + CCTK_FNAME(silucg)(const integer* N, + const integer ia[], const integer ja[], const float a[], + const float b[], float x[], + integer itemp[], float rtemp[], + const float* eps, const integer* iter, + integer* istatus); +integer CCTK_FCALL + CCTK_FNAME(dilucg)(const integer* N, + const integer ia[], const integer ja[], const double a[], + const double b[], double x[], + integer itemp[], double rtemp[], + const double* eps, const integer* iter, + integer* istatus); + +/* + * ***** wrappers (for returning logical results) ***** + */ +void CCTK_FCALL + CCTK_FNAME(silucg_wrapper) + (const integer* N, + const integer ia[], const integer ja[], const float a[], + const float b[], float x[], + integer itemp[], float rtemp[], + const float* eps, const integer* iter, + integer* istatus, integer* ierror); +void CCTK_FCALL + CCTK_FNAME(dilucg_wrapper) + (const integer* N, + const integer ia[], const integer ja[], const double a[], + const double b[], double x[], + integer itemp[], double rtemp[], + const double* eps, const integer* iter, + integer* istatus, integer* ierror); + +#ifdef __cplusplus + } /* extern "C" */ +#endif +//***** end "ilucg.h" contents ****** + +//****************************************************************************** +//****************************************************************************** +//****************************************************************************** + #ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function constructs a row_sparse_Jacobian_sparsity object. +// This function constructs a row_sparse_Jacobian object. // -row_sparse_Jacobian_sparsity::row_sparse_Jacobian_sparsity - (patch_system& ps, - bool print_msg_flag /* = false */) - : Jacobian_sparsity(ps, print_msg_flag), - JJs_at_this_II_(new int[NN()]) +row_sparse_Jacobian::row_sparse_Jacobian(patch_system& ps, + bool print_msg_flag /* = false */) + : Jacobian(ps), + N_nonzeros_(0), + N_nonzeros_allocated_(FD_GRID__MOL_AREA * N_rows_), + // initial guess, insert_element() + // will grow if/when necessary + current_N_rows_(0), + IA_(new integer[N_rows_+1]), + JA_(new integer[N_nonzeros_allocated_]), + A_(new fp [N_nonzeros_allocated_]) { +if (print_msg_flag) + then { + CCTK_VInfo(CCTK_THORNSTRING, + "constructing row_sparse_Jacobian: N_rows_=%d", + N_rows_); + CCTK_VInfo(CCTK_THORNSTRING, + " initial N_nonzeros_allocated_=%d", + N_nonzeros_allocated_); + } + zero_matrix(); } #endif // HAVE_ROW_SPARSE_JACOBIAN @@ -70,11 +162,13 @@ zero_matrix(); #ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function destroys a row_sparse_Jacobian_sparsity object. +// This function destroys a row_sparse_Jacobian object. // -row_sparse_Jacobian_sparsity::~row_sparse_Jacobian_sparsity() +row_sparse_Jacobian::~row_sparse_Jacobian() { -delete[] JJs_at_this_II_; +delete[] A_; +delete[] JA_; +delete[] IA_; } #endif // HAVE_ROW_SPARSE_JACOBIAN @@ -82,13 +176,13 @@ delete[] JJs_at_this_II_; #ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function notes the zeroing of the entire matrix +// This function gets a matrix element. // -void row_sparse_Jacobian_sparsity::zero_matrix() +fp row_sparse_Jacobian::element(int II, int JJ) + const { -N_nonzeros_ = 0; -current_II_ = 0; -N_JJs_at_this_II_ = 0; +const int fposn = find_element(II,JJ); +return (fposn > 0) ? fA(fposn) : 0.0; } #endif // HAVE_ROW_SPARSE_JACOBIAN @@ -96,12 +190,16 @@ N_JJs_at_this_II_ = 0; #ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function notes that we're starting a new row in the matrix traversal. +// This function zeros a row_sparse_Jacobian object. // -void row_sparse_Jacobian_sparsity::note_new_row(int II) +void row_sparse_Jacobian::zero_matrix() { -current_II_ = II; -N_JJs_at_this_II_ = 0; +#ifdef DEBUG_ROW_SPARSE_JACOBIAN +printf("row_sparse_Jacobian::zero_matrix()\n"); +#endif +N_nonzeros_ = 0; +current_N_rows_= 0; +fIA(1) = 1; } #endif // HAVE_ROW_SPARSE_JACOBIAN @@ -109,120 +207,269 @@ N_JJs_at_this_II_ = 0; #ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function notes that the (II,JJ) matrix element needs to be stored. +// This function sets a matrix element to a specified value. // -void row_sparse_Jacobian_sparsity::note_element(int II, int JJ) +void row_sparse_Jacobian::set_element(int II, int JJ, fp value) { -if (II != current_II_) - then { - N_nonzeros_ += N_JJs_at_this_II_; // update totals for old row - note_new_row(II); - } - -// have we already seen this JJ in this row? - for (int jindex = 0 ; jindex < N_JJs_at_this_II_ ; ++jindex) - { - if (JJs_at_this_II_[jindex] == JJ) - then { - // yes, we've already seen it - // ==> no-op here - return; - } - } - -// get to here ==> no, we haven't seen this JJ before in this roW -// ==> add it to this row -if (N_JJs_at_this_II_ >= NN()) - then error_exit(ERROR_EXIT, -"***** row_sparse_Jacobian_sparsity():\n" -" row buffer overflowed (this should never happen!)\n" -" before insertion N_JJs_at_this_II_=%d >= NN()=%d\n" -" N_nonzeros_=%d current_II_=%d\n" -" II=%d JJ=%d\n" - , - N_JJs_at_this_II_, NN(), - N_nonzeros_, current_II_, - II, JJ); /*NOTREACHED*/ -JJs_at_this_II_[N_JJs_at_this_II_++] = JJ; +const int fposn = find_element(II,JJ); +if (fposn > 0) + then fA(fposn) = value; + else insert_element(II,JJ, value); } #endif // HAVE_ROW_SPARSE_JACOBIAN //****************************************************************************** -//****************************************************************************** -//****************************************************************************** +#ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function constructs a row_sparse_Jacobian_matrix object. +// This function sums a value into a matrix element. // -row_sparse_Jacobian_matrix::row_sparse_Jacobian_matrix - (patch_system& ps, - row_sparse_Jacobian_sparsity& JS, - bool print_msg_flag /* = false */) - : Jacobian_matrix(ps, JS), - N_nonzeros_(JS.N_nonzeros()), - A_(new fp[N_nonzeros_]), - IA_(new integer[NN()+1]), - JA_(new integer[N_nonzeros_]) +void row_sparse_Jacobian::sum_into_element(int II, int JJ, fp value) { -if (print_msg_flag) - then CCTK_VInfo(CCTK_THORNSTRING, - "row_sparse_Jacobian_matrix ctor: N_nonzeros_=%d", - N_nonzeros_); - -zero_matrix(); +const int fposn = find_element(II,JJ); +if (fposn > 0) + then fA(fposn) += value; + else insert_element(II,JJ, value); } +#endif // HAVE_ROW_SPARSE_JACOBIAN //****************************************************************************** +#ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function destroys a row_sparse_Jacobian_matrix object. +// This function searches our data structures for element (II,JJ). +// If found, it returns the position (1-origin) in A_ and JA_. +// If not found, it returns 0. // -row_sparse_Jacobian_matrix::~row_sparse_Jacobian_matrix() +int row_sparse_Jacobian::find_element(int II, int JJ) + const { -delete[] JA_; -delete[] IA_; -delete[] A_; +const int fII = fsub(II); +const int fJJ = fsub(JJ); + +if (fII > current_N_rows_) + then return 0; // this row not defined yet + +const int fstart = fIA(fII); +const int fstop = fIA(fII + 1); + for (int fposn = fstart ; fposn < fstop ; ++fposn) + { + if (fJA(fposn) == fJJ) + then return fposn; // found + } + +return 0; // not found } +#endif // HAVE_ROW_SPARSE_JACOBIAN //****************************************************************************** +#ifdef HAVE_ROW_SPARSE_JACOBIAN // -// This function zeros a row_sparse_Jacobian_matrix object. +// This function inserts a new element in the matrix, starting a new +// row and/or growing the arrays if necessary. It should only be called +// if JJ isn't already in this row. // -void row_sparse_Jacobian_matrix::zero_matrix() +int row_sparse_Jacobian::insert_element(int II, int JJ, fp value) { -current_N_nonzeros_ = 0; -current_II_ = 0; -JA_[current_II_] = fsub(0); +const int fII = fsub(II); +const int fJJ = fsub(JJ); + +#ifdef DEBUG_ROW_SPARSE_JACOBIAN +printf( + "row_sparse_Jacobian::insert_element(): fII=%d fJJ=%d\n", + fII, fJJ); +#endif + +if (fII != current_N_rows_) + then { + #ifdef DEBUG_ROW_SPARSE_JACOBIAN + printf(" starting new row\n"); + #endif + assert(fII == current_N_rows_+1); + assert(current_N_rows_ < N_rows_); + ++current_N_rows_; + fIA(current_N_rows_+1) = fIA(current_N_rows_); + #ifdef DEBUG_ROW_SPARSE_JACOBIAN + print_data_structure(); + #endif + } + +if (fIA(fII+1) > N_nonzeros_allocated_) + then { + #ifdef DEBUG_ROW_SPARSE_JACOBIAN + printf(" growing arrays from N_nonzeros_allocated_=%d", + N_nonzeros_allocated_); + #endif + N_nonzeros_allocated_ += (N_nonzeros_allocated_ >> 1); + #ifdef DEBUG_ROW_SPARSE_JACOBIAN + printf(" to %d\n", N_nonzeros_allocated_); + #endif + integer* const new_JA = new integer[N_nonzeros_allocated_]; + fp * const new_A = new fp [N_nonzeros_allocated_]; + for (int posn = 0 ; posn < N_nonzeros_ ; ++posn) + { + new_JA[posn] = JA_[posn]; + new_A[posn] = A_[posn]; + } + delete[] A_; + delete[] JA_; + JA_ = new_JA; + A_ = new_A; + } + +++N_nonzeros_; +const int fposn = fIA(fII+1)++; +fJA(fposn) = fJJ; + fA(fposn) = value; +#ifdef DEBUG_ROW_SPARSE_JACOBIAN +printf(" storing at fposn=%d (new N_nonzeros_=%d)\n", fposn, N_nonzeros_); +#endif +return fposn; } +#endif // HAVE_ROW_SPARSE_JACOBIAN //****************************************************************************** +#ifdef HAVE_ROW_SPARSE_JACOBIAN +#ifdef DEBUG_ROW_SPARSE_JACOBIAN // -// This function searches our data structures for element (II,JJ). -// If found, it returns the position (0-origin) in A_ and JA_. -// If not found, it returns -1. +// This function prints the sparse-matrix data structure. // -int row_sparse_Jacobian_matrix::find_element(int II, int JJ) +void row_sparse_Jacobian::print_data_structure() const { -const int start = IA(II); -const int stop = IA(II+1); - for (int posn = start ; posn < stop ; ++posn) +printf("--- begin Jacobian printout\n"); + for (int fII = 1 ; fII <= current_N_rows_ ; ++fII) { - if (JA(posn) == JJ) - then return posn; // found + printf("fII=%d", fII); + const int fstart = fIA(fII); + const int fstop = fIA(fII + 1); + printf("\tfposn=[%d,%d):", fstart, fstop); + printf("\tfJJ ="); + for (int fposn = fstart ; fposn < fstop ; ++fposn) + { + printf(" %d", fJA(fposn)); + } + printf("\n"); } +printf("--- end Jacobian printout\n"); +} +#endif // DEBUG_ROW_SPARSE_JACOBIAN +#endif // HAVE_ROW_SPARSE_JACOBIAN + +//****************************************************************************** +//****************************************************************************** +//****************************************************************************** + +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG +// +// This function constructs a row_sparse_Jacobian__ILUCG object. +// +row_sparse_Jacobian__ILUCG::row_sparse_Jacobian__ILUCG + (patch_system& ps, + bool print_msg_flag /* = false */) + : row_sparse_Jacobian(ps, print_msg_flag), + print_msg_flag_(print_msg_flag), + itemp_(NULL), + rtemp_(NULL) +{ } +#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG -return -1; // not found +//****************************************************************************** + +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG +// +// This function destroys a row_sparse_Jacobian__ILUCG object. +// +row_sparse_Jacobian__ILUCG::~row_sparse_Jacobian__ILUCG() +{ +delete[] rtemp_; +delete[] itemp_; } +#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG //****************************************************************************** +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG // -// This function starts a new row in the matrix. +// This function solves the linear system J.x = rhs, with rhs and x +// being nominal-grid gridfns, using an ILUCG subroutine originally +// provided to me in 1985 (!) by Tom Nicol of the UBC Computing Center. +// It returns -1.0 (no condition number estimate). // -void row_sparse_Jacobian_matrix::start_new_row(int II) +fp row_sparse_Jacobian__ILUCG::solve_linear_system(int rhs_gfn, int x_gfn) { -// FIXME FIXME +assert(current_N_rows_ == N_rows_); + +// +// if this is our first call, allocate the scratch arrays +// +if (itemp_ == NULL) + then { + if (print_msg_flag_) + then { + CCTK_VInfo(CCTK_THORNSTRING, + "row_sparse_Jacobian__ILUCG::solve_linear_system()"); + CCTK_VInfo(CCTK_THORNSTRING, + " N_rows_=%d N_nonzeros_=%d", + N_rows_, N_nonzeros_); + CCTK_VInfo(CCTK_THORNSTRING, + " N_nonzeros_allocated_=%d", + N_nonzeros_allocated_); + } + itemp_ = new integer[3*N_rows_ + 3*N_nonzeros_ + 2]; + rtemp_ = new fp [4*N_rows_ + N_nonzeros_ ]; + } + +// +// set up the ILUCG subroutine +// + +// initial guess = all zeros +fp *x = ps_.gridfn_data(x_gfn); + for (int II = 0 ; II < N_rows_ ; ++II) + { + x[II] = 0.0; + } + +const integer N = N_rows_; +const fp *rhs = ps_.gridfn_data(rhs_gfn); +const fp eps = -1000.0; // solve to 1000 * machine epsilon +const integer max_iterations = 0; // no limit on iterations +integer istatus, ierror; + +// the actual linear solution +#if defined(FP_IS_FLOAT) + CCTK_FNAME(silucg_wrapper)(&N, + IA_, JA_, A_, + rhs, x, + itemp_, rtemp_, + &eps, &max_iterations, + &istatus, &ierror); +#elif defined(FP_IS_DOUBLE) + CCTK_FNAME(dilucg_wrapper)(&N, + IA_, JA_, A_, + rhs, x, + itemp_, rtemp_, + &eps, &max_iterations, + &istatus, &ierror); +#else + #error "don't know fp datatype!" +#endif + +if (ierror != 0) + then CCTK_VWarn(-1, __LINE__, __FILE__, CCTK_THORNSTRING, +"***** row_sparse_Jacobian__ILUCG::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n" +" error return istatus=%d from [sd]ilucg() routine!" + , + rhs_gfn, x_gfn, + int(istatus)); /*NOTREACHED*/ +if (print_msg_flag_) + then CCTK_VInfo(CCTK_THORNSTRING, + " solution found in %d CG iterations", + istatus); + +return -1.0; // no condition number estimate available } +#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG diff --git a/src/elliptic/row_sparse_Jacobian.hh b/src/elliptic/row_sparse_Jacobian.hh index 6e4d962..2eb6f6d 100644 --- a/src/elliptic/row_sparse_Jacobian.hh +++ b/src/elliptic/row_sparse_Jacobian.hh @@ -3,8 +3,10 @@ // #ifdef HAVE_ROW_SPARSE_JACOBIAN -// row_sparse_Jacobian_sparsity -- (no-op) accumulate ... sparsity info -// row_sparse_Jacobian_matrix -- Jacobian stored as row-oriented sparse matrix +// row_sparse_Jacobian -- Jacobian stored as row-oriented sparse matrix +#endif +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG +// row_sparse_Jacobian__ILUCG -- row_sparse_Jacobian with ILUCG linear solver #endif // @@ -14,183 +16,187 @@ // "Jacobian.hh" // +#undef DEBUG_ROW_SPARSE_JACOBIAN // defined this for *very* detailed + // printing of data structures + //****************************************************************************** +#ifdef HAVE_ROW_SPARSE_JACOBIAN +// +// This class stores the Jacobian as a row-oriented sparse matrix. // -// The row-oriented sparse matrix format is widely used by sparse matrix -// libraries. In this format, the matrix is represented by 3 arrays: -// A[N_nonzeros] = nonzero matrix elements, ordered by rows; +// This sparse matrix format is widely used by sparse matrix libraries. +// The format is as follows: The matrix is represented by 3 arrays +// (where () denote Fortran 1-origin subscripting): +// IA(N_rows+1) = Fortran (1-origin) indices in A of the start +// of each row's nonzeros; IA(N_rows+1) = N_nonzeros + 1 +// JA(N_nonzeros) = Fortran (1-origin) column numbers of the +// corresponding entries in A +// A(N_nonzeros) = nonzero matrix elements, ordered by rows; // within a row the matrix elements may be in // any order -// IA[N_rows+1] = Fortran (1-origin) indices in A of the start -// of each row's nonzeros -// JA[N_nonzeros] = Fortran (1-origin) column numbers of the -// corresponding entries in A // IA and JA are arrays of integer so as to be storage-compatible with // Fortran. // - -//****************************************************************************** - -#ifdef HAVE_ROW_SPARSE_JACOBIAN -// -// This class accumulates the sparsity pattern of a row-oriented -// sparse-matrix Jacobian. We assume that the Jacobian is traversed -// by rows. +// For example, the matrix +// [ 11.0 12.0 13.0 16.0 ] +// [ 21.0 22.0 24.0 25.0 ] +// [ 32.0 34.0 ] +// [ 42.0 44.0 46.0 ] +// [ 53.0 55.0 ] +// [ 61.0 64.0 66.0 ] +// could be represented by the arrays (Fortran indexing) +// IA( 1) = 1 +// JA( 1) = 1 JA( 2) = 2 JA( 3) = 3 JA( 4) = 6 +// A( 1) = 11.0 A( 2) = 12.0 A( 3) = 13.0 A( 4) = 16.0 +// IA( 2) = 5 +// JA( 5) = 1 JA( 6) = 2 JA( 7) = 4 JA( 8) = 5 +// A( 5) = 21.0 A( 6) = 22.0 A( 7) = 24.0 A( 8) = 25.0 +// IA( 3) = 9 +// JA( 9) = 2 JA(10) = 4 +// A( 9) = 32.0 A(10) = 34.0 +// IA( 4) = 11 +// JA(11) = 2 JA(12) = 4 JA(13) = 6 +// A(11) = 42.0 A(12) = 44.0 A(13) = 46.0 +// IA( 5) = 14 +// JA(14) = 3 JA(15) = 5 +// A(14) = 53.0 A(15) = 55.0 +// IA( 6) = 16 +// JA(16) = 1 JA(17) = 4 JA(18) = 6 +// A(16) = 61.0 A(17) = 64.0 A(18) = 66.0 +// IA( 7) = 19 // -class row_sparse_Jacobian_sparsity - : public Jacobian_sparsity +class row_sparse_Jacobian + : public Jacobian { public: - // record the zeroing of the entire matrix + // + // routines to access the matrix + // + + // get a matrix element + fp element(int II, int JJ) const; + + // is the matrix element (II,JJ) stored explicitly? + bool is_explicitly_stored(int II, int JJ) const + { return find_element(II,JJ) > 0; } + + + // + // routines for setting values in the matrix + // + + // zero the entire matrix void zero_matrix(); - // record the setting of a matrix element to a specified value - void set_element(int II, int JJ, fp value) - { note_element(II, JJ); } + // set a matrix element to a specified value + void set_element(int II, int JJ, fp value); - // record the summing of a value into a matrix element - void sum_into_element(int II, int JJ, fp value) - { note_element(II, JJ); } + // sum a value into a matrix element + void sum_into_element(int II, int JJ, fp value); - // read out total number of nonzeros - int N_nonzeros() const - { return N_nonzeros_; } + // // constructor, destructor - row_sparse_Jacobian_sparsity(patch_system& ps, - bool print_msg_flag = false); - ~row_sparse_Jacobian_sparsity(); - -private: - // note start of a new row in the matrix traversal - void note_new_row(int II); + // +protected: + row_sparse_Jacobian(patch_system& ps, + bool print_msg_flag = false); + ~row_sparse_Jacobian(); - // note that the (II,JJ) matrix element needs to be stored - void note_element(int II, int JJ); -private: // - // Our goal is to count the total number of distinct matrix - // elements stored. The problem is, there's no easy way to - // know what size buffers to allocate to keep track of them. - // We could use growing buffers, but instead we keep track of - // the set of distinct columns (JJ) seen within each row (II). - // (Here a buffer of size NN() is guaranteed to be sufficient.) - // Then at the end of each row we can update our running total. - // - int N_nonzeros_; - int current_II_; - int N_JJs_at_this_II_; - - // --> new[]-allocated array of JJ values for nonzeros in this row - // only values [0, N_JJs_at_this_II) used - int* JJs_at_this_II_; - }; -#endif // HAVE_ROW_SPARSE_JACOBIAN + // internal routines + // +protected: + // return position (1-origin) of (II,JJ) in A_ and JA_ + // return 0 if not found + int find_element(int II, int JJ) const; -//****************************************************************************** + // insert new array element (II,JJ) in matrix, + // starting new row and/or growing arrays if necessary + // return position (1-origin) in A_ and JA_ + int insert_element(int II, int JJ, fp value); -#ifdef HAVE_ROW_SPARSE_JACOBIAN -// -// This class stores the Jacobian as a row-oriented sparse matrix. -// -class row_sparse_Jacobian_matrix - : public Jacobian_matrix - { -public: - // zero the entire matrix - void zero_matrix(); +#ifdef DEBUG_ROW_SPARSE_JACOBIAN + // print data structure (IA_ and JA_ arrays) + void print_data_structure() const; +#endif - // set a matrix element to a specified value - void set_element(int II, int JJ, fp value) + // access IA[] and JA[] and A[] using 1-origin Fortran indices + int& fIA(int fII) const { - const int posn = find_element(II, JJ); - A_[posn] = value; + assert(fII >= 1); + assert(fII <= current_N_rows_+1); + return IA_[csub(fII)]; } - - // sum a value into a matrix element - void sum_into_element(int II, int JJ, fp value) + int& fJA(int fposn) const { - const int posn = find_element(II, JJ); - A_[posn] += value; + assert(fposn >= 1); + assert(fposn <= N_nonzeros_); + return JA_[csub(fposn)]; } - - // access (get) a matrix element - fp element(int II, int JJ) const + fp& fA(int fposn) const { - const int posn = find_element(II, JJ); - return (posn >= 0) ? A_[posn] : 0.0; + assert(fposn >= 1); + assert(fposn <= N_nonzeros_); + return A_[csub(fposn)]; } - // this is a dense matrix ==> all values are stored explicitly - bool is_explicitly_stored(int II, int JJ) const - { return find_element(II,JJ) >= 0; } +protected: + // actual size, size allocated for JA_ and A_ + int N_nonzeros_, N_nonzeros_allocated_; + + // at present rows 1 <= fii <= current_N_rows_ are valid + int current_N_rows_; - // solve linear system J.x = rhs via LAPACK + // --> new[]-allocated arrays + // ***** due to constructor initialization list ordering, + // ***** these must be declared *AFTER* N_nonzeros_allocated_ + integer* IA_; // size N_rows_+1 + // valid for Fortran indices + // 1 <= fII <= current_N_rows_+1 + // ... these are grown (reallocated) as necessary by insert_element() + // ... using STL vector would be much cleaner here, but alas + // there are too many broken compilers/linkers in the world + // world don't fully grok vector :( :( + integer* JA_; // size N_nonzeros_allocated_ + // valid for Fortran indices + // 1 <= fposn < IA(current_N_rows_) + fp* A_; // ditto + }; +#endif // HAVE_ROW_SPARSE_JACOBIAN + +//****************************************************************************** + +#ifdef HAVE_ROW_SPARSE_JACOBIAN__ILUCG +// +// This class defines the linear solver routine using ILUCG. +// +class row_sparse_Jacobian__ILUCG + : public row_sparse_Jacobian + { +public: + // solve linear system J.x = rhs via ILUCG // ... rhs and x are nominal-grid gridfns // ... overwrites Jacobian matrix with its LU decomposition // ... returns condition number if known, // 0.0 if matrix is numerically singular, // -1.0 if condition number is unknown - fp solve_linear_system(int rhs_gfn, int x_gfn) - { return -1.0; }; // FIXME + fp solve_linear_system(int rhs_gfn, int x_gfn); // constructor, destructor public: - row_sparse_Jacobian_matrix(patch_system& ps, - row_sparse_Jacobian_sparsity& JS, + row_sparse_Jacobian__ILUCG(patch_system& ps, bool print_msg_flag = false); - ~row_sparse_Jacobian_matrix(); - -private: - // return posn (0-origin) of (II,JJ) in A_ and JA_, or -1 if not found - int find_element(int II, int JJ) const; - - // start a new row in the matrix - void start_new_row(int II); - - // insert new array element (II,JJ) in current row - // return posn (0-origin) in A_ and JA_ - void insert_element(int II, int JJ, fp value); + ~row_sparse_Jacobian__ILUCG(); private: - // IA[] and JA[], but returning 0-origin indices - int IA(int II) const - { - assert(II >= 0); - assert(II < NN()+1); - return IA_[II] - 1; - } - int JA(int posn) const - { - assert(posn >= 0); - assert(posn < N_nonzeros_); - return JA_[posn] - 1; - } + bool print_msg_flag_; - // values in IA_[] and JA_[] are semantically Fortran subscripts - // this function converts a C subscript to a Fortran subscript - int fsub(int csub) const - { return csub + 1; } - -private: - int N_nonzeros_; - - // - // invariants while we're traversing the matrix: - // A_[0,current_N_nonzeros_) is valid - // IA_[0,current_II] is valid - // JA_[0,current_N_nonzeros_) is valid - // - int current_N_nonzeros_; - int current_II_; - - // --> new[]-allocated arrays - // ***** due to constructor initialization list ordering, - // ***** these must be declared *AFTER* N_nonzeros_ - fp* A_; // size N_nonzeros_; - integer* IA_; // size NN() + 1 - integer* JA_; // size N_nonzeros_; + // work vectors for ILUCG subroutine + // ... allocated by solve_linear_system() on first call + integer* itemp_; + fp* rtemp_; }; -#endif // HAVE_ROW_SPARSE_JACOBIAN +#endif // HAVE_ROW_SPARSE_JACOBIAN__ILUCG diff --git a/src/gr/expansion_Jacobian.cc b/src/gr/expansion_Jacobian.cc index 5faf059..082387b 100644 --- a/src/gr/expansion_Jacobian.cc +++ b/src/gr/expansion_Jacobian.cc @@ -80,7 +80,7 @@ bool expansion_Jacobian_dr_FD(patch_system& ps, // // This function is a top-level driver to compute the Jacobian matrix // J[Theta(h)] of the expansion Theta(h). It just decodes the -// Jacobian_method parameter and calls the appropriate subfunction. +// Jacobian_compute_method parameter and calls the appropriate subfunction. // // We assume that Theta(h) has already been computed. // @@ -96,22 +96,23 @@ bool expansion_Jacobian(patch_system& ps, const struct Jacobian_info& Jacobian_info, bool print_msg_flag /* = false */) { -switch (Jacobian_info.Jacobian_method) +switch (Jacobian_info.Jacobian_compute_method) { -case Jacobian_method__numerical_perturb: +case Jacobian__numerical_perturbation: if (! expansion_Jacobian_NP(ps, Jac, cgi, gi, Jacobian_info, print_msg_flag)) then return false; // *** ERROR RETURN *** break; -case Jacobian_method__symbolic_diff: +case Jacobian__symbolic_diff: CCTK_VWarn(1, __LINE__, __FILE__, CCTK_THORNSTRING, "\n" -" expansion_Jacobian(): Jacobian_method == \"symbolic differentiation\"\n" -" isn't implemented (yet); reverting to\n" -" \"symbolic differentiation with finite diff d/dr\""); +" expansion_Jacobian():\n" +" Jacobian_compute_method == \"symbolic differentiation\"\n" +" isn't implemented (yet); reverting to\n" +" \"symbolic differentiation with finite diff d/dr\""); // fall through -case Jacobian_method__symbolic_diff_with_FD_dr: +case Jacobian__symbolic_diff_with_FD_dr: expansion_Jacobian_partial_SD(ps, Jac, cgi, gi, Jacobian_info, print_msg_flag); @@ -122,10 +123,11 @@ case Jacobian_method__symbolic_diff_with_FD_dr: break; default: error_exit(PANIC_EXIT, -"***** expansion_Jacobian(): unknown Jacobian_info.Jacobian_method=(int)%d!\n" -" (this should never happen!)\n" +"***** expansion_Jacobian():\n" +" unknown Jacobian_info.Jacobian_compute_method=(int)%d!\n" +" (this should never happen!)\n" , - int(Jacobian_info.Jacobian_method)); /*NOTREACHED*/ + int(Jacobian_info.Jacobian_compute_method)); /*NOTREACHED*/ } return true; // *** NORMAL RETURN *** } diff --git a/src/gr/gr.hh b/src/gr/gr.hh index 43df6ae..c20a720 100644 --- a/src/gr/gr.hh +++ b/src/gr/gr.hh @@ -21,14 +21,14 @@ enum geometry_method }; // -// this enum holds the (a) decoded Jacobian_method parameter, +// this enum holds the (a) decoded Jacobian_compute_method parameter, // i.e. it specifies how we compute the (a) Jacobian matrix // -enum Jacobian_method +enum Jacobian_compute_method { - Jacobian_method__numerical_perturb, - Jacobian_method__symbolic_diff_with_FD_dr, - Jacobian_method__symbolic_diff // no comma + Jacobian__numerical_perturbation, + Jacobian__symbolic_diff_with_FD_dr, + Jacobian__symbolic_diff // no comma }; //****************************************************************************** @@ -131,8 +131,8 @@ struct geometry_info // struct Jacobian_info { - enum Jacobian_method Jacobian_method; - enum Jacobian_type Jacobian_storage_method; + enum Jacobian_compute_method Jacobian_compute_method; + enum Jacobian_store_solve_method Jacobian_store_solve_method; fp perturbation_amplitude; }; @@ -181,11 +181,5 @@ void Schwarzschild_EF_geometry(patch_system& ps, enum geometry_method decode_geometry_method(const char geometry_method_string[]); bool geometry_method_is_interp(enum geometry_method geometry_method); -enum Jacobian_method - decode_Jacobian_method(const char Jacobian_method_string[]); -Jacobian_matrix* - create_Jacobian_matrix(patch_system& ps, - const struct cactus_grid_info& cgi, - const struct geometry_info& gi, - const struct Jacobian_info& Jac_info, - bool print_msg_flag = false); +enum Jacobian_compute_method + decode_Jacobian_compute_method(const char Jacobian_compute_method_string[]); diff --git a/src/gr/misc.cc b/src/gr/misc.cc index 3951331..7cdc018 100644 --- a/src/gr/misc.cc +++ b/src/gr/misc.cc @@ -4,7 +4,6 @@ // decode_geometry_method - decode the geometry_method parameter // geomery_method_is_interp - does enum geometry_method specify interpolation? // decode_geometry_method - decode the geometry_method parameter -// create_Jacobian_matrix - create a new Jacobian_matrix object // #include <stdio.h> @@ -85,68 +84,24 @@ default: //****************************************************************************** // -// This function decodes the Jacobian_method parameter (string) into +// This function decodes the Jacobian_compute_method parameter (string) into // an internal enum for future use. // -enum Jacobian_method - decode_Jacobian_method(const char Jacobian_method_string[]) +enum Jacobian_compute_method + decode_Jacobian_compute_method(const char Jacobian_compute_method_string[]) { -if (STRING_EQUAL(Jacobian_method_string, +if (STRING_EQUAL(Jacobian_compute_method_string, "numerical perturbation")) - then return Jacobian_method__numerical_perturb; -else if (STRING_EQUAL(Jacobian_method_string, + then return Jacobian__numerical_perturbation; +else if (STRING_EQUAL(Jacobian_compute_method_string, "symbolic differentiation with finite diff d/dr")) - then return Jacobian_method__symbolic_diff_with_FD_dr; -else if (STRING_EQUAL(Jacobian_method_string, + then return Jacobian__symbolic_diff_with_FD_dr; +else if (STRING_EQUAL(Jacobian_compute_method_string, "symbolic differentiation")) - then return Jacobian_method__symbolic_diff; + then return Jacobian__symbolic_diff; else CCTK_VWarn(-1, __LINE__, __FILE__, CCTK_THORNSTRING, -"decode_Jacobian_method(): unknown Jacobian_method_string=\"%s\"!", - Jacobian_method_string); /*NOTREACHED*/ -} - -//****************************************************************************** - -// -// This function creates a new-allocated Jacobian matrix and returns a -// pointer to it. In more detail, it constructs a Jacobian_sparsity object, -// calls expansion() and then expansion_Jacobian() to traverse the Jacobian -// matrix (recording the Jacobian's sparsity pattern in the process), then -// uses this sparsity pattern to construct the Jacobian matrix itself -// (and destroys the Jacobian_sparsity object). -// -Jacobian_matrix* - create_Jacobian_matrix(patch_system& ps, - const struct cactus_grid_info& cgi, - const struct geometry_info& gi, - const struct Jacobian_info& Jac_info, - bool print_msg_flag /* = false */) -{ -if (print_msg_flag) - then CCTK_VInfo(CCTK_THORNSTRING, - "traversing Jacobian matrix (recording sparsity pattern)"); -Jacobian_sparsity* JS_ptr - = new_Jacobian_sparsity(Jac_info.Jacobian_storage_method, - ps, - print_msg_flag); - -// it's safest to compute expansion() before doing the traversal -// to make sure all the Jacobian coefficients are set to sane values -expansion(ps, - cgi, gi, - true); // yes, compute Jacobian coeffs - -// now do the actual traversal (==> records the sparsity pattern) -expansion_Jacobian(ps, *JS_ptr, - cgi, gi, Jac_info); - -if (print_msg_flag) - then CCTK_VInfo(CCTK_THORNSTRING, - "constructing Jacobian matrix"); -Jacobian_matrix* Jac_ptr - = new_Jacobian_matrix(Jac_info.Jacobian_storage_method, - ps, *JS_ptr, - print_msg_flag); -delete JS_ptr; -return Jac_ptr; +"\n" +" decode_Jacobian_compute_method():\n" +" unknown Jacobian_compute_method_string=\"%s\"!", + Jacobian_compute_method_string); /*NOTREACHED*/ } diff --git a/src/include/config.h b/src/include/config.h index 492da5a..cf35c7a 100644 --- a/src/include/config.h +++ b/src/include/config.h @@ -47,6 +47,9 @@ typedef CCTK_INT integer; #error "don't know fp datatype!" #endif + +/**************************************/ + /* * The angular finite differencing in our multipatch system can be * either 2nd order or 4th order, configurable here. @@ -58,12 +61,18 @@ typedef CCTK_INT integer; #define FINITE_DIFF_ORDER 4 #endif +/**************************************/ + /* - * What types of Jacobian matrix storage do we want to compile in - * support for? N.b. each of these requires linking with the corresponding - * linear-solver library; see "../elliptic/Jacobian.hh" for details - * on the storage formats, or "../make.configuration.defn" (if it exists) - * for details on the libraries. + * What types of Jacobian matrix storage and linear solvers do we want to + * compile in support for? Note that each of these requires linking with + * the corresponding linear-solver library; see "../elliptic/Jacobian.hh" + * for details on the storage formats, or "../make.configuration.defn" + * (if it exists) for details on the libraries. */ -#define HAVE_DENSE_JACOBIAN -#define HAVE_ROW_SPARSE_JACOBIAN + +/* store as (Fortran) dense matrix, solve with LAPACK */ +#undef HAVE_DENSE_JACOBIAN__LAPACK + +/* store as row-oriented sparse matrix, solve with ILUCG */ +#define HAVE_ROW_SPARSE_JACOBIAN__ILUCG diff --git a/src/patch/fd_grid.hh b/src/patch/fd_grid.hh index b0d6307..6e0bf5f 100644 --- a/src/patch/fd_grid.hh +++ b/src/patch/fd_grid.hh @@ -357,6 +357,8 @@ #error "unknown value " FINITE_DIFF_ORDER " for FINITE_DIFF_ORDER!" #endif +#define FD_GRID__MOL_AREA (FD_GRID__MOL_DIAMETER * FD_GRID__MOL_DIAMETER) + //****************************************************************************** // |