diff options
author | lanfer <lanfer@1d96b42b-98df-4a6a-9d84-1b24288d4588> | 1999-09-03 17:57:58 +0000 |
---|---|---|
committer | lanfer <lanfer@1d96b42b-98df-4a6a-9d84-1b24288d4588> | 1999-09-03 17:57:58 +0000 |
commit | c15500f177e9c1966a5286f2b12388c5ac24ece1 (patch) | |
tree | 8963e38f1830c174bf0bc3357415e0761e118985 | |
parent | c9d3c127a42d1740c1f1f85e65e034b311addb4f (diff) |
The CactusElliptic arrangement
git-svn-id: http://svn.cactuscode.org/arrangements/CactusElliptic/EllPETSc/trunk@2 1d96b42b-98df-4a6a-9d84-1b24288d4588
-rw-r--r-- | README | 20 | ||||
-rw-r--r-- | interface.ccl | 9 | ||||
-rw-r--r-- | param.ccl | 49 | ||||
-rw-r--r-- | schedule.ccl | 12 | ||||
-rw-r--r-- | src/Startup.c | 18 | ||||
-rw-r--r-- | src/make.code.defn | 17 | ||||
-rw-r--r-- | src/make.configuration.defn | 20 | ||||
-rw-r--r-- | src/petsc_confmetric.c | 945 | ||||
-rw-r--r-- | src/petsc_confmetric_solver.c | 1064 | ||||
-rw-r--r-- | src/petsc_wrapper.c | 32 |
10 files changed, 2186 insertions, 0 deletions
@@ -0,0 +1,20 @@ +Cactus Code Thorn EllPETSc +Authors : ... +Managed by : ... <...@...........> +Version : ... +CVS info : $Header$ +-------------------------------------------------------------------------- + +1. Purpose of the thorn + +This thorn does ... + +2. Dependencies of the thorn + +This thorn additionally requires implementations and thorns ... + +3. Thorn distribution + +This thorn is available to ... + +4. Additional information diff --git a/interface.ccl b/interface.ccl new file mode 100644 index 0000000..ccaef7a --- /dev/null +++ b/interface.ccl @@ -0,0 +1,9 @@ +# Interface definition for thorn EllPETSc +# $Header$ + +implements: ellpetsc + +CCTK_REAL petscworkspace type=GF +{ + wsp +} "Workspace for the elliptic PETSc solver" diff --git a/param.ccl b/param.ccl new file mode 100644 index 0000000..7066256 --- /dev/null +++ b/param.ccl @@ -0,0 +1,49 @@ +# Parameter definitions for thorn PETSc_Elliptic +# $Header$ + +shares: grid + +USES KEYWORD domain "" +{ +} + +private: + +KEYWORD petsc_verbose "PETSc verbose output" +{ +"no" :: "No output" +"yes" :: "Some output" +"debug":: "Tons of output" +} "yes" + +LOGICAL petsc_reuse "Reuse parts of the PETSc structure" +{ +} "no" + +LOGICAL petsc_coeff_to_one "Divide each line of the matrix by the central value?" +{ +} "no" + +STRING petsc_KSP_type "See the PETSc Manual, p 49 for options, or look at petsc_ell source" +{ + :: +}"KSPBCGS" + +STRING petsc_PC_type "See the PETSc Manual, p 49 for options, or look at petsc_ell source" +{ + :: +} "PCJACOBI" + +KEYWORD petsc_nablaform "PETSC nabla form" +{ + "up" :: "" + "down" :: "" +} "down" + + + +#FIXME At the moment we pass toltype explicitly from LinearEllitpic +#CCTK_INT PetscTolStyle "PETSc Tolerance flavors FIXME" +#{ +# 0: :: "" +#} 0 diff --git a/schedule.ccl b/schedule.ccl new file mode 100644 index 0000000..f561e3c --- /dev/null +++ b/schedule.ccl @@ -0,0 +1,12 @@ +# Schedule definitions for thorn EllPETSc +# $Header$ + +STORAGE: petscworkspace +COMMUNICATION: petscworkspace + + +schedule EllPETSc_Register at CCTK_INITIAL +{ + LANG:C +} "Register the PETSc solvers" + diff --git a/src/Startup.c b/src/Startup.c new file mode 100644 index 0000000..05c3336 --- /dev/null +++ b/src/Startup.c @@ -0,0 +1,18 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "cctk.h" +#include "cctk_parameters.h" + +#include "CactusElliptic/LinearElliptic/src/LinearElliptic.h" + + +int EllPETSc_Register(cGH *GH) { + void petsc_confmetric(cGH *GH, int *MetricPsiI, int *FieldI, int *MI, + int *NI, int *TolArray); + + printf("PETSc: Registering petsc_confmetric..."); + Ell_RegisterSolver(petsc_confmetric,"petsc","Ell_LinConfMetric"); + printf("... done \n "); +} diff --git a/src/make.code.defn b/src/make.code.defn new file mode 100644 index 0000000..1d4e1ee --- /dev/null +++ b/src/make.code.defn @@ -0,0 +1,17 @@ +# Main make.code.defn file for thorn JacobiElliptic +# $Header$ + +# Source files in this directory +SRCS = Startup.c petsc_wrapper.c petsc_confmetric_solver.c + +# Subdirectories containing source files +SUBDIRS = + +SYS_INC_DIRS += /usr/local/mpich/include +SYS_INC_DIRS += /usr/local/mpich/build/LINUX/ch_p4/include + +SYS_INC_DIRS += $(PETSC_DIR) $(PETSC_DIR)/include \ + $(PETSC_DIR)/bmake/$(PETSC_ARCH) + + + diff --git a/src/make.configuration.defn b/src/make.configuration.defn new file mode 100644 index 0000000..abb7803 --- /dev/null +++ b/src/make.configuration.defn @@ -0,0 +1,20 @@ +# Main make.code.defn file for thorn PETSc_Elliptic +# $Header$ + +LIBDIRS += $(PETSC_DIR)/lib/libO/$(PETSC_ARCH) + + +#on linux system: the lapack libs (in rpms) are compiled with f2c/g77 +#and need the -lg2c or -lf2c to be linked +#keep them at the end of lapack + +MYLIBS := petsc petscdm petsccontrib \ + petscts petscsnes petscsles \ + petscmat petscvec \ + blas lapack g2c \ + mpich $(LIBS) + +LIBS = $(MYLIBS) + +EXTRAFLAGS = -I$(PETSC_DIR)/bmake/linux/base + diff --git a/src/petsc_confmetric.c b/src/petsc_confmetric.c new file mode 100644 index 0000000..dc3affb --- /dev/null +++ b/src/petsc_confmetric.c @@ -0,0 +1,945 @@ +/*@@ + @file PETScEll_conformal.c + @date Wed Apr 9 09:31:24 1997 + @author Paul Walker + @desc + A nabla phi - M phi = N elliptic solver based around PETSc. + For information, see the documentation for @seeroutine petscEll_conformal + @enddesc +@@*/ + +#include "cctk.h" +#include "cctk_parameters.h" +#include "cctk_Flesh.h" + +#include "assert.h" +#include "mpi.h" +#include "sles.h" + +#include "CactusPUGH/PUGH/src/include/pugh.h" + + +#define DEBUG + +/*Don't know what these actually mean! FIXME */ +#define ELLCONV_ABSOLUTE 0 +#define ELLCONV_RELATIVE 1 +#define ELLCONV_EITHER 2 + +/* A few convenient macros */ +#define a(i,j,k) a[i+1 + 3*(j+1 + 3*(k+1))] +#define SQR(a) ((a)*(a)) + + +/* Some useful definitions to deal with the upper metric */ +#define Uxx(i,j,k) uxx3[DATINDEX(pughGH,(i),(j),(k))] +#define Uxy(i,j,k) uxy3[DATINDEX(pughGH,(i),(j),(k))] +#define Uxz(i,j,k) uxz3[DATINDEX(pughGH,(i),(j),(k))] +#define Uyy(i,j,k) uyy3[DATINDEX(pughGH,(i),(j),(k))] +#define Uyz(i,j,k) uyz3[DATINDEX(pughGH,(i),(j),(k))] +#define Uzz(i,j,k) uzz3[DATINDEX(pughGH,(i),(j),(k))] + + +/* Place the matrix in global space so we can keep it around + with the memory stripped out. Thanks, Barry! */ + +static int trips=0; +static Mat *A; /* linear system matrix */ +static Vec soln, b; /* approx solution, RHS */ +static SLES sles; /* linear solver context */ + + + +void *GetDataPtr_NextTL(cGH *GH, const char *field) { + int index, tlevel; + char *err; + + index = CCTK_VarIndex(field); + if (index<0) { + err = (char*) malloc( 256*sizeof(char)); + sprintf(err,"ERROR: Index for >%s< not found",field); + CCTK_WARN(1,err); + if (err) free(err); + } + return((CCTK_REAL*)CCTK_VarDataPtrI(GH,0,index)); +} + + +/* This passing matches that of in the convention in the + elliptic registration routine see LinearElliptic.h*/ + +void petsc_confmetric_solver(cGH *GH, int *MetricPsiI, int MetricPsiISize, + int *FieldIndex, int *MIndex, int *NIndex, + int *AbsTol, int *RelTol) { + + DECLARE_CCTK_PARAMETERS /* CCTK passed parameters */ + + + void *GetDataPtr_NextTL(cGH *GH, const char *field); + int CCTK_GetArgc(void); /* PETSc wnats to know the prog. name */ + void CCTK_GetArgv(char **argv); /* and parameters, don't think they can be passed through */ + + + PC pc; /* preconditioner context */ + KSP ksp; /* Krylov subspace method context */ + int num_A; /* Number of A-arrays as needed ny Dan's MG */ + + + double norm; /* norm of solution error */ + int ierr; /* Check the return status of petsc */ + int retcode; /* Check the return status of CCTK */ + + double a[27]; /* The stencil array */ + int idx[27]; /* The column of the stencil array */ + int rank; /* Rank of the matrix/vector */ + int its; /* Number of iterations */ + + /* Loopers */ + int i,j,k,l,m,n; + + /* loop limits put in for stencil_w !=1 Ed Evans 1/19/98 */ + int imin,imax,jmin,jmax,kmin,kmax,interior; + + pGH *pughGH; /* The pugh Extension handle */ + int myproc; /* out processor */ + + /* Tolerances */ + CCTK_REAL rtol, atol, tolerance; + + /* Values to assemble the matrix */ + CCTK_REAL two=2.0, four=4.0, tmp; + CCTK_REAL pm4, psixp,psiyp,psizp,det; + CCTK_REAL dx,dy,dz; + CCTK_REAL uxx,uxy,uxz,uyy,uyz,uzz; + CCTK_REAL Gxxx,Gxxy,Gxxz,Gxyy,Gxyz,Gxzz; /* Christoffels */ + CCTK_REAL Gyxx,Gyxy,Gyxz,Gyyy,Gyyz,Gyzz; + CCTK_REAL Gzxx,Gzxy,Gzxz,Gzyy,Gzyz,Gzzz; + CCTK_REAL dxxx,dxxy,dxxz,dxyy,dxyz,dxzz; + CCTK_REAL dyxx,dyxy,dyxz,dyyy,dyyz,dyzz; + CCTK_REAL dzxx,dzxy,dzxz,dzyy,dzyz,dzzz; + CCTK_REAL *values; + + int nxs,nys,nzs; /* Size of the grid with stencils off... */ + int startpoint=0; /* My starting index (per proc) */ + int endpoint; /* One more than my end */ + int pstart, pend; /* A check for PETSc layout */ + int pvstart, pvend; /* A check for PETSc layout */ + int verbose; /* Is the solver verbose */ + int debug; /* Is the solver debug-verbose */ + int octant; /* Apply octant BCs inside */ + int nabla_form; /* Which form of the nable */ + int reuse; /* Do PETSc reuse tricks? */ + int matnormalize; /* Normalize the central mat value to one? */ + CCTK_REAL ac; /* Storage for a(0,0,0) for renorm */ + int pctype, ksptype; /* Which PC and KSP to use. See below for + monster nested if statement. */ + int PetscTolStyle; + + /* For the upper metric form of nabla */ + CCTK_REAL *uxx3, *uxy3, *uxz3, *uyy3, *uyz3, *uzz3; + + + /* Pointers to the data of : petsc workspace/ the GF to solve / + metric / psi / derivs(psi) / Mlinear / Nlinear(source) */ + CCTK_REAL *wsp =NULL, *ell_field=NULL; + CCTK_REAL *gxx =NULL, *gxy =NULL; + CCTK_REAL *gxz =NULL, *gyy =NULL; + CCTK_REAL *gyz =NULL, *gzz =NULL; + CCTK_REAL *Mlin=NULL, *Nlin=NULL; + CCTK_REAL *psi =NULL; + CCTK_REAL *psix=NULL, *psiy=NULL, *psiz=NULL; + + /* Get some parameters */ + octant = CCTK_Equals(domain,"octant"); + verbose = CCTK_Equals(petsc_verbose,"yes")||CCTK_Equals(petsc_verbose,"debug"); + debug = CCTK_Equals(petsc_verbose,"debug"); + reuse = 0; /*$CCTK_Equals(petsc_reuse,"yes");$*/ + matnormalize = 0; /*$CCTK_Equals(petsc_coeff_to_one,"yes");$*/ + + /* FIXME, the TolAbs/TolRel will be evaluated here */ + PetscTolStyle=0; + tolerance =AbsTol[0]; + + printf("\n**** PETSC !!!!****\n\n"); + + + /* Get the link to pugh Extension */ + pughGH = (pGH*)GH->extensions[CCTK_GHExtensionHandle("PUGH")]; + if (!pughGH) CCTK_WARN(0,"ETERNAL ERROR: Cannot find PUGH Extension Handle\n"); + + /* Things to do on first iteration */ + if (trips==0) { + int argc,i; + char **argv; + +#ifdef DEBUG + if (debug) + printf("PETSc: initial trip: %d \n",trips); +#endif + + /* Get the commandline arguments */ + argc = CCTK_GetArgc(); + argv = (char**) malloc ((argc+1)*sizeof(char*)); + for (i=0;i<argc;i++) + argv[i] = (char*) malloc (256*sizeof(char)); + CCTK_GetArgv(argv); + + /* Set the PETSc communicator to set of PUGH and + initialzie PETSc */ + ierr = PetscSetCommWorld(pughGH->PUGH_COMM_WORLD); CHKERRA(ierr); + PetscInitialize(&argc,&argv,NULL,NULL); + + CCTK_INFO("PETSc initialized"); + } + + trips++; + + /* Create a array of matrices A */ + /* num_A = MultiGridCount(); */ + num_A = 1; + A=(Mat*) malloc (sizeof(Mat)*num_A); + + /* Get the data ptr of these GFs, They all have to be + on the same timelevel */ + + /* derive the metric data pointer from the index array. + Note the ordering in the metric */ + ell_field = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,*FieldIndex); + Mlin = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,*MIndex); + Nlin = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,*NIndex); + + gxx = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[0]); + gxy = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[1]); + gxz = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[2]); + gyy = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[3]); + gyz = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[4]); + gzz = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[5]); + + /* FIXME: evolved derivatives should go! -> Einstein specific*/ + if (conformal) { + psi = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[6]); + psix =(CCTK_REAL*) GetDataPtr_NextTL(GH,"einstein::psix"); + psiy =(CCTK_REAL*) GetDataPtr_NextTL(GH,"einstein::psiy"); + psiz =(CCTK_REAL*) GetDataPtr_NextTL(GH,"einstein::psiz"); + } + + wsp = GetDataPtr_NextTL(GH,"ellpetsc::wsp"); + + + /* initialize the linear index lookup table, after it's + filled up (below), the -1 indicates a boundary */ + for (i=0;i<pughGH->npoints;i++) wsp[i] = -1.0; + + /* Get myproc from the CCTK, get the gridspacing */ + myproc = pugh_MyProc(GH); + dx = GH->cctk_delta_space[0]; + dy = GH->cctk_delta_space[1]; + dz = GH->cctk_delta_space[2]; + + /* We fix the lower and upper boundary indices, that actually "active" in + the sense that they are no ghostzones: */ + imin=((pughGH->neighbors[pughGH->myproc][XDM]<0) && !(octant) ? 1 : + pughGH->nghostzones); + imax=((pughGH->neighbors[pughGH->myproc][XDP]<0) ? pughGH->lnx-1 : + pughGH->lnx-GH->cctk_nghostzones[0]); + + jmin=((pughGH->neighbors[pughGH->myproc][YDM]<0) && !(octant) ? 1 : + GH->cctk_nghostzones[1]); + jmax=((pughGH->neighbors[pughGH->myproc][YDP]<0) ? pughGH->lny-1 : + pughGH->lny-GH->cctk_nghostzones[1]); + + kmin=((pughGH->neighbors[pughGH->myproc][ZDM]<0) && !(octant) ? 1 : + GH->cctk_nghostzones[2]); + kmax=((pughGH->neighbors[pughGH->myproc][ZDP]<0) ? pughGH->lnz-1 : + pughGH->lnz-GH->cctk_nghostzones[2]); + + /* We need to get the global index of gridpoints (gps) owned on my proc. For that + we have to know how many gps are there before my processors (i<myproc), + so we count them here. (that's minus the ghostszones (we use PUGH's "rn" + variables (number of gp on proc[#] in direction [i]) and subtract the + ghostzones if nec.) */ + + for (i=0;i<myproc;i++) { + + nxs=pughGH->rnx[i]; + nxs=((pughGH->neighbors[i][XDM]<0) && !(octant) ? nxs-1 : + nxs-GH->cctk_nghostzones[0]); + nxs=((pughGH->neighbors[i][XDP]<0) ? nxs-1 : nxs-GH->cctk_nghostzones[0]); + + nys=pughGH->rny[i]; + nys=((pughGH->neighbors[i][YDM]<0) && !(octant) ? nys-1 : + nys-GH->cctk_nghostzones[1]); + nys=((pughGH->neighbors[i][YDP]<0) ? nys-1 : nys-GH->cctk_nghostzones[1]); + + nzs=pughGH->rnz[i]; + nzs=((pughGH->neighbors[i][ZDM]<0) && !(octant) ? nzs-1 : + nzs-GH->cctk_nghostzones[2]); + nzs=((pughGH->neighbors[i][ZDP]<0) ? nzs-1 : nzs-GH->cctk_nghostzones[2]); + + printf("PROC: %d nxyzs %d %d %d \n",i,nxs,nys,nzs); + startpoint += nxs*nys*nzs; + } + +#ifdef DEBUG + printf("STARTPOINT: %d \n",startpoint); +#endif + + /* ...we save that as a start ...*/ + endpoint = startpoint; + + /* ...and continue running over our own region to get our endpoint */ + for (k=imin;k<kmax;k++) + for (j=jmin;j<jmax;j++) + for (i=imin;i<imax;i++) + wsp[DATINDEX(pughGH,i,j,k)] = endpoint++; +#ifdef DEBUG + printf("ENDPOINT: %d \n",endpoint); +#endif DEBUG + + /* So now each point has a unique index of its own. If we + do a sync that means each processor knows the indiced + in the ghost zones (eg, on its neighbors) in a FD stencil + of 1 */ + retcode = pugh_SyncGroup(GH,"ellpetsc::petscworkspace"); + + + /* So woohoo. Now for each point in our ijk space, we have + information about our row in the matrix (workspace->data[ijk]) + as well as the column for the stencil of the neighbors + (workspace->data[DATINDEX(GH,i+1,j,k)] etc...). So onwards */ + + /* FIXME I don't think this works for stencil=2 ! */ +#ifdef DEBUG + if (debug) + CCTK_DumpGHInfo(GH); +#endif + + nxs = pughGH->nx-2; + nys = pughGH->ny-2; + nzs = pughGH->nz-2; + +#ifdef DEBUG + if (debug) { + printf("nxyzs %d %d %d \n",nxs,nys,nzs); + printf("lnxyz: %d %d %d \n",pughGH->lnx,pughGH->lny,pughGH->lnz); + } +#endif + + /* Suck off boundaries... */ + rank = nxs*nys*nzs; + + + /* Create the petsc matrix A and vectors x and b efficiently. + + Now for efficiency we have to be really careful here. By + setting up the index array, we are abandoning the 19-way + tri-diagonal system so we can lay out our matrices on the + processors with the data. (One one proc we will still + get 19 way tridiagonal). So we want each processor + to own only part of the matrix/vector but we know exactly + what it is, eg, from startpoint to endpoint-1 is on our + local processor (that is, row-wise ; all columns are local). + + So we can use MatCreateMPIAIJ for exactly this, and + VecCreateMPI for the vectors. + */ + + /* FIXME: perhaps only set up only on first iteration, this was a hack + by PAUL, using some inofficical petsc code, check if this is + "official", yet */ + + /*$if (trips == 0 || reuse == 0) {$*/ + if (verbose) CCTK_INFO("Creating Matrices and Vectors ...."); + ierr = MatCreateMPIAIJ(pughGH->PUGH_COMM_WORLD, + (endpoint-startpoint), /* # of rows */ + (endpoint-startpoint), /* This is confusing */ + rank,rank, /* Matrix size */ + 19,PETSC_NULL, /* Diagonal */ + 19,PETSC_NULL, /* Off diagonal */ + &A[0]); /* The output */ + CHKERRA(ierr); + ierr = VecCreateMPI(pughGH->PUGH_COMM_WORLD,(endpoint-startpoint),rank,&soln); + CHKERRA(ierr); + ierr = VecCreateMPI(pughGH->PUGH_COMM_WORLD,(endpoint-startpoint),rank,&b); + CHKERRA(ierr); + /*$}$*/ + /*$else {$*/ + /* ierr = MatRestoreValuesMemory(A); */ + /*$CCTK_WARN(0,"no reuse"); + CHKERRA(ierr); + }$*/ + + + /* Compare the PETSc layout to Cactus, this better be a match */ + ierr = VecGetOwnershipRange(soln,&pvstart,&pvend); + ierr = MatGetOwnershipRange(A[0],&pstart,&pend); + printf("CAC M-Layout: %d %d \n",startpoint, endpoint); + printf("PET M-Layout: %d %d \n",pstart, pend); + printf("PET V-Layout: %d %d \n",pvstart,pvend); + if (pstart != startpoint && pend != endpoint) + CCTK_WARN(1,"WARNING: PETSC and data layouts differ! (why??)\n"); + + nabla_form = 2; + if (verbose) + CCTK_INFO("Forming nabla with lower g and finite difference of g"); + + if (verbose) + CCTK_INFO("Creating the coefficient matrix"); + + if (verbose && matnormalize) + CCTK_INFO ("With diagonal renormalized to one"); + + for (k=kmin;k<kmax;k++) { + for (j=jmin;j<jmax;j++) { + for (i=imin;i<imax;i++) { + + if (wsp[DATINDEX(pughGH,i,j,k)] >= 0) { /* FIXME */ + + CCTK_REAL tdxgxx, tdxgxy, tdxgxz, tdxgyy, tdxgyz, tdxgzz; + CCTK_REAL tdygxx, tdygxy, tdygxz, tdygyy, tdygyz, tdygzz; + CCTK_REAL tdzgxx, tdzgxy, tdzgxz, tdzgyy, tdzgyz, tdzgzz; + + /* Set up indices */ + int ijk; /* The data point for the array */ + int ig, jg, kg; /* The position in global space */ + int I,J; /* The col and row in the matrix */ + CCTK_REAL rhsval = 0; + + for (I=0;I<27;I++) a[I] = 0.0; + + /* these guys are easy */ + ijk = DATINDEX(pughGH,i,j,k); /* get linear index */ + ig = i + GH->cctk_lbnd[0]; + jg = j + GH->cctk_lbnd[1]; + kg = k + GH->cctk_lbnd[2]; + + /* Get the row we are working on */ + I = wsp[ijk]; + + /* Setup Temporaries / Psi derivatives on psi */ + if (conformal) { + pm4 = 1./pow(psi[ijk],4.0); + psixp = psix[ijk]; + psiyp = psiy[ijk]; + psizp = psiz[ijk]; + } else { + pm4 = 1.0; + psixp = 0.0; + psiyp = 0.0; + psizp = 0.0; + } + + if (nabla_form == 2) { + /* Use finite differences of g for the d's */ + int ijkp, ijkm; + + /* X derivatives */ + ijkp = DATINDEX(pughGH,i+1,j,k); + ijkm = DATINDEX(pughGH,i-1,j,k); + + tdxgxx = (gxx[ijkp] - gxx[ijkm])/(2.0*dx); + tdxgxy = (gxy[ijkp] - gxy[ijkm])/(2.0*dx); + tdxgxz = (gxz[ijkp] - gxz[ijkm])/(2.0*dx); + tdxgyy = (gyy[ijkp] - gyy[ijkm])/(2.0*dx); + tdxgyz = (gyz[ijkp] - gyz[ijkm])/(2.0*dx); + tdxgzz = (gzz[ijkp] - gzz[ijkm])/(2.0*dx); + + + /* Y derivatives */ + ijkp = DATINDEX(pughGH,i,j+1,k); + ijkm = DATINDEX(pughGH,i,j-1,k); + + tdygxx = (gxx[ijkp] - gxx[ijkm])/(2.0*dy); + tdygxy = (gxy[ijkp] - gxy[ijkm])/(2.0*dy); + tdygxz = (gxz[ijkp] - gxz[ijkm])/(2.0*dy); + tdygyy = (gyy[ijkp] - gyy[ijkm])/(2.0*dy); + tdygyz = (gyz[ijkp] - gyz[ijkm])/(2.0*dy); + tdygzz = (gzz[ijkp] - gzz[ijkm])/(2.0*dy); + + /* X derivatives */ + ijkp = DATINDEX(pughGH,i,j,k+1); + ijkm = DATINDEX(pughGH,i,j,k-1); + + tdzgxx = (gxx[ijkp] - gxx[ijkm])/(2.0*dz); + tdzgxy = (gxy[ijkp] - gxy[ijkm])/(2.0*dz); + tdzgxz = (gxz[ijkp] - gxz[ijkm])/(2.0*dz); + tdzgyy = (gyy[ijkp] - gyy[ijkm])/(2.0*dz); + tdzgyz = (gyz[ijkp] - gyz[ijkm])/(2.0*dz); + tdzgzz = (gzz[ijkp] - gzz[ijkm])/(2.0*dz); + + /* great ... so start hacking away at the coefficients. + If this looks a lot like sor_conformal.F it is... */ + + /* Form upper metric - compute determinant */ + det= -(SQR(gxz[ijk])*gyy[ijk]) + + 2*gxy[ijk]*gxz[ijk]*gyz[ijk] - + gxx[ijk]*SQR(gyz[ijk]) - + SQR(gxy[ijk])*gzz[ijk] + + gxx[ijk]*gyy[ijk]*gzz[ijk]; + + /*invert metric. This is the conformal upper metric. */ + uxx=(-SQR(gyz[ijk]) + gyy[ijk]*gzz[ijk])/det; + uxy=(gxz[ijk]*gyz[ijk] - gxy[ijk]*gzz[ijk])/det; + uyy=(-SQR(gxz[ijk]) + gxx[ijk]*gzz[ijk])/det; + uxz=(-gxz[ijk]*gyy[ijk] + gxy[ijk]*gyz[ijk])/det; + uyz=(gxy[ijk]*gxz[ijk] - gxx[ijk]*gyz[ijk])/det; + uzz=(-SQR(gxy[ijk]) + gxx[ijk]*gyy[ijk])/det; + + /* Coeff. Contributions from second derivative */ + + /* X derivative */ + a(-1,0,0) = pm4 * uxx / (dx*dx); + a(0,0,0) = -two * pm4 * uxx / (dx*dx); + a(1,0,0) = pm4 * uxx / (dx*dx); + /* Y derivative */ + a(0,-1,0) = pm4 * uyy / (dy*dy); + a(0,0,0) = a(0,0,0) - two * pm4 * uyy / (dy*dy); + a(0,1,0) = pm4 * uyy / (dy*dy); + /* Z derivative */ + a(0,0,-1) = pm4 * uzz / (dz*dz); + a(0,0,0) = a(0,0,0) - two * pm4 * uzz / (dz*dz); + a(0,0,1) = pm4 * uzz / (dz*dz); + /* Mixed XY */ + a(1,1,0) = two * pm4 * uxy / (dx*dy); + a(-1,-1,0)= two * pm4 * uxy / (dx*dy); + a(1,-1,0) = -two * pm4 * uxy / (dx*dy); + a(-1,1,0) = -two * pm4 * uxy / (dx*dy); + /* Mixed XZ */ + a(1,0,1) = two * pm4 * uxz / (dx*dz); + a(-1,0,-1)= two * pm4 * uxz / (dx*dz); + a(1,0,-1) = -two * pm4 * uxz / (dx*dz); + a(-1,0,1) = -two * pm4 * uxz / (dx*dz); + /* Mixed YZ */ + a(0,1,1) = two * pm4 * uyz / (dz*dy); + a(0,-1,-1)= two * pm4 * uyz / (dz*dy); + a(0,-1,1) = -two * pm4 * uyz / (dz*dy); + a(0,1,-1) = -two * pm4 * uyz / (dz*dy); + + /* Great so now form christoffels. Remember that + + G_kij = psi^4 (2 * (psi_j/psi g_ik + psi_i/psi g_jk - + psi_k/psi g_ij) + + D_jik + D_ijk - D_kij) + + Since these are the DOWN christoffels, store them in d... + + NOTE however that since we will up these, we can drop the + psi^4 here and psi^-4 from the up metric. */ + + + /* These three have lots of cancelations */ + dxxx = (two * psixp * gxx[ijk] + tdxgxx); + dxxy = (two * psiyp * gxx[ijk] + tdygxx); + dxxz = (two * psizp * gxx[ijk] + tdzgxx); + /* This one has a reduction of two identical terms */ + dxyy = (four * psiyp * gxy[ijk] - + two * psixp * gyy[ijk] + + two * tdygxy - tdxgyy); + /* As does this one */ + dxzz = (four * psizp * gxz[ijk] - + two * psixp * gzz[ijk] + + two * tdzgxz - tdxgzz); + /* And this one is completely general */ + dxyz = (two * psiyp * gxz[ijk] + + two * psizp * gxy[ijk] - + two * psixp * gyz[ijk] + + tdzgxy + tdygxz - tdxgyz); + + /* Now do it twice more without the explanations */ + dyyy = (two * psiyp * gyy[ijk] + tdygyy); + dyxy = (two * psixp * gyy[ijk] + tdxgyy); + dyyz = (two * psizp * gyy[ijk] + tdzgyy); + dyxx = (four * psixp * gxy[ijk] - + two * psiyp * gxx[ijk] + + two * tdxgxy - tdygxx); + dyzz = (four * psizp * gyz[ijk] - + two * psiyp * gzz[ijk] + + two * tdzgyz - tdygzz); + dyxz = (two * psizp * gxy[ijk] + + two * psixp * gyz[ijk] - + two * psiyp * gxz[ijk] + + tdzgxy + tdxgyz - tdygxz); + + dzzz = (two * psizp * gzz[ijk] + tdzgzz); + dzxz = (two * psixp * gzz[ijk] + tdxgzz); + dzyz = (two * psiyp * gzz[ijk] + tdygzz); + dzxx = (four * psixp * gxz[ijk] - + two * psizp * gxx[ijk] + + two * tdxgxz - tdzgxx); + dzyy = (four * psiyp * gyz[ijk] - + two * psizp * gyy[ijk] + + two * tdygyz - tdzgyy); + dzxy = (two * psiyp * gxz[ijk] + + two * psixp * gyz[ijk] - + two * psizp * gxy[ijk] + + tdxgyz + tdygxz - tdzgxy); + + /* And now raise the first index */ + Gxxx = uxx*dxxx + uxy*dyxx + uxz*dzxx; + Gxxy = uxx*dxxy + uxy*dyxy + uxz*dzxy; + Gxxz = uxx*dxxz + uxy*dyxz + uxz*dzxz; + Gxyy = uxx*dxyy + uxy*dyyy + uxz*dzyy; + Gxyz = uxx*dxyz + uxy*dyyz + uxz*dzyz; + Gxzz = uxx*dxzz + uxy*dyzz + uxz*dzzz; + + Gyxx = uxy*dxxx + uyy*dyxx + uyz*dzxx; + Gyxy = uxy*dxxy + uyy*dyxy + uyz*dzxy; + Gyxz = uxy*dxxz + uyy*dyxz + uyz*dzxz; + Gyyy = uxy*dxyy + uyy*dyyy + uyz*dzyy; + Gyyz = uxy*dxyz + uyy*dyyz + uyz*dzyz; + Gyzz = uxy*dxzz + uyy*dyzz + uyz*dzzz; + + Gzxx = uxz*dxxx + uyz*dyxx + uzz*dzxx; + Gzxy = uxz*dxxy + uyz*dyxy + uzz*dzxy; + Gzxz = uxz*dxxz + uyz*dyxz + uzz*dzxz; + Gzyy = uxz*dxyy + uyz*dyyy + uzz*dzyy; + Gzyz = uxz*dxyz + uyz*dyyz + uzz*dzyz; + Gzzz = uxz*dxzz + uyz*dyzz + uzz*dzzz; + + + /* Great. So now start adding the summed contributions + from the first derivative. Note that these all have a + sign change since the term comes in - ... */ + + /* g^ij G ^x_ij */ + tmp = uxx * Gxxx + uyy * Gxyy + uzz * Gxzz + + two * uxy * Gxxy + + two * uxz * Gxxz + + two * uyz * Gxyz; + a(1,0,0) = a(1,0,0) - pm4 * tmp / (two*dx); + a(-1,0,0) = a(-1,0,0) + pm4 * tmp / (two*dx); + + /* g^ij G^y_ij */ + tmp = uxx * Gyxx + uyy * Gyyy + uzz * Gyzz + + two * uxy * Gyxy + + two * uxz * Gyxz + + two * uyz * Gyyz; + a(0,1,0) = a(0,1,0) - pm4 * tmp / (two*dy); + a(0,-1,0) = a(0,-1,0) + pm4 * tmp / (two*dy); + + /* g^ij G^z_ij */ + tmp = uxx * Gzxx + uyy * Gzyy + uzz * Gzzz + + two * uxy * Gzxy + + two * uxz * Gzxz + + two * uyz * Gzyz; + a(0,0,1) = a(0,0,1) - pm4 * tmp / (two*dz); + a(0,0,-1) = a(0,0,-1) + pm4 * tmp / (two*dz); + + } + + /* M phi */ + a(0,0,0) = a(0,0,0) - Mlin[ijk]; + + /* Great now set the values of the matrix. This is + really painful due to the boundaries (here we force + dirichlet). */ + VecSetValues(soln,1,&I,&(ell_field[ijk]),INSERT_VALUES); + + /* Put in the octant boundary conditions. + + We do these by reflecting the -1 stencil if we are at a + boundary at -1. That is, imagine the 1D laplace equation + with the boundary condition a(0) = a(1). This means + the point 1 stencil (which is our first stencil in + the matrix) becomes + + a(0) + a(2) - 2 a(1) = rho(i) * deltax -> + a(2) - 2 a(1) + a(1) = rho(i) * deltax + + OK so think about this with a general stenci + + S_0 a_0 + S_1 a_1 + S_2 a_2 = rho_1 -> + S_0 a_1 + S_1 a_1 + S_2 a_2 = rho_1 -> + (S_0 + S_1) a_1 + S_2 a_2 = rho_1 + + eg, S_0 -> 0 and S_1 -> S_1 + S_0 + + Great, so implement this in 3D. It is a bit trickier, + since we don't want to zero neighbors in the wrong + direction, but not impossible. + + */ + + if (octant) + for (l=-1;l<=0;l++) + for (m=-1;m<=0;m++) + for (n=-1;n<=0;n++) + if (l*m*n == 0) + if (wsp[DATINDEX(pughGH,i+l,j+m,k+n)] < 0 && + (ig == imin || jg == jmin || kg == kmin)) + { + /* We are on an inner boundary point, eg, + at an inner face */ + int ll,mm,nn; + /* Only zero the guys at the boundaries */ + ll = (ig == imin ? 0 : l); + mm = (jg == jmin ? 0 : m); + nn = (kg == kmin ? 0 : n); + a(ll,mm,nn) += a(l,m,n); + a(l,m,n) = 0.0; + } + + /* renormalize */ + if (matnormalize) { + ac = a(0,0,0); + for (J=0;J<27;J++) a[J] = a[J] / ac; + } + else ac = 1; + + + /* This is the new way-clever look. Note it relies + heavily on the index array in the workspace and + on multiple processors it will *NOT* make a + 19-way banded matrix. */ + + for (l=-1;l<=1;l++) + for (m=-1;m<=1;m++) + for (n=-1;n<=1;n++) { + if (l*m*n == 0) { /* This is the 19 point ... if none are + * zero, then we have no stencil here. + */ + if (wsp[DATINDEX(pughGH,i+l,j+m,k+n)] < 0) { + /* This is a boundary. */ + rhsval += a(l,m,n) * ell_field[DATINDEX(pughGH,i+l,j+m,k+n)]; + } + else { + J = wsp[DATINDEX(pughGH,i+l,j+m,k+n)]; + ierr = MatSetValues(A[0],1,&I,1,&J,&(a(l,m,n)),INSERT_VALUES); + CHKERRA(ierr); + } + } + } + + rhsval = -rhsval + Nlin[ijk] / ac; + /* printf("RHS(%d %d %d)+N: %f",i,j,k,rhsval);*/ + + ierr = VecSetValues(b,1,&I,&rhsval,INSERT_VALUES); + CHKERRA(ierr); + + } + } + } + } + + if (verbose) CCTK_INFO ("Assembling the vectors"); + ierr = MatAssemblyBegin(A[0],MAT_FINAL_ASSEMBLY); CHKERRA(ierr); + ierr = VecAssemblyBegin(soln); CHKERRA(ierr); + ierr = VecAssemblyBegin(b); CHKERRA(ierr) + ierr = MatAssemblyEnd(A[0],MAT_FINAL_ASSEMBLY); CHKERRA(ierr); + ierr = VecAssemblyEnd(soln); CHKERRA(ierr); + ierr = VecAssemblyEnd(b); CHKERRA(ierr); + ierr = MatSetOption(A[0],MAT_NO_NEW_NONZERO_LOCATIONS); CHKERRA(ierr); + + OptionsSetValue("-ksp_monitor",""); + + CCTK_INFO("CREATING SLES"); + ierr = SLESCreate(pughGH->PUGH_COMM_WORLD,&sles); CHKERRA(ierr); + + + /* + Set operators. Here the matrix that defines the linear system + also serves as the preconditioning matrix. At a later date + we can probably optimize this using SAME_NONZERO_PATTERN + and a static trip-though flag (eg, on the second trip + through do something different). Also we need to think about + using A to precondition itself. Since I don't know what this + means, we'll leave it for now. */ + + CCTK_INFO("CREATING SLES OPERATOR"); + ierr = SLESSetOperators(sles,A[0],A[0],DIFFERENT_NONZERO_PATTERN); CHKERRA(ierr); + + /* Set linear solver defaults for this problem. Later this + should be replaced/modified with appropriate parsing from the + parameter parser. For now it is not. These defaults are + reasonable, I hope. */ + CCTK_INFO("SLESGet KSP/PC"); + ierr = SLESGetKSP(sles,&ksp); CHKERRA(ierr); + ierr = SLESGetPC(sles,&pc); CHKERRA(ierr); + + /* Get the PC Type */ + if (CCTK_Equals(petsc_PC_type,"PCJACOBI")) + ierr = PCSetType(pc,PCJACOBI); + else if (CCTK_Equals(petsc_PC_type,"PCBJACOBI")) + ierr = PCSetType(pc,PCBJACOBI); + else if (CCTK_Equals(petsc_PC_type,"PCICC")) + ierr = PCSetType(pc,PCICC); + else if (CCTK_Equals(petsc_PC_type,"PCILU")) + ierr = PCSetType(pc,PCILU); + else if (CCTK_Equals(petsc_PC_type,"PCASM")) + ierr = PCSetType(pc,PCASM); + else if (CCTK_Equals(petsc_PC_type,"PCLU")) + ierr = PCSetType(pc,PCLU); + else if (CCTK_Equals(petsc_PC_type,"PCNONE")) + ierr = PCSetType(pc,PCNONE); + else { + CCTK_WARN(1,"Don't understand petsc_PC_type. Using PCNONE\n"); + ierr = PCSetType(pc,PCNONE); + } + CHKERRA(ierr); + + + /* Now the same thing for the KSP Type */ + if (CCTK_Equals(petsc_KSP_type,"KSPBCGS")) + ierr = KSPSetType(ksp,KSPBCGS); + else if (CCTK_Equals(petsc_KSP_type,"KSPRICHARDSON")) + ierr = KSPSetType(ksp,KSPRICHARDSON); + else if (CCTK_Equals(petsc_KSP_type,"KSPCHEBYCHEV")) + ierr = KSPSetType(ksp,KSPCHEBYCHEV); + else if (CCTK_Equals(petsc_KSP_type,"KSPCG")) + ierr = KSPSetType(ksp,KSPCG); + else if (CCTK_Equals(petsc_KSP_type,"KSPGMRES")) + ierr = KSPSetType(ksp,KSPGMRES); + else if (CCTK_Equals(petsc_KSP_type,"KSPCGS")) + ierr = KSPSetType(ksp,KSPCGS); + else if (CCTK_Equals(petsc_KSP_type,"KSPTFQMR")) + ierr = KSPSetType(ksp,KSPTFQMR); + else if (CCTK_Equals(petsc_KSP_type,"KSPTCQMR")) + ierr = KSPSetType(ksp,KSPTCQMR); + else if (CCTK_Equals(petsc_KSP_type,"KSPCR")) + ierr = KSPSetType(ksp,KSPCR); + else if (CCTK_Equals(petsc_KSP_type,"KSPLSQR")) + ierr = KSPSetType(ksp,KSPLSQR); + else { + CCTK_WARN (1,"I don't understand petsc_KSP_type. Using BiCGSTAB\n"); + ierr = KSPSetType(ksp,KSPBCGS); + } + CHKERRA(ierr); + + + + /* Set up tolerances */ + + if (PetscTolStyle == ELLCONV_ABSOLUTE) { + rtol = 1.0e-15; + atol = tolerance; + } else if (PetscTolStyle == ELLCONV_RELATIVE) { + rtol = tolerance; + atol = 1.0e-15; + } else if (PetscTolStyle == ELLCONV_EITHER) { + rtol = tolerance; + atol = tolerance; + } else { + printf("PETSC Solver: PetscTolStyle set incorrectly [%d]\n", + PetscTolStyle); + } + + ierr = KSPSetTolerances(ksp,rtol,atol,PETSC_DEFAULT, + PETSC_DEFAULT); + CHKERRA(ierr); + + + /* We are warned in the manual that + + The default technique for orthogonalization of the Hessenberg matrix + in GMRES is the modified Gram-Schmidt method, which employs many + VecDot() operations and can thus be slow in parallel. A fast approach + is to use the unmodified Gram-Schmidt method, which can be set with + + ierr = KSPGMRESSetOrthogonalization(KSP ksp, + KSPGMRESUnmodifiedGramSchmidtOrthogonalization); + + or the options database command + -ksp_gmres_unmodifiedgramschmidt. Note that this algorithm is + numerically unstable, but may deliver much better speed + performance. One can also use unmodifed Gram-Schmidt with + iterative refinement, by setting the orthogonalization routine, + KSPGMRESIROrthog(), by using the command line option + -ksp_gmres_irorthog. + + So this my not be a smart thing to do. But lets put it here + so we can turn it on or off later. + + */ + /*$ierr = KSPGMRESSetOrthogonalization(ksp, + KSPGMRESUnmodifiedGramSchmidtOrthogonalization); + CHKERRA(ierr);$*/ + + /* We also learn that ... + + By default, KSP assumes an initial guess of zero by zeroing the + initial value for the solution vector that is given. To use a + nonzero initial guess, the user must call + + ierr = KSPSetInitialGuessNonzero(KSP ksp); + + */ + CCTK_INFO("KSPSetInitialGuess\n"); + ierr = KSPSetInitialGuessNonzero(ksp); CHKERRA(ierr); + + /* + Set runtime options. Since we don't use PETSC runtime options + but rather use the CACTUS parameter parser, we do this before + we parse things out. But that may not be such a good idea. + So lets put it here. + */ + ierr = SLESSetFromOptions(sles); CHKERRA(ierr); + + + /* OK so finally we are able to ... */ + + /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Solve the linear system + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + CCTK_INFO("SLES solve..."); + ierr = SLESSolve(sles,b,soln,&its); CHKERRA(ierr); + + /* Here we can form a "res = Ax - b" and find its norm to get + an idea of how well we've converged. Put this in later + but we can do it using MatMultAdd() after we flip the sign + on b with VecScale() (into res, a new vector, then find the + norm of res with VecNorm() + */ + + /* Now since we now have local layout, we can just get the values + from the soln vector now matter how many processors we are + on (eg, the VecCreateMPI has made the solution local) */ + + VecGetArray(soln,&values); + for (k=kmin;k<kmax;k++) { + for (j=jmin;j<jmax;j++) { + for (i=imin;i<imax;i++) { + /* Set up indices */ + int ijk; /* The data point for the array */ + int ig, jg, kg; /* The position in global space */ + int I,J; /* The col and row in the matrix */ + + CCTK_REAL rhsval = 0; + + /* these guys are easy */ + ijk = DATINDEX(pughGH,i,j,k); + if (wsp[ijk] >= 0) { + ig = i + GH->cctk_lbnd[0]; + jg = j + GH->cctk_lbnd[1]; + kg = k + GH->cctk_lbnd[2]; + + /* Now this one. "Fortran-order" the matrix. But remember + we have stripped off the ghost zones. Hence ig-1 and nxs... + */ + I =wsp[ijk]-startpoint; + ell_field[ijk] = values[I]; + } + } + } + } + + VecRestoreArray(soln,&values); + + /* Sync var in CCTK speak. */ + pugh_SyncGroup(GH,"ellpetsc::petscworkspace"); + + /* And finally free up the matrix memory FIXME + ierr = MatReleaseValuesMemory(A); CHKERRA(ierr); */ + + /* This code is not used anymore */ + if (verbose) CCTK_INFO("Destroying Matrices"); + ierr = SLESDestroy(sles); CHKERRA(ierr); + ierr = VecDestroy(soln); CHKERRA(ierr); + ierr = VecDestroy(b); CHKERRA(ierr); + ierr = MatDestroy(A[0]); CHKERRA(ierr); + free(A); + + /*$PetscFinalize();$*/ + + if (verbose) CCTK_INFO("LEAVING ELLPETSC"); + +} diff --git a/src/petsc_confmetric_solver.c b/src/petsc_confmetric_solver.c new file mode 100644 index 0000000..c60312e --- /dev/null +++ b/src/petsc_confmetric_solver.c @@ -0,0 +1,1064 @@ +/*@@ + @file PETScEll_conformal.c + @date Wed Apr 9 09:31:24 1997 + @author Paul Walker + @desc + A nabla phi - M phi = N elliptic solver based around PETSc. + For information, see the documentation for @seeroutine petscEll_conformal + @enddesc +@@*/ + +#include "cctk.h" +#include "cctk_parameters.h" +#include "cctk_Flesh.h" + +#include "assert.h" +#include "mpi.h" +#include "sles.h" + +#include "CactusPUGH/PUGH/src/include/pugh.h" + + +#define DEBUG + +/*Don't know what these actually mean! FIXME */ +#define ELLCONV_ABSOLUTE 0 +#define ELLCONV_RELATIVE 1 +#define ELLCONV_EITHER 2 + +/* A few convenient macros */ +#define a(i,j,k) a[i+1 + 3*(j+1 + 3*(k+1))] +#define SQR(a) ((a)*(a)) + + +/* Some useful definitions to deal with the upper metric */ +#define Uxx(i,j,k) uxx3[DATINDEX(pughGH,(i),(j),(k))] +#define Uxy(i,j,k) uxy3[DATINDEX(pughGH,(i),(j),(k))] +#define Uxz(i,j,k) uxz3[DATINDEX(pughGH,(i),(j),(k))] +#define Uyy(i,j,k) uyy3[DATINDEX(pughGH,(i),(j),(k))] +#define Uyz(i,j,k) uyz3[DATINDEX(pughGH,(i),(j),(k))] +#define Uzz(i,j,k) uzz3[DATINDEX(pughGH,(i),(j),(k))] + + +/* Place the matrix in global space so we can keep it around + with the memory stripped out. Thanks, Barry! */ + +static int trips=0; +static Mat *A; /* linear system matrix */ +static Vec soln, b; /* approx solution, RHS */ +static SLES sles; /* linear solver context */ + + + +void *GetDataPtr_NextTL(cGH *GH, const char *field) { + int index, tlevel; + char *err; + + index = CCTK_VarIndex(field); + if (index<0) { + err = (char*) malloc( 256*sizeof(char)); + sprintf(err,"ERROR: Index for >%s< not found",field); + CCTK_WARN(1,err); + if (err) free(err); + } + return((CCTK_REAL*)CCTK_VarDataPtrI(GH,0,index)); +} + + +/* This passing matches that of in the convention in the + elliptic registration routine see LinearElliptic.h*/ + +void petsc_confmetric_solver(cGH *GH, int *MetricPsiI, int MetricPsiISize, + int *FieldIndex, int *MIndex, int *NIndex, + int *AbsTol, int *RelTol) { + + DECLARE_CCTK_PARAMETERS /* CCTK passed parameters */ + + + void *GetDataPtr_NextTL(cGH *GH, const char *field); + int CCTK_GetArgc(void); /* PETSc wnats to know the prog. name */ + void CCTK_GetArgv(char **argv); /* and parameters, don't think they can be passed through */ + + + PC pc; /* preconditioner context */ + KSP ksp; /* Krylov subspace method context */ + int num_A; /* Number of A-arrays as needed ny Dan's MG */ + + + double norm; /* norm of solution error */ + int ierr; /* Check the return status of petsc */ + int retcode; /* Check the return status of CCTK */ + + double a[27]; /* The stencil array */ + int idx[27]; /* The column of the stencil array */ + int rank; /* Rank of the matrix/vector */ + int its; /* Number of iterations */ + + /* Loopers */ + int i,j,k,l,m,n; + + /* loop limits put in for stencil_w !=1 Ed Evans 1/19/98 */ + int imin,imax,jmin,jmax,kmin,kmax,interior; + + pGH *pughGH; /* The pugh Extension handle */ + int myproc; /* out processor */ + + /* Tolerances */ + CCTK_REAL rtol, atol, tolerance; + + /* Values to assemble the matrix */ + CCTK_REAL two=2.0, four=4.0, tmp; + CCTK_REAL pm4, psixp,psiyp,psizp,det; + CCTK_REAL dx,dy,dz; + CCTK_REAL uxx,uxy,uxz,uyy,uyz,uzz; + CCTK_REAL Gxxx,Gxxy,Gxxz,Gxyy,Gxyz,Gxzz; /* Christoffels */ + CCTK_REAL Gyxx,Gyxy,Gyxz,Gyyy,Gyyz,Gyzz; + CCTK_REAL Gzxx,Gzxy,Gzxz,Gzyy,Gzyz,Gzzz; + CCTK_REAL dxxx,dxxy,dxxz,dxyy,dxyz,dxzz; + CCTK_REAL dyxx,dyxy,dyxz,dyyy,dyyz,dyzz; + CCTK_REAL dzxx,dzxy,dzxz,dzyy,dzyz,dzzz; + CCTK_REAL *values; + + int nxs,nys,nzs; /* Size of the grid with stencils off... */ + int startpoint=0; /* My starting index (per proc) */ + int endpoint; /* One more than my end */ + int pstart, pend; /* A check for PETSc layout */ + int pvstart, pvend; /* A check for PETSc layout */ + int verbose; /* Is the solver verbose */ + int debug; /* Is the solver debug-verbose */ + int octant; /* Apply octant BCs inside */ + int conformal; /* Do we have conformal metric ? */ + int nabla_form; /* Which form of the nable */ + int reuse; /* Do PETSc reuse tricks? */ + int matnormalize; /* Normalize the central mat value to one? */ + CCTK_REAL ac; /* Storage for a(0,0,0) for renorm */ + int pctype, ksptype; /* Which PC and KSP to use. See below for + monster nested if statement. */ + int PetscTolStyle; + + /* For the upper metric form of nabla */ + CCTK_REAL *uxx3, *uxy3, *uxz3, *uyy3, *uyz3, *uzz3; + + + /* Pointers to the data of : petsc workspace/ the GF to solve / + metric / psi / derivs(psi) / Mlinear / Nlinear(source) */ + CCTK_REAL *wsp =NULL, *ell_field=NULL; + CCTK_REAL *gxx =NULL, *gxy =NULL; + CCTK_REAL *gxz =NULL, *gyy =NULL; + CCTK_REAL *gyz =NULL, *gzz =NULL; + CCTK_REAL *Mlin=NULL, *Nlin=NULL; + CCTK_REAL *psi =NULL; + CCTK_REAL *psix=NULL, *psiy=NULL, *psiz=NULL; + + + + + octant = CCTK_Equals(domain,"octant"); + verbose = CCTK_Equals(petsc_verbose,"yes")||CCTK_Equals(petsc_verbose,"debug"); + debug = CCTK_Equals(petsc_verbose,"debug"); + reuse = 0; + matnormalize = 0; + + if (MetricPsiISize==7) conformal=1; + else if (MetricPsiISize==7) conformal=0; + else CCTK_WARN(0,"Size of the Metric must be either 7 (metric+conformal) or 6 (metric)"); + + + /* FIXME, the TolAbs/TolRel will be evaluated here */ + PetscTolStyle=0; + tolerance =AbsTol[0]; + + printf("\n**** PETSC !!!!****\n\n"); + + + /* Get the link to pugh Extension */ + pughGH = (pGH*)GH->extensions[CCTK_GHExtensionHandle("PUGH")]; + if (!pughGH) CCTK_WARN(0,"ETERNAL ERROR: Cannot find PUGH Extension Handle\n"); + + /* Things to do on first iteration */ + if (trips==0) { + int argc,i; + char **argv; + +#ifdef DEBUG + if (debug) + printf("PETSc: initial trip: %d \n",trips); +#endif + + /* Get the commandline arguments */ + argc = CCTK_GetArgc(); + argv = (char**) malloc ((argc+1)*sizeof(char*)); + for (i=0;i<argc;i++) + argv[i] = (char*) malloc (256*sizeof(char)); + CCTK_GetArgv(argv); + + /* Set the PETSc communicator to set of PUGH and + initialzie PETSc */ + ierr = PetscSetCommWorld(pughGH->PUGH_COMM_WORLD); CHKERRA(ierr); + PetscInitialize(&argc,&argv,NULL,NULL); + + CCTK_INFO("PETSc initialized"); + } + + trips++; + + /* Create a array of matrices A */ + /* num_A = MultiGridCount(); */ + num_A = 1; + A=(Mat*) malloc (sizeof(Mat)*num_A); + + /* Get the data ptr of these GFs, They all have to be + on the same timelevel */ + + /* derive the metric data pointer from the index array. + Note the ordering in the metric */ + ell_field = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,*FieldIndex); + Mlin = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,*MIndex); + Nlin = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,*NIndex); + + gxx = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[0]); + gxy = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[1]); + gxz = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[2]); + gyy = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[3]); + gyz = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[4]); + gzz = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[5]); + + /* FIXME: evolved derivatives should go! -> Einstein specific*/ + if (conformal) { + psi = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, MetricPsiI[6]); + psix =(CCTK_REAL*) GetDataPtr_NextTL(GH,"einstein::psix"); + psiy =(CCTK_REAL*) GetDataPtr_NextTL(GH,"einstein::psiy"); + psiz =(CCTK_REAL*) GetDataPtr_NextTL(GH,"einstein::psiz"); + } + + wsp = GetDataPtr_NextTL(GH,"ellpetsc::wsp"); + + + /* initialize the linear index lookup table, after it's + filled up (below), the -1 indicates a boundary */ + for (i=0;i<pughGH->npoints;i++) wsp[i] = -1.0; + + /* Get myproc from the CCTK, get the gridspacing */ + myproc = pugh_MyProc(GH); + dx = GH->cctk_delta_space[0]; + dy = GH->cctk_delta_space[1]; + dz = GH->cctk_delta_space[2]; + + /* We fix the lower and upper boundary indices, that actually "active" in + the sense that they are no ghostzones: */ + imin=((pughGH->neighbors[pughGH->myproc][XDM]<0) && !(octant) ? 1 : + pughGH->nghostzones); + imax=((pughGH->neighbors[pughGH->myproc][XDP]<0) ? pughGH->lnx-1 : + pughGH->lnx-GH->cctk_nghostzones[0]); + + jmin=((pughGH->neighbors[pughGH->myproc][YDM]<0) && !(octant) ? 1 : + GH->cctk_nghostzones[1]); + jmax=((pughGH->neighbors[pughGH->myproc][YDP]<0) ? pughGH->lny-1 : + pughGH->lny-GH->cctk_nghostzones[1]); + + kmin=((pughGH->neighbors[pughGH->myproc][ZDM]<0) && !(octant) ? 1 : + GH->cctk_nghostzones[2]); + kmax=((pughGH->neighbors[pughGH->myproc][ZDP]<0) ? pughGH->lnz-1 : + pughGH->lnz-GH->cctk_nghostzones[2]); + + /* We need to get the global index of gridpoints (gps) owned on my proc. For that + we have to know how many gps are there before my processors (i<myproc), + so we count them here. (that's minus the ghostszones (we use PUGH's "rn" + variables (number of gp on proc[#] in direction [i]) and subtract the + ghostzones if nec.) */ + + for (i=0;i<myproc;i++) { + + nxs=pughGH->rnx[i]; + nxs=((pughGH->neighbors[i][XDM]<0) && !(octant) ? nxs-1 : + nxs-GH->cctk_nghostzones[0]); + nxs=((pughGH->neighbors[i][XDP]<0) ? nxs-1 : nxs-GH->cctk_nghostzones[0]); + + nys=pughGH->rny[i]; + nys=((pughGH->neighbors[i][YDM]<0) && !(octant) ? nys-1 : + nys-GH->cctk_nghostzones[1]); + nys=((pughGH->neighbors[i][YDP]<0) ? nys-1 : nys-GH->cctk_nghostzones[1]); + + nzs=pughGH->rnz[i]; + nzs=((pughGH->neighbors[i][ZDM]<0) && !(octant) ? nzs-1 : + nzs-GH->cctk_nghostzones[2]); + nzs=((pughGH->neighbors[i][ZDP]<0) ? nzs-1 : nzs-GH->cctk_nghostzones[2]); + + printf("PROC: %d nxyzs %d %d %d \n",i,nxs,nys,nzs); + startpoint += nxs*nys*nzs; + } + +#ifdef DEBUG + printf("STARTPOINT: %d \n",startpoint); +#endif + + /* ...we save that as a start ...*/ + endpoint = startpoint; + + /* ...and continue running over our own region to get our endpoint */ + for (k=imin;k<kmax;k++) + for (j=jmin;j<jmax;j++) + for (i=imin;i<imax;i++) + wsp[DATINDEX(pughGH,i,j,k)] = endpoint++; +#ifdef DEBUG + printf("ENDPOINT: %d \n",endpoint); +#endif DEBUG + + /* So now each point has a unique index of its own. If we + do a sync that means each processor knows the indiced + in the ghost zones (eg, on its neighbors) in a FD stencil + of 1 */ + retcode = pugh_SyncGroup(GH,"ellpetsc::petscworkspace"); + + + /* So woohoo. Now for each point in our ijk space, we have + information about our row in the matrix (workspace->data[ijk]) + as well as the column for the stencil of the neighbors + (workspace->data[DATINDEX(GH,i+1,j,k)] etc...). So onwards */ + + /* FIXME I don't think this works for stencil=2 ! */ +#ifdef DEBUG + if (debug) + CCTK_DumpGHInfo(GH); +#endif + + nxs = pughGH->nx-2; + nys = pughGH->ny-2; + nzs = pughGH->nz-2; + +#ifdef DEBUG + if (debug) { + printf("nxyzs %d %d %d \n",nxs,nys,nzs); + printf("lnxyz: %d %d %d \n",pughGH->lnx,pughGH->lny,pughGH->lnz); + } +#endif + + /* Suck off boundaries... */ + rank = nxs*nys*nzs; + + + /* Create the petsc matrix A and vectors x and b efficiently. + + Now for efficiency we have to be really careful here. By + setting up the index array, we are abandoning the 19-way + tri-diagonal system so we can lay out our matrices on the + processors with the data. (One one proc we will still + get 19 way tridiagonal). So we want each processor + to own only part of the matrix/vector but we know exactly + what it is, eg, from startpoint to endpoint-1 is on our + local processor (that is, row-wise ; all columns are local). + + So we can use MatCreateMPIAIJ for exactly this, and + VecCreateMPI for the vectors. + */ + + /* FIXME: perhaps only set up only on first iteration, this was a hack + by PAUL, using some inofficical petsc code, check if this is + "official", yet */ + + /*$if (trips == 0 || reuse == 0) {$*/ + if (verbose) CCTK_INFO("Creating Matrices and Vectors ...."); + ierr = MatCreateMPIAIJ(pughGH->PUGH_COMM_WORLD, + (endpoint-startpoint), /* # of rows */ + (endpoint-startpoint), /* This is confusing */ + rank,rank, /* Matrix size */ + 19,PETSC_NULL, /* Diagonal */ + 19,PETSC_NULL, /* Off diagonal */ + &A[0]); /* The output */ + CHKERRA(ierr); + ierr = VecCreateMPI(pughGH->PUGH_COMM_WORLD,(endpoint-startpoint),rank,&soln); + CHKERRA(ierr); + ierr = VecCreateMPI(pughGH->PUGH_COMM_WORLD,(endpoint-startpoint),rank,&b); + CHKERRA(ierr); + /*$}$*/ + /*$else {$*/ + /* ierr = MatRestoreValuesMemory(A); */ + /*$CCTK_WARN(0,"no reuse"); + CHKERRA(ierr); + }$*/ + + + + + /* Compare the PETSc layout to Cactus, this better be a match */ + ierr = VecGetOwnershipRange(soln,&pvstart,&pvend); + ierr = MatGetOwnershipRange(A[0],&pstart,&pend); + printf("CAC M-Layout: %d %d \n",startpoint, endpoint); + printf("PET M-Layout: %d %d \n",pstart, pend); + printf("PET V-Layout: %d %d \n",pvstart,pvend); + if (pstart != startpoint && pend != endpoint) + CCTK_WARN(1,"WARNING: PETSC and data layouts differ! (why??)\n"); + + + /* Decide on the nabla form in PETSc: */ + if (CCTK_EQUALS(petsc_nablaform,"down")) { + if (verbose) + CCTK_INFO("Forming nabla with lower g and finite difference of dg \n"); + nabla_form = 2; + + } + else + if (CCTK_EQUALS(petsc_nablaform,"up")) { + if (verbose) + CCTK_INFO("Forming nabla with upper g and finite difference of dg \n"); + nabla_form = 3; + uxx3 = malloc(pughGH->npoints*sizeof(CCTK_REAL)); + uxy3 = malloc(pughGH->npoints*sizeof(CCTK_REAL)); + uxz3 = malloc(pughGH->npoints*sizeof(CCTK_REAL)); + uyy3 = malloc(pughGH->npoints*sizeof(CCTK_REAL)); + uyz3 = malloc(pughGH->npoints*sizeof(CCTK_REAL)); + uzz3 = malloc(pughGH->npoints*sizeof(CCTK_REAL)); + for (i=0;i<pughGH->npoints;i++) { + CCTK_REAL det; + CCTK_REAL p12; + det= -(SQR(gxz[i])*gyy[i]) + + 2*gxy[i]*gxz[i]*gyz[i] - + gxx[i]*SQR(gyz[i]) - + SQR(gxy[i])*gzz[i] + + gxx[i]*gyy[i]*gzz[i]; + + if (conformal) { + pm4 = 1./pow(psi[i],4.0); + p12 = pow(psi[i],12.0); + } else { + pm4 = 1.0; + p12 = 1.0; + } + + /*invert metric. This is the conformal upper metric. */ + uxx3[i]=(-SQR(gyz[i]) + gyy[i]*gzz[i])/det*pm4; + uxy3[i]=(gxz[i]*gyz[i] - gxy[i]*gzz[i])/det*pm4; + uyy3[i]=(-SQR(gxz[i]) + gxx[i]*gzz[i])/det*pm4; + uxz3[i]=(-gxz[i]*gyy[i] + gxy[i]*gyz[i])/det*pm4; + uyz3[i]=(gxy[i]*gxz[i] - gxx[i]*gyz[i])/det*pm4; + uzz3[i]=(-SQR(gxy[i]) + gxx[i]*gyy[i])/det*pm4; + + det = det*p12; + + /* Rescaling for the uppermetric solver */ + Mlin[i] = Mlin[i]*sqrt(det); + Nlin[i] = Nlin[i]*sqrt(det); + + uxx3[i]=uxx3[i]/(2.*dx*dx)*sqrt(det); + uyy3[i]=uyy3[i]/(2.*dy*dy)*sqrt(det); + uzz3[i]=uzz3[i]/(2.*dz*dz)*sqrt(det); + uxy3[i]=uxy3[i]/(4.*dx*dy)*sqrt(det); + uxz3[i]=uxz3[i]/(4.*dx*dz)*sqrt(det); + uyz3[i]=uyz3[i]/(4.*dy*dz)*sqrt(det); + } + } + else CCTK_WARN(0,"Don't know how to form nabla!\n"); + + if (verbose) + CCTK_INFO("Creating the coefficient matrix..."); + if (verbose && matnormalize) + CCTK_INFO ("...with diagonal renormalized to one"); + + for (k=kmin;k<kmax;k++) { + for (j=jmin;j<jmax;j++) { + for (i=imin;i<imax;i++) { + + if (wsp[DATINDEX(pughGH,i,j,k)] >= 0) { + + CCTK_REAL tdxgxx, tdxgxy, tdxgxz, tdxgyy, tdxgyz, tdxgzz; + CCTK_REAL tdygxx, tdygxy, tdygxz, tdygyy, tdygyz, tdygzz; + CCTK_REAL tdzgxx, tdzgxy, tdzgxz, tdzgyy, tdzgyz, tdzgzz; + + /* Set up indices */ + int ijk; /* The data point for the array */ + int ig, jg, kg; /* The position in global space */ + int I,J; /* The col and row in the matrix */ + CCTK_REAL rhsval = 0; + + for (I=0;I<27;I++) a[I] = 0.0; + + /* these guys are easy */ + ijk = DATINDEX(pughGH,i,j,k); /* get linear index */ + ig = i + GH->cctk_lbnd[0]; + jg = j + GH->cctk_lbnd[1]; + kg = k + GH->cctk_lbnd[2]; + + /* Get the row we are working on */ + I = wsp[ijk]; + + /* Setup Temporaries / Psi derivatives on psi */ + if (conformal) { + pm4 = 1./pow(psi[ijk],4.0); + psixp = psix[ijk]; + psiyp = psiy[ijk]; + psizp = psiz[ijk]; + } else { + pm4 = 1.0; + psixp = 0.0; + psiyp = 0.0; + psizp = 0.0; + } + + if (nabla_form == 2) { + /* Use finite differences of g for the d's */ + int ijkp, ijkm; + + /* X derivatives */ + ijkp = DATINDEX(pughGH,i+1,j,k); + ijkm = DATINDEX(pughGH,i-1,j,k); + + tdxgxx = (gxx[ijkp] - gxx[ijkm])/(2.0*dx); + tdxgxy = (gxy[ijkp] - gxy[ijkm])/(2.0*dx); + tdxgxz = (gxz[ijkp] - gxz[ijkm])/(2.0*dx); + tdxgyy = (gyy[ijkp] - gyy[ijkm])/(2.0*dx); + tdxgyz = (gyz[ijkp] - gyz[ijkm])/(2.0*dx); + tdxgzz = (gzz[ijkp] - gzz[ijkm])/(2.0*dx); + + + /* Y derivatives */ + ijkp = DATINDEX(pughGH,i,j+1,k); + ijkm = DATINDEX(pughGH,i,j-1,k); + + tdygxx = (gxx[ijkp] - gxx[ijkm])/(2.0*dy); + tdygxy = (gxy[ijkp] - gxy[ijkm])/(2.0*dy); + tdygxz = (gxz[ijkp] - gxz[ijkm])/(2.0*dy); + tdygyy = (gyy[ijkp] - gyy[ijkm])/(2.0*dy); + tdygyz = (gyz[ijkp] - gyz[ijkm])/(2.0*dy); + tdygzz = (gzz[ijkp] - gzz[ijkm])/(2.0*dy); + + /* X derivatives */ + ijkp = DATINDEX(pughGH,i,j,k+1); + ijkm = DATINDEX(pughGH,i,j,k-1); + + tdzgxx = (gxx[ijkp] - gxx[ijkm])/(2.0*dz); + tdzgxy = (gxy[ijkp] - gxy[ijkm])/(2.0*dz); + tdzgxz = (gxz[ijkp] - gxz[ijkm])/(2.0*dz); + tdzgyy = (gyy[ijkp] - gyy[ijkm])/(2.0*dz); + tdzgyz = (gyz[ijkp] - gyz[ijkm])/(2.0*dz); + tdzgzz = (gzz[ijkp] - gzz[ijkm])/(2.0*dz); + + /* great ... so start hacking away at the coefficients. + /* Form upper metric - compute determinant */ + det= -(SQR(gxz[ijk])*gyy[ijk]) + + 2*gxy[ijk]*gxz[ijk]*gyz[ijk] - + gxx[ijk]*SQR(gyz[ijk]) - + SQR(gxy[ijk])*gzz[ijk] + + gxx[ijk]*gyy[ijk]*gzz[ijk]; + + /*invert metric. This is the conformal upper metric. */ + uxx=(-SQR(gyz[ijk]) + gyy[ijk]*gzz[ijk])/det; + uxy=(gxz[ijk]*gyz[ijk] - gxy[ijk]*gzz[ijk])/det; + uyy=(-SQR(gxz[ijk]) + gxx[ijk]*gzz[ijk])/det; + uxz=(-gxz[ijk]*gyy[ijk] + gxy[ijk]*gyz[ijk])/det; + uyz=(gxy[ijk]*gxz[ijk] - gxx[ijk]*gyz[ijk])/det; + uzz=(-SQR(gxy[ijk]) + gxx[ijk]*gyy[ijk])/det; + + /* Coeff. Contributions from second derivative */ + + /* X derivative */ + a(-1,0,0) = pm4 * uxx / (dx*dx); + a(0,0,0) = -two * pm4 * uxx / (dx*dx); + a(1,0,0) = pm4 * uxx / (dx*dx); + /* Y derivative */ + a(0,-1,0) = pm4 * uyy / (dy*dy); + a(0,0,0) = a(0,0,0) - two * pm4 * uyy / (dy*dy); + a(0,1,0) = pm4 * uyy / (dy*dy); + /* Z derivative */ + a(0,0,-1) = pm4 * uzz / (dz*dz); + a(0,0,0) = a(0,0,0) - two * pm4 * uzz / (dz*dz); + a(0,0,1) = pm4 * uzz / (dz*dz); + /* Mixed XY */ + a(1,1,0) = two * pm4 * uxy / (dx*dy); + a(-1,-1,0)= two * pm4 * uxy / (dx*dy); + a(1,-1,0) = -two * pm4 * uxy / (dx*dy); + a(-1,1,0) = -two * pm4 * uxy / (dx*dy); + /* Mixed XZ */ + a(1,0,1) = two * pm4 * uxz / (dx*dz); + a(-1,0,-1)= two * pm4 * uxz / (dx*dz); + a(1,0,-1) = -two * pm4 * uxz / (dx*dz); + a(-1,0,1) = -two * pm4 * uxz / (dx*dz); + /* Mixed YZ */ + a(0,1,1) = two * pm4 * uyz / (dz*dy); + a(0,-1,-1)= two * pm4 * uyz / (dz*dy); + a(0,-1,1) = -two * pm4 * uyz / (dz*dy); + a(0,1,-1) = -two * pm4 * uyz / (dz*dy); + + /* Great so now form christoffels. Remember that + + G_kij = psi^4 (2 * (psi_j/psi g_ik + psi_i/psi g_jk - + psi_k/psi g_ij) + + D_jik + D_ijk - D_kij) + + Since these are the DOWN christoffels, store them in d... + + NOTE however that since we will up these, we can drop the + psi^4 here and psi^-4 from the up metric. */ + + + /* These three have lots of cancelations */ + dxxx = (two * psixp * gxx[ijk] + tdxgxx); + dxxy = (two * psiyp * gxx[ijk] + tdygxx); + dxxz = (two * psizp * gxx[ijk] + tdzgxx); + /* This one has a reduction of two identical terms */ + dxyy = (four * psiyp * gxy[ijk] - + two * psixp * gyy[ijk] + + two * tdygxy - tdxgyy); + /* As does this one */ + dxzz = (four * psizp * gxz[ijk] - + two * psixp * gzz[ijk] + + two * tdzgxz - tdxgzz); + /* And this one is completely general */ + dxyz = (two * psiyp * gxz[ijk] + + two * psizp * gxy[ijk] - + two * psixp * gyz[ijk] + + tdzgxy + tdygxz - tdxgyz); + + /* Now do it twice more without the explanations */ + dyyy = (two * psiyp * gyy[ijk] + tdygyy); + dyxy = (two * psixp * gyy[ijk] + tdxgyy); + dyyz = (two * psizp * gyy[ijk] + tdzgyy); + dyxx = (four * psixp * gxy[ijk] - + two * psiyp * gxx[ijk] + + two * tdxgxy - tdygxx); + dyzz = (four * psizp * gyz[ijk] - + two * psiyp * gzz[ijk] + + two * tdzgyz - tdygzz); + dyxz = (two * psizp * gxy[ijk] + + two * psixp * gyz[ijk] - + two * psiyp * gxz[ijk] + + tdzgxy + tdxgyz - tdygxz); + + dzzz = (two * psizp * gzz[ijk] + tdzgzz); + dzxz = (two * psixp * gzz[ijk] + tdxgzz); + dzyz = (two * psiyp * gzz[ijk] + tdygzz); + dzxx = (four * psixp * gxz[ijk] - + two * psizp * gxx[ijk] + + two * tdxgxz - tdzgxx); + dzyy = (four * psiyp * gyz[ijk] - + two * psizp * gyy[ijk] + + two * tdygyz - tdzgyy); + dzxy = (two * psiyp * gxz[ijk] + + two * psixp * gyz[ijk] - + two * psizp * gxy[ijk] + + tdxgyz + tdygxz - tdzgxy); + + /* And now raise the first index */ + Gxxx = uxx*dxxx + uxy*dyxx + uxz*dzxx; + Gxxy = uxx*dxxy + uxy*dyxy + uxz*dzxy; + Gxxz = uxx*dxxz + uxy*dyxz + uxz*dzxz; + Gxyy = uxx*dxyy + uxy*dyyy + uxz*dzyy; + Gxyz = uxx*dxyz + uxy*dyyz + uxz*dzyz; + Gxzz = uxx*dxzz + uxy*dyzz + uxz*dzzz; + + Gyxx = uxy*dxxx + uyy*dyxx + uyz*dzxx; + Gyxy = uxy*dxxy + uyy*dyxy + uyz*dzxy; + Gyxz = uxy*dxxz + uyy*dyxz + uyz*dzxz; + Gyyy = uxy*dxyy + uyy*dyyy + uyz*dzyy; + Gyyz = uxy*dxyz + uyy*dyyz + uyz*dzyz; + Gyzz = uxy*dxzz + uyy*dyzz + uyz*dzzz; + + Gzxx = uxz*dxxx + uyz*dyxx + uzz*dzxx; + Gzxy = uxz*dxxy + uyz*dyxy + uzz*dzxy; + Gzxz = uxz*dxxz + uyz*dyxz + uzz*dzxz; + Gzyy = uxz*dxyy + uyz*dyyy + uzz*dzyy; + Gzyz = uxz*dxyz + uyz*dyyz + uzz*dzyz; + Gzzz = uxz*dxzz + uyz*dyzz + uzz*dzzz; + + + /* Great. So now start adding the summed contributions + from the first derivative. Note that these all have a + sign change since the term comes in - ... */ + + /* g^ij G ^x_ij */ + tmp = uxx * Gxxx + uyy * Gxyy + uzz * Gxzz + + two * uxy * Gxxy + + two * uxz * Gxxz + + two * uyz * Gxyz; + a(1,0,0) = a(1,0,0) - pm4 * tmp / (two*dx); + a(-1,0,0) = a(-1,0,0) + pm4 * tmp / (two*dx); + + /* g^ij G^y_ij */ + tmp = uxx * Gyxx + uyy * Gyyy + uzz * Gyzz + + two * uxy * Gyxy + + two * uxz * Gyxz + + two * uyz * Gyyz; + a(0,1,0) = a(0,1,0) - pm4 * tmp / (two*dy); + a(0,-1,0) = a(0,-1,0) + pm4 * tmp / (two*dy); + + /* g^ij G^z_ij */ + tmp = uxx * Gzxx + uyy * Gzyy + uzz * Gzzz + + two * uxy * Gzxy + + two * uxz * Gzxz + + two * uyz * Gzyz; + a(0,0,1) = a(0,0,1) - pm4 * tmp / (two*dz); + a(0,0,-1) = a(0,0,-1) + pm4 * tmp / (two*dz); + + } /* end if nable_form==2 */ + + else { + /* nabla_form==3: Upper Metric Nabla Form */ + a(0,0,0) = -Uxx(i+1,j,k) -2.*Uxx(i,j,k) -Uxx(i-1,j,k) + -Uyy(i,j+1,k) -2.*Uyy(i,j,k) -Uyy(i,j-1,k) + -Uzz(i,j,k+1) -2.*Uzz(i,j,k) -Uzz(i,j,k-1); + + /*$ae = uxx(i+1,j,k)+uxx(i,j,k)$*/ + a(1,0,0) = Uxx(i+1,j,k) + Uxx(i,j,k); + /*$aw = uxx(i-1,j,k)+uxx(i,j,k)$*/ + a(-1,0,0) = Uxx(i-1,j,k)+Uxx(i,j,k); + /*$an = uyy(i,j+1,k)+uyy(i,j,k)$*/ + a(0,1,0) = Uyy(i,j+1,k)+Uyy(i,j,k); + /*$as = uyy(i,j-1,k)+uyy(i,j,k)$*/ + a(0,-1,0) = Uyy(i,j-1,k)+Uyy(i,j,k); + /*$at = uzz(i,j,k+1)+uzz(i,j,k)$*/ + a(0,0,1) = Uzz(i,j,k+1)+Uzz(i,j,k); + /*$ab = uzz(i,j,k-1)+uzz(i,j,k)$*/ + a(0,0,-1) = Uzz(i,j,k-1)+Uzz(i,j,k); + + /*$ane = uxy(i,j+1,k)+uxy(i+1,j,k)$*/ + a(1,1,0) = Uxy(i,j+1,k)+Uxy(i+1,j,k); + /*$anw = - uxy(i-1,j,k)-uxy(i,j+1,k)$*/ + a(-1,1,0) = - Uxy(i-1,j,k)-Uxy(i,j+1,k); + /*$ase = - uxy(i+1,j,k)-uxy(i,j-1,k)$*/ + a(1,-1,0) = - Uxy(i+1,j,k)-Uxy(i,j-1,k); + /*$asw = uxy(i-1,j,k)+uxy(i,j-1,k)$*/ + a(-1,-1,0) = Uxy(i-1,j,k)+Uxy(i,j-1,k); + /*$ate = uxz(i,j,k+1)+uxz(i+1,j,k)$*/ + a(1,0,1) = Uxz(i,j,k+1)+Uxz(i+1,j,k); + /*$atw = - uxz(i-1,j,k)-uxz(i,j,k+1)$*/ + a(-1,0,1) = - Uxz(i-1,j,k)-Uxz(i,j,k+1); + /*$abe = - uxz(i+1,j,k)-uxz(i,j,k-1)$*/ + a(1,0,-1) = - Uxz(i+1,j,k)-Uxz(i,j,k-1); + /*$abw = uxz(i-1,j,k)+uxz(i,j,k-1)$*/ + a(-1,0,-1) = Uxz(i-1,j,k)+Uxz(i,j,k-1); + /*$atn = uyz(i,j+1,k)+uyz(i,j,k+1)$*/ + a(0,1,1) = Uyz(i,j+1,k)+Uyz(i,j,k+1); + /*$ats = - uyz(i,j,k+1)-uyz(i,j-1,k)$*/ + a(0,-1,1) = - Uyz(i,j,k+1)-Uyz(i,j-1,k); + /*$abn = - uyz(i,j,k-1)-uyz(i,j+1,k)$*/ + a(0,1,-1) = - Uyz(i,j,k-1)-Uyz(i,j+1,k); + /*$asb = uyz(i,j,k-1)+uyz(i,j-1,k)$*/ + a(0,-1,-1) = Uyz(i,j,k-1)+Uyz(i,j-1,k); + } /* end nabla_form=3 */ + + /* M phi */ + a(0,0,0) = a(0,0,0) - Mlin[ijk]; + + /* Great now set the values of the matrix. This is + really painful due to the boundaries (here we force + dirichlet). */ + VecSetValues(soln,1,&I,&(ell_field[ijk]),INSERT_VALUES); + + /* Put in the octant boundary conditions. + + We do these by reflecting the -1 stencil if we are at a + boundary at -1. That is, imagine the 1D laplace equation + with the boundary condition a(0) = a(1). This means + the point 1 stencil (which is our first stencil in + the matrix) becomes + + a(0) + a(2) - 2 a(1) = rho(i) * deltax -> + a(2) - 2 a(1) + a(1) = rho(i) * deltax + + OK so think about this with a general stenci + + S_0 a_0 + S_1 a_1 + S_2 a_2 = rho_1 -> + S_0 a_1 + S_1 a_1 + S_2 a_2 = rho_1 -> + (S_0 + S_1) a_1 + S_2 a_2 = rho_1 + + eg, S_0 -> 0 and S_1 -> S_1 + S_0 + + Great, so implement this in 3D. It is a bit trickier, + since we don't want to zero neighbors in the wrong + direction, but not impossible. + + */ + + if (octant) + for (l=-1;l<=0;l++) + for (m=-1;m<=0;m++) + for (n=-1;n<=0;n++) + if (l*m*n == 0) + if (wsp[DATINDEX(pughGH,i+l,j+m,k+n)] < 0 && + (ig == imin || jg == jmin || kg == kmin)) + { + /* We are on an inner boundary point, eg, + at an inner face */ + int ll,mm,nn; + /* Only zero the guys at the boundaries */ + ll = (ig == imin ? 0 : l); + mm = (jg == jmin ? 0 : m); + nn = (kg == kmin ? 0 : n); + a(ll,mm,nn) += a(l,m,n); + a(l,m,n) = 0.0; + } + + /* renormalize */ + if (matnormalize) { + ac = a(0,0,0); + for (J=0;J<27;J++) a[J] = a[J] / ac; + } + else ac = 1; + + + /* This is the new way-clever look. Note it relies + heavily on the index array in the workspace and + on multiple processors it will *NOT* make a + 19-way banded matrix. */ + + for (l=-1;l<=1;l++) + for (m=-1;m<=1;m++) + for (n=-1;n<=1;n++) { + if (l*m*n == 0) { /* This is the 19 point ... if none are + * zero, then we have no stencil here. + */ + if (wsp[DATINDEX(pughGH,i+l,j+m,k+n)] < 0) { + /* This is a boundary. */ + rhsval += a(l,m,n) * ell_field[DATINDEX(pughGH,i+l,j+m,k+n)]; + } + else { + J = wsp[DATINDEX(pughGH,i+l,j+m,k+n)]; + ierr = MatSetValues(A[0],1,&I,1,&J,&(a(l,m,n)),INSERT_VALUES); + CHKERRA(ierr); + } + } + } + + rhsval = -rhsval + Nlin[ijk] / ac; + /* printf("RHS(%d %d %d)+N: %f",i,j,k,rhsval);*/ + + ierr = VecSetValues(b,1,&I,&rhsval,INSERT_VALUES); + CHKERRA(ierr); + + } + } + } + } + + if (verbose) CCTK_INFO ("Assembling the vectors"); + ierr = MatAssemblyBegin(A[0],MAT_FINAL_ASSEMBLY); CHKERRA(ierr); + ierr = VecAssemblyBegin(soln); CHKERRA(ierr); + ierr = VecAssemblyBegin(b); CHKERRA(ierr) + ierr = MatAssemblyEnd(A[0],MAT_FINAL_ASSEMBLY); CHKERRA(ierr); + ierr = VecAssemblyEnd(soln); CHKERRA(ierr); + ierr = VecAssemblyEnd(b); CHKERRA(ierr); + ierr = MatSetOption(A[0],MAT_NO_NEW_NONZERO_LOCATIONS); CHKERRA(ierr); + + if (nabla_form == 3) { + if (verbose) + printf ("Freeing upper metric storage\n"); + free(uxx3); + free(uxy3); + free(uxz3); + free(uyy3); + free(uyz3); + free(uzz3); + } + if (trips==0) + OptionsSetValue("-ksp_monitor",""); + + CCTK_INFO("CREATING SLES"); + ierr = SLESCreate(pughGH->PUGH_COMM_WORLD,&sles); CHKERRA(ierr); + + + /* + Set operators. Here the matrix that defines the linear system + also serves as the preconditioning matrix. At a later date + we can probably optimize this using SAME_NONZERO_PATTERN + and a static trip-though flag (eg, on the second trip + through do something different). Also we need to think about + using A to precondition itself. Since I don't know what this + means, we'll leave it for now. */ + + CCTK_INFO("CREATING SLES OPERATOR"); + ierr = SLESSetOperators(sles,A[0],A[0],DIFFERENT_NONZERO_PATTERN); CHKERRA(ierr); + + /* Set linear solver defaults for this problem. Later this + should be replaced/modified with appropriate parsing from the + parameter parser. For now it is not. These defaults are + reasonable, I hope. */ + CCTK_INFO("SLESGet KSP/PC"); + ierr = SLESGetKSP(sles,&ksp); CHKERRA(ierr); + ierr = SLESGetPC(sles,&pc); CHKERRA(ierr); + + /* Get the PC Type */ + if (CCTK_Equals(petsc_PC_type,"PCJACOBI")) + ierr = PCSetType(pc,PCJACOBI); + else if (CCTK_Equals(petsc_PC_type,"PCBJACOBI")) + ierr = PCSetType(pc,PCBJACOBI); + else if (CCTK_Equals(petsc_PC_type,"PCICC")) + ierr = PCSetType(pc,PCICC); + else if (CCTK_Equals(petsc_PC_type,"PCILU")) + ierr = PCSetType(pc,PCILU); + else if (CCTK_Equals(petsc_PC_type,"PCASM")) + ierr = PCSetType(pc,PCASM); + else if (CCTK_Equals(petsc_PC_type,"PCLU")) + ierr = PCSetType(pc,PCLU); + else if (CCTK_Equals(petsc_PC_type,"PCNONE")) + ierr = PCSetType(pc,PCNONE); + else { + CCTK_WARN(1,"Don't understand petsc_PC_type. Using PCNONE\n"); + ierr = PCSetType(pc,PCNONE); + } + CHKERRA(ierr); + + + /* Now the same thing for the KSP Type */ + if (CCTK_Equals(petsc_KSP_type,"KSPBCGS")) + ierr = KSPSetType(ksp,KSPBCGS); + else if (CCTK_Equals(petsc_KSP_type,"KSPRICHARDSON")) + ierr = KSPSetType(ksp,KSPRICHARDSON); + else if (CCTK_Equals(petsc_KSP_type,"KSPCHEBYCHEV")) + ierr = KSPSetType(ksp,KSPCHEBYCHEV); + else if (CCTK_Equals(petsc_KSP_type,"KSPCG")) + ierr = KSPSetType(ksp,KSPCG); + else if (CCTK_Equals(petsc_KSP_type,"KSPGMRES")) + ierr = KSPSetType(ksp,KSPGMRES); + else if (CCTK_Equals(petsc_KSP_type,"KSPCGS")) + ierr = KSPSetType(ksp,KSPCGS); + else if (CCTK_Equals(petsc_KSP_type,"KSPTFQMR")) + ierr = KSPSetType(ksp,KSPTFQMR); + else if (CCTK_Equals(petsc_KSP_type,"KSPTCQMR")) + ierr = KSPSetType(ksp,KSPTCQMR); + else if (CCTK_Equals(petsc_KSP_type,"KSPCR")) + ierr = KSPSetType(ksp,KSPCR); + else if (CCTK_Equals(petsc_KSP_type,"KSPLSQR")) + ierr = KSPSetType(ksp,KSPLSQR); + else { + CCTK_WARN (1,"I don't understand petsc_KSP_type. Using BiCGSTAB\n"); + ierr = KSPSetType(ksp,KSPBCGS); + } + CHKERRA(ierr); + + + + /* Set up tolerances */ + + if (PetscTolStyle == ELLCONV_ABSOLUTE) { + rtol = 1.0e-15; + atol = tolerance; + } else if (PetscTolStyle == ELLCONV_RELATIVE) { + rtol = tolerance; + atol = 1.0e-15; + } else if (PetscTolStyle == ELLCONV_EITHER) { + rtol = tolerance; + atol = tolerance; + } else { + printf("PETSC Solver: PetscTolStyle set incorrectly [%d]\n", + PetscTolStyle); + } + + ierr = KSPSetTolerances(ksp,rtol,atol,PETSC_DEFAULT, + PETSC_DEFAULT); + CHKERRA(ierr); + + + /* We are warned in the manual that + + The default technique for orthogonalization of the Hessenberg matrix + in GMRES is the modified Gram-Schmidt method, which employs many + VecDot() operations and can thus be slow in parallel. A fast approach + is to use the unmodified Gram-Schmidt method, which can be set with + + ierr = KSPGMRESSetOrthogonalization(KSP ksp, + KSPGMRESUnmodifiedGramSchmidtOrthogonalization); + + or the options database command + -ksp_gmres_unmodifiedgramschmidt. Note that this algorithm is + numerically unstable, but may deliver much better speed + performance. One can also use unmodifed Gram-Schmidt with + iterative refinement, by setting the orthogonalization routine, + KSPGMRESIROrthog(), by using the command line option + -ksp_gmres_irorthog. + + So this my not be a smart thing to do. But lets put it here + so we can turn it on or off later. + + */ + /*$ierr = KSPGMRESSetOrthogonalization(ksp, + KSPGMRESUnmodifiedGramSchmidtOrthogonalization); + CHKERRA(ierr);$*/ + + /* We also learn that ... + + By default, KSP assumes an initial guess of zero by zeroing the + initial value for the solution vector that is given. To use a + nonzero initial guess, the user must call + + ierr = KSPSetInitialGuessNonzero(KSP ksp); + + */ + CCTK_INFO("KSPSetInitialGuess\n"); + ierr = KSPSetInitialGuessNonzero(ksp); CHKERRA(ierr); + + /* + Set runtime options. Since we don't use PETSC runtime options + but rather use the CACTUS parameter parser, we do this before + we parse things out. But that may not be such a good idea. + So lets put it here. + */ + ierr = SLESSetFromOptions(sles); CHKERRA(ierr); + + + /* OK so finally we are able to ... */ + + /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Solve the linear system + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + CCTK_INFO("SLES solve..."); + ierr = SLESSolve(sles,b,soln,&its); CHKERRA(ierr); + + /* Here we can form a "res = Ax - b" and find its norm to get + an idea of how well we've converged. Put this in later + but we can do it using MatMultAdd() after we flip the sign + on b with VecScale() (into res, a new vector, then find the + norm of res with VecNorm() + */ + + /* Now since we now have local layout, we can just get the values + from the soln vector now matter how many processors we are + on (eg, the VecCreateMPI has made the solution local) */ + + VecGetArray(soln,&values); + for (k=kmin;k<kmax;k++) { + for (j=jmin;j<jmax;j++) { + for (i=imin;i<imax;i++) { + /* Set up indices */ + int ijk; /* The data point for the array */ + int ig, jg, kg; /* The position in global space */ + int I,J; /* The col and row in the matrix */ + + CCTK_REAL rhsval = 0; + + /* these guys are easy */ + ijk = DATINDEX(pughGH,i,j,k); + if (wsp[ijk] >= 0) { + ig = i + GH->cctk_lbnd[0]; + jg = j + GH->cctk_lbnd[1]; + kg = k + GH->cctk_lbnd[2]; + + /* Now this one. "Fortran-order" the matrix. But remember + we have stripped off the ghost zones. Hence ig-1 and nxs... + */ + I =wsp[ijk]-startpoint; + ell_field[ijk] = values[I]; + } + } + } + } + + VecRestoreArray(soln,&values); + + /* Sync var in CCTK speak. */ + pugh_SyncGroup(GH,"ellpetsc::petscworkspace"); + + /* And finally free up the matrix memory FIXME + ierr = MatReleaseValuesMemory(A); CHKERRA(ierr); */ + + /* This code is not used anymore */ + if (verbose) CCTK_INFO("Destroying Matrices"); + ierr = SLESDestroy(sles); CHKERRA(ierr); + ierr = VecDestroy(soln); CHKERRA(ierr); + ierr = VecDestroy(b); CHKERRA(ierr); + ierr = MatDestroy(A[0]); CHKERRA(ierr); + free(A); + + /*$PetscFinalize();$*/ + + if (verbose) CCTK_INFO("LEAVING ELLPETSC"); + +} diff --git a/src/petsc_wrapper.c b/src/petsc_wrapper.c new file mode 100644 index 0000000..8cc39a4 --- /dev/null +++ b/src/petsc_wrapper.c @@ -0,0 +1,32 @@ + +#include "cctk.h" +#include "cctk_parameters.h" +#include "cctk_Flesh.h" + + +void petsc_metric(cGH *GH, int *MetricI, int *FieldIndex, + int *MIndex, int *NIndex, + int *AbsTol, int *RelTol) { + + void petsc_confmetric_solver(cGH *GH, int *MetricI, int MetricISize, + int *FieldIndex, int *MIndex, int *NIndex, + int *AbsTol, int *RelTol); + + petsc_confmetric_solver(GH, MetricI, 6, + FieldIndex, MIndex, NIndex, + AbsTol, RelTol); +} + + +void petsc_confmetric(cGH *GH, int *MetricPsiI, int *FieldIndex, + int *MIndex, int *NIndex, + int *AbsTol, int *RelTol) { + + void petsc_confmetric_solver(cGH *GH, int *MetricI, int MetricSize, + int *FieldIndex, int *MIndex, int *NIndex, + int *AbsTol, int *RelTol); + + petsc_confmetric_solver(GH, MetricPsiI, 7, + FieldIndex, MIndex, NIndex, + AbsTol, RelTol); +} |