authorlanfer <lanfer@1d96b42b-98df-4a6a-9d84-1b24288d4588>1999-09-15 08:15:56 +0000
committerlanfer <lanfer@1d96b42b-98df-4a6a-9d84-1b24288d4588>1999-09-15 08:15:56 +0000
commitf931c69d368ae51b5d3b8f483cd73f5a4231ad6a (patch)
tree0f2f8aa298a4b359bc8db90108cc502cc59348fe
parent777dd97165113011b9e1d0ca558217dafe9e7aae (diff)
a flat space solver, very beta
git-svn-id: http://svn.cactuscode.org/arrangements/CactusElliptic/EllPETSc/trunk@8 1d96b42b-98df-4a6a-9d84-1b24288d4588
-rw-r--r--  src/petsc_flat_solver.c  691
1 file changed, 691 insertions, 0 deletions
diff --git a/src/petsc_flat_solver.c b/src/petsc_flat_solver.c
new file mode 100644
index 0000000..890289a
--- /dev/null
+++ b/src/petsc_flat_solver.c
@@ -0,0 +1,691 @@
+
+#include "cctk.h"
+#include "cctk_parameters.h"
+#include "cctk_Flesh.h"
+
+#include "assert.h"
+#include "mpi.h"
+#include "sles.h"
+
+#include "CactusPUGH/PUGH/src/include/pugh.h"
+
+
+#define DEBUG
+
+/* Don't know what these actually mean! FIXME */
+#define ELLCONV_ABSOLUTE 0
+#define ELLCONV_RELATIVE 1
+#define ELLCONV_EITHER 2
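+/* judging from the tolerance setup further down: ABSOLUTE means
+ atol=tolerance, RELATIVE means rtol=tolerance, EITHER sets both
+ (stop on whichever PETSc hits first) */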
+
+/* A few convenient macros */
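+/* a(i,j,k) indexes the flat 27-element stencil array by offsets
+ i,j,k in -1..1: element (i+1) + 3*(j+1) + 9*(k+1) */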
+#define a(i,j,k) a[i+1 + 3*(j+1 + 3*(k+1))]
+#define SQR(a) ((a)*(a))
+
+
+
+/* Place the matrix in global space so we can keep it around
+ with the memory stripped out. Thanks, Barry! */
+
+static int trips=0;
+static Mat *A; /* linear system matrix */
+static Vec soln, b; /* approx solution, RHS */
+static SLES sles; /* linear solver context */
+
+
+
+void petsc_flat(cGH *GH, int *FieldIndex, int *MIndex, int *NIndex,
+ int *AbsTol, int *RelTol) {
+
+
+ DECLARE_CCTK_PARAMETERS /* CCTK passed parameters */
+
+
+ void *GetDataPtr_NextTL(cGH *GH, const char *field);
+ int CCTK_GetArgc(void); /* PETSc wants to know the prog. name */
+ void CCTK_GetArgv(char **argv); /* and parameters; don't think they can be passed through */
+
+
+ PC pc; /* preconditioner context */
+ KSP ksp; /* Krylov subspace method context */
+ int num_A; /* Number of A-arrays as needed by Dan's MG */
+
+
+ double norm; /* norm of solution error */
+ int ierr; /* Check the return status of petsc */
+ int retcode; /* Check the return status of CCTK */
+
+ double a[27]; /* The stencil array */
+ int idx[27]; /* The column of the stencil array */
+ int rank; /* Rank of the matrix/vector */
+ int its; /* Number of iterations */
+
+ /* Loopers */
+ int i,j,k,l,m,n,ll;
+
+ /* loop limits put in for stencil_w !=1 Ed Evans 1/19/98 */
+ int imin,imax,jmin,jmax,kmin,kmax,interior;
+
+ pGH *pughGH; /* The pugh Extension handle */
+ int myproc; /* our processor */
+
+ /* Tolerances */
+ CCTK_REAL rtol, atol, tolerance;
+
+ /* Values to assemble the matrix */
+ CCTK_REAL two=2.0, four=4.0, tmp;
+ CCTK_REAL pm4, psixp,psiyp,psizp,det;
+ CCTK_REAL dx,dy,dz;
+ CCTK_REAL uxx,uxy,uxz,uyy,uyz,uzz;
+ CCTK_REAL Gxxx,Gxxy,Gxxz,Gxyy,Gxyz,Gxzz; /* Christoffels */
+ CCTK_REAL Gyxx,Gyxy,Gyxz,Gyyy,Gyyz,Gyzz;
+ CCTK_REAL Gzxx,Gzxy,Gzxz,Gzyy,Gzyz,Gzzz;
+ CCTK_REAL dxxx,dxxy,dxxz,dxyy,dxyz,dxzz;
+ CCTK_REAL dyxx,dyxy,dyxz,dyyy,dyyz,dyzz;
+ CCTK_REAL dzxx,dzxy,dzxz,dzyy,dzyz,dzzz;
+ CCTK_REAL *values;
+
+ int nxs,nys,nzs; /* Size of the grid with stencils off... */
+ int startpoint=0; /* My starting index (per proc) */
+ int endpoint; /* One more than my end */
+ int pstart, pend; /* A check for PETSc layout */
+ int pvstart, pvend; /* A check for PETSc layout */
+ int verbose; /* Is the solver verbose */
+ int debug; /* Is the solver debug-verbose */
+ int octant; /* Apply octant BCs inside */
+ int conformal; /* Do we have conformal metric ? */
+ int nabla_form; /* Which form of the nabla */
+ int reuse; /* Do PETSc reuse tricks? */
+ int matnormalize; /* Normalize the central mat value to one? */
+ CCTK_REAL ac; /* Storage for a(0,0,0) for renorm */
+ int pctype, ksptype; /* Which PC and KSP to use. See below for
+ monster nested if statement. */
+ int PetscTolStyle;
+
+ /* For the upper metric form of nabla */
+ CCTK_REAL *uxx3=NULL, *uxy3=NULL, *uxz3=NULL, *uyy3=NULL, *uyz3=NULL, *uzz3=NULL;
+
+
+ /* Pointers to the data of : petsc workspace/ the GF to solve /
+ metric / psi / derivs(psi) / Mlinear / Nlinear(source) */
+ CCTK_REAL *wsp =NULL, *ell_field=NULL;
+
+ CCTK_REAL *Mlin=NULL, *Nlin=NULL;
+
+ int Mstorage=0, Nstorage=0;
+
+
+ octant = CCTK_Equals(domain,"octant");
+ verbose = CCTK_Equals(petsc_verbose,"yes")||
+ CCTK_Equals(petsc_verbose,"debug");
+ debug = CCTK_Equals(petsc_verbose,"debug");
+ reuse = 0;
+ matnormalize = 0;
+ nabla_form = 0; /* flat solver: the upper-metric nabla is not used here */
+
+ /* FIXME, the TolAbs/TolRel will be evaluated here */
+ PetscTolStyle=0;
+ tolerance = AbsTol[0];
+
+ /* Get the link to pugh Extension */
+ pughGH = (pGH*)GH->extensions[CCTK_GHExtensionHandle("PUGH")];
+ if (!pughGH) CCTK_WARN(0,"INTERNAL ERROR: Cannot find PUGH Extension Handle\n");
+
+ /* Things to do on first iteration */
+ if (trips==0) {
+ int CCTK_GetCommandLine(char ***outargv);
+ int argc;
+ char **argv;
+
+#ifdef DEBUG
+ if (debug)
+ printf("PETSc: initial trip: %d \n",trips);
+#endif
+
+ argc = CCTK_GetCommandLine(&argv);
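+ /* PetscSetCommWorld must be called before PetscInitialize so PETSc
+ adopts PUGH's communicator instead of MPI_COMM_WORLD */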
+ ierr = PetscSetCommWorld(pughGH->PUGH_COMM_WORLD); CHKERRA(ierr);
+ PetscInitialize(&argc,&argv,NULL,NULL);
+
+ CCTK_INFO("PETSc initialized");
+ }
+
+ trips++;
+
+ num_A = 1;
+ A=(Mat*) malloc (sizeof(Mat)*num_A);
+
+ /* Get the data pointers of these GFs; they all have to be
+ on the same timelevel */
+
+ /* derive the data pointer for the GF to solve from the index array */
+ ell_field = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,*FieldIndex);
+
+ /* if we have a negative index, this GF is not needed,
+ therefore don't even look for it. When the index is positive,
+ set the flag Mstorage=1; ditto for N */
+ if (*MIndex>=0) {
+ Mlin = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,*MIndex);
+ Mstorage = 1;
+ }
+ if (*NIndex>=0) {
+ Nlin = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,*NIndex);
+ Nstorage = 1;
+ }
+ wsp = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,CCTK_VarIndex("ellpetsc::wsp"));
+
+ /* initialize the linear index lookup table; after it is
+ filled up (below), a -1 indicates a boundary */
+ for (i=0;i<pughGH->npoints;i++) wsp[i] = -1.0;
+
+ /* Get myproc from the CCTK, get the gridspacing */
+ myproc = pugh_MyProc(GH);
+ dx = GH->cctk_delta_space[0];
+ dy = GH->cctk_delta_space[1];
+ dz = GH->cctk_delta_space[2];
+
+ /* We fix the lower and upper boundary indices that are actually "active",
+ in the sense that they are not ghostzones: */
+ imin=((pughGH->neighbors[pughGH->myproc][XDM]<0) && !(octant) ? 1 :
+ pughGH->nghostzones);
+ imax=((pughGH->neighbors[pughGH->myproc][XDP]<0) ? pughGH->lnx-1 :
+ pughGH->lnx-GH->cctk_nghostzones[0]);
+
+ jmin=((pughGH->neighbors[pughGH->myproc][YDM]<0) && !(octant) ? 1 :
+ GH->cctk_nghostzones[1]);
+ jmax=((pughGH->neighbors[pughGH->myproc][YDP]<0) ? pughGH->lny-1 :
+ pughGH->lny-GH->cctk_nghostzones[1]);
+
+ kmin=((pughGH->neighbors[pughGH->myproc][ZDM]<0) && !(octant) ? 1 :
+ GH->cctk_nghostzones[2]);
+ kmax=((pughGH->neighbors[pughGH->myproc][ZDP]<0) ? pughGH->lnz-1 :
+ pughGH->lnz-GH->cctk_nghostzones[2]);
+
+ /* We need the global index of the gridpoints (gps) owned by my proc.
+ For that we have to know how many gps there are on the processors
+ before mine (i<myproc), so we count them here, minus the ghostzones
+ (we use PUGH's "rn" variables (number of gps on proc[#] in
+ direction [i]) and subtract the ghostzones where necessary). */
+
+ for (i=0;i<myproc;i++) {
+
+ nxs=pughGH->rnx[i];
+ nxs=((pughGH->neighbors[i][XDM]<0) && !(octant) ? nxs-1 :
+ nxs-GH->cctk_nghostzones[0]);
+ nxs=((pughGH->neighbors[i][XDP]<0) ? nxs-1 : nxs-GH->cctk_nghostzones[0]);
+
+ nys=pughGH->rny[i];
+ nys=((pughGH->neighbors[i][YDM]<0) && !(octant) ? nys-1 :
+ nys-GH->cctk_nghostzones[1]);
+ nys=((pughGH->neighbors[i][YDP]<0) ? nys-1 : nys-GH->cctk_nghostzones[1]);
+
+ nzs=pughGH->rnz[i];
+ nzs=((pughGH->neighbors[i][ZDM]<0) && !(octant) ? nzs-1 :
+ nzs-GH->cctk_nghostzones[2]);
+ nzs=((pughGH->neighbors[i][ZDP]<0) ? nzs-1 : nzs-GH->cctk_nghostzones[2]);
+
+ printf("PROC: %d nxyzs %d %d %d \n",i,nxs,nys,nzs);
+ startpoint += nxs*nys*nzs;
+ }
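+ /* Example: with (say) two procs each owning 8x8x8 interior points,
+ proc 0 gets rows 0..511 and proc 1 starts at row 512 */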
+
+#ifdef DEBUG
+ printf("STARTPOINT: %d \n",startpoint);
+#endif
+
+ /* ...we save that as a start ...*/
+ endpoint = startpoint;
+
+ /* ...and continue running over our own region to get our endpoint */
+ for (k=imin;k<kmax;k++)
+ for (j=jmin;j<jmax;j++)
+ for (i=imin;i<imax;i++)
+ wsp[DATINDEX(pughGH,i,j,k)] = endpoint++;
+#ifdef DEBUG
+ printf("ENDPOINT: %d \n",endpoint);
+#endif /* DEBUG */
+
+ /* So now each point has a unique index of its own. After
+ a sync, each processor also knows the indices
+ in the ghost zones (eg, on its neighbors) for a FD stencil
+ of 1 */
+ retcode = pugh_SyncGroup(GH,"ellpetsc::petscworkspace");
+
+
+ /* So woohoo. Now for each point in our ijk space, we have
+ information about our row in the matrix (workspace->data[ijk])
+ as well as the column for the stencil of the neighbors
+ (workspace->data[DATINDEX(GH,i+1,j,k)] etc...). So onwards */
+
+ nxs = pughGH->nx-2;
+ nys = pughGH->ny-2;
+ nzs = pughGH->nz-2;
+
+#ifdef DEBUG
+ if (debug) {
+ printf("nxyzs %d %d %d \n",nxs,nys,nzs);
+ printf("lnxyz: %d %d %d \n",pughGH->lnx,pughGH->lny,pughGH->lnz);
+ }
+#endif
+
+ /* Suck off boundaries... */
+ rank = nxs*nys*nzs;
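+ /* rank is the global matrix/vector dimension: all interior points
+ across all processors */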
+
+
+ /* Create the petsc matrix A and vectors x and b efficiently.
+
+ Now for efficiency we have to be really careful here. By
+ setting up the index array, we are abandoning the 19-way
+ tri-diagonal system so we can lay out our matrices on the
+ processors with the data. (On one proc we will still
+ get a 19-way tridiagonal.) So we want each processor
+ to own only part of the matrix/vector, but we know exactly
+ what it is, eg, from startpoint to endpoint-1 is on our
+ local processor (that is, row-wise; all columns are local).
+
+ So we can use MatCreateMPIAIJ for exactly this, and
+ VecCreateMPI for the vectors.
+ */
+
+ /* FIXME: perhaps set this up only on the first iteration; this was a hack
+ by PAUL, using some unofficial petsc code. Check if this is
+ "official" yet */
+
+ /*$if (trips == 0 || reuse == 0) {$*/
+ if (verbose) CCTK_INFO("Creating Matrices and Vectors ....");
+ ierr = MatCreateMPIAIJ(pughGH->PUGH_COMM_WORLD,
+ (endpoint-startpoint), /* # of rows */
+ (endpoint-startpoint), /* This is confusing */
+ rank,rank, /* Matrix size */
+ 19,PETSC_NULL, /* Diagonal */
+ 19,PETSC_NULL, /* Off diagonal */
+ &A[0]); /* The output */
+ CHKERRA(ierr);
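+ /* The two (endpoint-startpoint) arguments are the local row and
+ column counts, rank,rank is the global size, and 19,PETSC_NULL
+ preallocates about 19 nonzeros per row in the diagonal and
+ off-diagonal blocks */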
+ ierr = VecCreateMPI(pughGH->PUGH_COMM_WORLD,(endpoint-startpoint),rank,&soln);
+ CHKERRA(ierr);
+ ierr = VecCreateMPI(pughGH->PUGH_COMM_WORLD,(endpoint-startpoint),rank,&b);
+ CHKERRA(ierr);
+ /*$}$*/
+ /*$else {$*/
+ /* ierr = MatRestoreValuesMemory(A); */
+ /*$CCTK_WARN(0,"no reuse");
+ CHKERRA(ierr);
+ }$*/
+
+
+
+
+ /* Compare the PETSc layout to Cactus, this better be a match */
+ ierr = VecGetOwnershipRange(soln,&pvstart,&pvend);
+ ierr = MatGetOwnershipRange(A[0],&pstart,&pend);
+ printf("CAC M-Layout: %d %d \n",startpoint, endpoint);
+ printf("PET M-Layout: %d %d \n",pstart, pend);
+ printf("PET V-Layout: %d %d \n",pvstart,pvend);
+ if (pstart != startpoint || pend != endpoint)
+ CCTK_WARN(1,"WARNING: PETSC and data layouts differ! (why??)\n");
+
+
+ for (k=kmin;k<kmax;k++) {
+ for (j=jmin;j<jmax;j++) {
+ for (i=imin;i<imax;i++) {
+
+ if (wsp[DATINDEX(pughGH,i,j,k)] >= 0) {
+
+ /* Set up indices */
+ int ijk; /* The data point for the array */
+ int ig, jg, kg; /* The position in global space */
+ int I,J; /* The row and column in the matrix */
+ CCTK_REAL rhsval = 0;
+
+ for (I=0;I<27;I++) a[I] = 0.0;
+ a( 0, 0, 0) = -10.38;
+ a( 1, 0, 0) = 1.56; /* ae */
+ a(-1, 0, 0) = 1.56; /* aw */
+ a( 0, 1, 0) = 1.56; /* an */
+ a( 0,-1, 0) = 1.56; /* as */
+ a( 0, 0, 1) = -1.56; /* at */
+ a( 0, 0,-1) = -1.56; /* ab */
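+ /* hard-coded stencil coefficients (the "very beta" part); presumably
+ placeholders for the real flat-space operator built from dx,dy,dz */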
+
+ /* these guys are easy */
+ ijk = DATINDEX(pughGH,i,j,k); /* get linear index */
+ ig = i + GH->cctk_lbnd[0];
+ jg = j + GH->cctk_lbnd[1];
+ kg = k + GH->cctk_lbnd[2];
+
+ /* Get the row we are working on */
+ I = wsp[ijk];
+
+ /* M phi */
+ if (Mstorage)
+ a(0,0,0) = a(0,0,0) - Mlin[ijk];
+
+ /* Great, now set the values of the matrix. This is
+ really painful due to the boundaries (here we force
+ Dirichlet). */
+ VecSetValues(soln,1,&I,&(ell_field[ijk]),INSERT_VALUES);
+
+ /* Put in the octant boundary conditions.
+
+ We do these by reflecting the -1 stencil if we are at a
+ boundary at -1. That is, imagine the 1D laplace equation
+ with the boundary condition a(0) = a(1). This means
+ the point 1 stencil (which is our first stencil in
+ the matrix) becomes
+
+ a(0) + a(2) - 2 a(1) = rho(i) * deltax ->
+ a(2) - 2 a(1) + a(1) = rho(i) * deltax
+
+ OK so think about this with a general stencil:
+
+ S_0 a_0 + S_1 a_1 + S_2 a_2 = rho_1 ->
+ S_0 a_1 + S_1 a_1 + S_2 a_2 = rho_1 ->
+ (S_0 + S_1) a_1 + S_2 a_2 = rho_1
+
+ eg, S_0 -> 0 and S_1 -> S_1 + S_0
+
+ Great, so implement this in 3D. It is a bit trickier,
+ since we don't want to zero neighbors in the wrong
+ direction, but not impossible.
+
+ */
+
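+ /* Debug: dump the full 3x3x3 stencil for one hard-wired gridpoint
+ (linear index 1918) */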
+ if (ijk==1918) {
+ for (l=-1;l<=1;l++)
+ for (m=-1;m<=1;m++)
+ for (n=-1;n<=1;n++) printf(" (%d %d %d): %f \n",l,m,n,a(l,m,n));
+ printf("\n");
+ }
+
+ if (octant)
+ for (l=-1;l<=0;l++)
+ for (m=-1;m<=0;m++)
+ for (n=-1;n<=0;n++)
+ if (l*m*n == 0)
+ if (wsp[DATINDEX(pughGH,i+l,j+m,k+n)] < 0 &&
+ (ig == imin || jg == jmin || kg == kmin))
+ {
+ /* We are on an inner boundary point, eg,
+ at an inner face */
+ int ll,mm,nn;
+ /* Only zero the guys at the boundaries */
+ ll = (ig == imin ? 0 : l);
+ mm = (jg == jmin ? 0 : m);
+ nn = (kg == kmin ? 0 : n);
+ a(ll,mm,nn) += a(l,m,n);
+ a(l,m,n) = 0.0;
+ }
+
+ /* renormalize */
+ if (matnormalize) {
+ ac = a(0,0,0);
+ for (J=0;J<27;J++) a[J] = a[J] / ac;
+ }
+ else ac = 1;
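+ /* ac keeps the original central value so the source term below can
+ be scaled consistently (Nlin/ac) */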
+
+
+ /* This is the new way-clever loop. Note it relies
+ heavily on the index array in the workspace, and
+ on multiple processors it will *NOT* make a
+ 19-way banded matrix. */
+
+ for (l=-1;l<=1;l++)
+ for (m=-1;m<=1;m++)
+ for (n=-1;n<=1;n++) {
+ if (l*m*n == 0) { /* This is the 19 point ... if none are
+ * zero, then we have no stencil here.
+ */
+ if (wsp[DATINDEX(pughGH,i+l,j+m,k+n)] < 0) {
+ /* This is a boundary. */
+ rhsval += a(l,m,n) * ell_field[DATINDEX(pughGH,i+l,j+m,k+n)];
+ }
+ else {
+ J = wsp[DATINDEX(pughGH,i+l,j+m,k+n)];
+ ierr = MatSetValues(A[0],1,&I,1,&J,&(a(l,m,n)),INSERT_VALUES);
+ CHKERRA(ierr);
+ }
+ }
+ }
+
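+ /* Dirichlet handling: stencil weights that hit boundary neighbors
+ were accumulated into rhsval above; fold them, with flipped sign,
+ into the RHS together with the linear source N (scaled like the
+ matrix row) */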
+ if (Nstorage)
+ rhsval = -rhsval + Nlin[ijk] / ac;
+
+ ierr = VecSetValues(b,1,&I,&rhsval,INSERT_VALUES);
+ CHKERRA(ierr);
+
+ }
+ }
+ }
+ }
+
+ if (verbose) CCTK_INFO ("Assembling the vectors");
+ ierr = MatAssemblyBegin(A[0],MAT_FINAL_ASSEMBLY); CHKERRA(ierr);
+ ierr = VecAssemblyBegin(soln); CHKERRA(ierr);
+ ierr = VecAssemblyBegin(b); CHKERRA(ierr);
+ ierr = MatAssemblyEnd(A[0],MAT_FINAL_ASSEMBLY); CHKERRA(ierr);
+ ierr = VecAssemblyEnd(soln); CHKERRA(ierr);
+ ierr = VecAssemblyEnd(b); CHKERRA(ierr);
+ ierr = MatSetOption(A[0],MAT_NO_NEW_NONZERO_LOCATIONS); CHKERRA(ierr);
+
+ if (nabla_form == 3) {
+ if (verbose)
+ printf ("Freeing upper metric storage\n");
+ free(uxx3);
+ free(uxy3);
+ free(uxz3);
+ free(uyy3);
+ free(uyz3);
+ free(uzz3);
+ }
+ if (trips==1) /* first call: trips was already incremented above */
+ OptionsSetValue("-ksp_monitor","");
+
+ CCTK_INFO("CREATING SLES");
+ ierr = SLESCreate(pughGH->PUGH_COMM_WORLD,&sles); CHKERRA(ierr);
+
+
+ /*
+ Set operators. Here the matrix that defines the linear system
+ also serves as the preconditioning matrix. At a later date
+ we can probably optimize this using SAME_NONZERO_PATTERN
+ and a static trip-through flag (eg, on the second trip
+ through do something different). Also we need to think about
+ using A to precondition itself. Since I don't know what this
+ means, we'll leave it for now. */
+
+ CCTK_INFO("CREATING SLES OPERATOR");
+ ierr = SLESSetOperators(sles,A[0],A[0],DIFFERENT_NONZERO_PATTERN); CHKERRA(ierr);
+
+ /* Set linear solver defaults for this problem. Later this
+ should be replaced/modified with appropriate parsing from the
+ parameter parser. For now it is not. These defaults are
+ reasonable, I hope. */
+ CCTK_INFO("SLESGet KSP/PC");
+ ierr = SLESGetKSP(sles,&ksp); CHKERRA(ierr);
+ ierr = SLESGetPC(sles,&pc); CHKERRA(ierr);
+
+ /* Get the PC Type */
+ if (CCTK_Equals(petsc_PC_type,"PCJACOBI"))
+ ierr = PCSetType(pc,PCJACOBI);
+ else if (CCTK_Equals(petsc_PC_type,"PCBJACOBI"))
+ ierr = PCSetType(pc,PCBJACOBI);
+ else if (CCTK_Equals(petsc_PC_type,"PCICC"))
+ ierr = PCSetType(pc,PCICC);
+ else if (CCTK_Equals(petsc_PC_type,"PCILU"))
+ ierr = PCSetType(pc,PCILU);
+ else if (CCTK_Equals(petsc_PC_type,"PCASM"))
+ ierr = PCSetType(pc,PCASM);
+ else if (CCTK_Equals(petsc_PC_type,"PCLU"))
+ ierr = PCSetType(pc,PCLU);
+ else if (CCTK_Equals(petsc_PC_type,"PCNONE"))
+ ierr = PCSetType(pc,PCNONE);
+ else {
+ CCTK_WARN(1,"Don't understand petsc_PC_type. Using PCNONE\n");
+ ierr = PCSetType(pc,PCNONE);
+ }
+ CHKERRA(ierr);
+
+
+ /* Now the same thing for the KSP Type */
+ if (CCTK_Equals(petsc_KSP_type,"KSPBCGS"))
+ ierr = KSPSetType(ksp,KSPBCGS);
+ else if (CCTK_Equals(petsc_KSP_type,"KSPRICHARDSON"))
+ ierr = KSPSetType(ksp,KSPRICHARDSON);
+ else if (CCTK_Equals(petsc_KSP_type,"KSPCHEBYCHEV"))
+ ierr = KSPSetType(ksp,KSPCHEBYCHEV);
+ else if (CCTK_Equals(petsc_KSP_type,"KSPCG"))
+ ierr = KSPSetType(ksp,KSPCG);
+ else if (CCTK_Equals(petsc_KSP_type,"KSPGMRES"))
+ ierr = KSPSetType(ksp,KSPGMRES);
+ else if (CCTK_Equals(petsc_KSP_type,"KSPCGS"))
+ ierr = KSPSetType(ksp,KSPCGS);
+ else if (CCTK_Equals(petsc_KSP_type,"KSPTFQMR"))
+ ierr = KSPSetType(ksp,KSPTFQMR);
+ else if (CCTK_Equals(petsc_KSP_type,"KSPTCQMR"))
+ ierr = KSPSetType(ksp,KSPTCQMR);
+ else if (CCTK_Equals(petsc_KSP_type,"KSPCR"))
+ ierr = KSPSetType(ksp,KSPCR);
+ else if (CCTK_Equals(petsc_KSP_type,"KSPLSQR"))
+ ierr = KSPSetType(ksp,KSPLSQR);
+ else {
+ CCTK_WARN (1,"I don't understand petsc_KSP_type. Using BiCGSTAB\n");
+ ierr = KSPSetType(ksp,KSPBCGS);
+ }
+ CHKERRA(ierr);
+
+
+
+ /* Set up tolerances */
+
+ if (PetscTolStyle == ELLCONV_ABSOLUTE) {
+ rtol = 1.0e-15;
+ atol = tolerance;
+ } else if (PetscTolStyle == ELLCONV_RELATIVE) {
+ rtol = tolerance;
+ atol = 1.0e-15;
+ } else if (PetscTolStyle == ELLCONV_EITHER) {
+ rtol = tolerance;
+ atol = tolerance;
+ } else {
+ printf("PETSC Solver: PetscTolStyle set incorrectly [%d]\n",
+ PetscTolStyle);
+ }
+
+ ierr = KSPSetTolerances(ksp,rtol,atol,PETSC_DEFAULT,
+ PETSC_DEFAULT);
+ CHKERRA(ierr);
+
+
+ /* We are warned in the manual that
+
+ The default technique for orthogonalization of the Hessenberg matrix
+ in GMRES is the modified Gram-Schmidt method, which employs many
+ VecDot() operations and can thus be slow in parallel. A fast approach
+ is to use the unmodified Gram-Schmidt method, which can be set with
+
+ ierr = KSPGMRESSetOrthogonalization(KSP ksp,
+ KSPGMRESUnmodifiedGramSchmidtOrthogonalization);
+
+ or the options database command
+ -ksp_gmres_unmodifiedgramschmidt. Note that this algorithm is
+ numerically unstable, but may deliver much better speed
+ performance. One can also use unmodified Gram-Schmidt with
+ iterative refinement, by setting the orthogonalization routine,
+ KSPGMRESIROrthog(), by using the command line option
+ -ksp_gmres_irorthog.
+
+ So this may not be a smart thing to do. But let's put it here
+ so we can turn it on or off later.
+
+ */
+ /*$ierr = KSPGMRESSetOrthogonalization(ksp,
+ KSPGMRESUnmodifiedGramSchmidtOrthogonalization);
+ CHKERRA(ierr);$*/
+
+ /* We also learn that ...
+
+ By default, KSP assumes an initial guess of zero by zeroing the
+ initial value for the solution vector that is given. To use a
+ nonzero initial guess, the user must call
+
+ ierr = KSPSetInitialGuessNonzero(KSP ksp);
+
+ */
+ CCTK_INFO("KSPSetInitialGuess\n");
+ ierr = KSPSetInitialGuessNonzero(ksp); CHKERRA(ierr);
+
+ /*
+ Set runtime options. Since we don't use PETSC runtime options
+ but rather use the CACTUS parameter parser, we do this before
+ we parse things out. But that may not be such a good idea.
+ So let's put it here.
+ */
+ ierr = SLESSetFromOptions(sles); CHKERRA(ierr);
+
+
+ /* OK so finally we are able to ... */
+
+ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ Solve the linear system
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+ CCTK_INFO("SLES solve...");
+ ierr = SLESSolve(sles,b,soln,&its); CHKERRA(ierr);
+
+ /* Here we could form "res = A x - b" and find its norm to get
+ an idea of how well we've converged. Put this in later;
+ we can do it using MatMultAdd() after we flip the sign
+ on b with VecScale() (into res, a new vector), then find the
+ norm of res with VecNorm().
+ */
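+ /* A sketch of that check, left disabled in the $-comment style used
+ elsewhere in this file; res, negone and resnorm are hypothetical
+ locals, and the calls assume the same PETSc 2.0-era signatures
+ used above. */
+ /*$Vec res; Scalar negone = -1.0; double resnorm;
+ ierr = VecDuplicate(b,&res); CHKERRA(ierr);
+ ierr = VecScale(&negone,b); CHKERRA(ierr);
+ ierr = MatMultAdd(A[0],soln,b,res); CHKERRA(ierr);
+ ierr = VecNorm(res,NORM_2,&resnorm); CHKERRA(ierr);
+ ierr = VecDestroy(res); CHKERRA(ierr);$*/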
+
+ /* Now, since we have local layout, we can just get the values
+ from the soln vector no matter how many processors we are
+ on (eg, the VecCreateMPI has made the solution local) */
+
+ VecGetArray(soln,&values);
+ for (k=kmin;k<kmax;k++) {
+ for (j=jmin;j<jmax;j++) {
+ for (i=imin;i<imax;i++) {
+ /* Set up indices */
+ int ijk; /* The data point for the array */
+ int ig, jg, kg; /* The position in global space */
+ int I; /* The row in the matrix */
+
+ /* these guys are easy */
+ ijk = DATINDEX(pughGH,i,j,k);
+ if (wsp[ijk] >= 0) {
+ ig = i + GH->cctk_lbnd[0];
+ jg = j + GH->cctk_lbnd[1];
+ kg = k + GH->cctk_lbnd[2];
+
+ /* Recover the local index into the solution values: the global
+ row stored in the workspace minus our starting row. */
+ I = wsp[ijk]-startpoint;
+ ell_field[ijk] = values[I];
+ }
+ }
+ }
+ }
+
+ VecRestoreArray(soln,&values);
+
+ /* Sync var in CCTK speak. */
+ pugh_SyncGroup(GH,"ellpetsc::petscworkspace");
+
+ /* And finally free up the matrix memory FIXME
+ ierr = MatReleaseValuesMemory(A); CHKERRA(ierr); */
+
+ /* No reuse yet (see the FIXME above): tear everything down each call */
+ if (verbose) CCTK_INFO("Destroying Matrices");
+ ierr = SLESDestroy(sles); CHKERRA(ierr);
+ ierr = VecDestroy(soln); CHKERRA(ierr);
+ ierr = VecDestroy(b); CHKERRA(ierr);
+ ierr = MatDestroy(A[0]); CHKERRA(ierr);
+ free(A);
+
+ /*$PetscFinalize();$*/
+
+ if (verbose) CCTK_INFO("LEAVING ELLPETSC");
+
+}
+
+