author | lanfer <lanfer@1d96b42b-98df-4a6a-9d84-1b24288d4588> | 1999-09-15 08:15:56 +0000
---|---|---
committer | lanfer <lanfer@1d96b42b-98df-4a6a-9d84-1b24288d4588> | 1999-09-15 08:15:56 +0000
commit | f931c69d368ae51b5d3b8f483cd73f5a4231ad6a (patch) |
tree | 0f2f8aa298a4b359bc8db90108cc502cc59348fe |
parent | 777dd97165113011b9e1d0ca558217dafe9e7aae (diff) |
a flat space solver, very beta
git-svn-id: http://svn.cactuscode.org/arrangements/CactusElliptic/EllPETSc/trunk@8 1d96b42b-98df-4a6a-9d84-1b24288d4588
-rw-r--r-- | src/petsc_flat_solver.c | 691 |
1 file changed, 691 insertions, 0 deletions
diff --git a/src/petsc_flat_solver.c b/src/petsc_flat_solver.c
new file mode 100644
index 0000000..890289a
--- /dev/null
+++ b/src/petsc_flat_solver.c
@@ -0,0 +1,691 @@
+
+#include "cctk.h"
+#include "cctk_parameters.h"
+#include "cctk_Flesh.h"
+
+#include "assert.h"
+#include "mpi.h"
+#include "sles.h"
+
+#include "CactusPUGH/PUGH/src/include/pugh.h"
+
+
+#define DEBUG
+
+/* Don't know what these actually mean! FIXME */
+#define ELLCONV_ABSOLUTE 0
+#define ELLCONV_RELATIVE 1
+#define ELLCONV_EITHER   2
+
+/* A few convenient macros */
+#define a(i,j,k) a[i+1 + 3*(j+1 + 3*(k+1))]
+#define SQR(a) ((a)*(a))
+
+
+
+/* Place the matrix in global space so we can keep it around
+   with the memory stripped out. Thanks, Barry! */
+
+static int trips=0;
+static Mat *A;       /* linear system matrix */
+static Vec soln, b;  /* approx solution, RHS */
+static SLES sles;    /* linear solver context */
+
+
+
+void petsc_flat(cGH *GH, int *FieldIndex, int *MIndex, int *NIndex,
+                int *AbsTol, int *RelTol) {
+
+
+  DECLARE_CCTK_PARAMETERS   /* CCTK passed parameters */
+
+
+  void *GetDataPtr_NextTL(cGH *GH, const char *field);
+  int  CCTK_GetArgc(void);         /* PETSc wants to know the prog. name */
+  void CCTK_GetArgv(char **argv);  /* and parameters; don't think they can
+                                      be passed through */
+
+
+  PC  pc;     /* preconditioner context */
+  KSP ksp;    /* Krylov subspace method context */
+  int num_A;  /* Number of A-arrays as needed by Dan's MG */
+
+
+  double norm;   /* norm of solution error */
+  int ierr;      /* Check the return status of petsc */
+  int retcode;   /* Check the return status of CCTK */
+
+  double a[27];  /* The stencil array */
+  int idx[27];   /* The column of the stencil array */
+  int rank;      /* Rank of the matrix/vector */
+  int its;       /* Number of iterations */
+
+  /* Loopers */
+  int i,j,k,l,m,n,ll;
+
+  /* loop limits put in for stencil_w != 1   Ed Evans 1/19/98 */
+  int imin,imax,jmin,jmax,kmin,kmax,interior;
+
+  pGH *pughGH;  /* The pugh Extension handle */
+  int myproc;   /* our processor */
+
+  /* Tolerances */
+  CCTK_REAL rtol, atol, tolerance;
+
+  /* Values to assemble the matrix */
+  CCTK_REAL two=2.0, four=4.0, tmp;
+  CCTK_REAL pm4, psixp,psiyp,psizp,det;
+  CCTK_REAL dx,dy,dz;
+  CCTK_REAL uxx,uxy,uxz,uyy,uyz,uzz;
+  CCTK_REAL Gxxx,Gxxy,Gxxz,Gxyy,Gxyz,Gxzz;  /* Christoffels */
+  CCTK_REAL Gyxx,Gyxy,Gyxz,Gyyy,Gyyz,Gyzz;
+  CCTK_REAL Gzxx,Gzxy,Gzxz,Gzyy,Gzyz,Gzzz;
+  CCTK_REAL dxxx,dxxy,dxxz,dxyy,dxyz,dxzz;
+  CCTK_REAL dyxx,dyxy,dyxz,dyyy,dyyz,dyzz;
+  CCTK_REAL dzxx,dzxy,dzxz,dzyy,dzyz,dzzz;
+  CCTK_REAL *values;
+
+  int nxs,nys,nzs;    /* Size of the grid with stencils off... */
+  int startpoint=0;   /* My starting index (per proc) */
+  int endpoint;       /* One more than my end */
+  int pstart, pend;   /* A check for PETSc layout */
+  int pvstart, pvend; /* A check for PETSc layout */
+  int verbose;        /* Is the solver verbose */
+  int debug;          /* Is the solver debug-verbose */
+  int octant;         /* Apply octant BCs inside */
+  int conformal;      /* Do we have a conformal metric? */
+  int nabla_form;     /* Which form of the nabla */
+  int reuse;          /* Do PETSc reuse tricks? */
+  int matnormalize;   /* Normalize the central mat value to one? */
+  CCTK_REAL ac;       /* Storage for a(0,0,0) for renorm */
+  int pctype, ksptype;  /* Which PC and KSP to use. See below for
+                           monster nested if statement. */
+  int PetscTolStyle;
+
+  /* For the upper metric form of nabla */
+  CCTK_REAL *uxx3, *uxy3, *uxz3, *uyy3, *uyz3, *uzz3;
+
+
+  /* Pointers to the data of: petsc workspace / the GF to solve /
+     metric / psi / derivs(psi) / Mlinear / Nlinear (source) */
+  CCTK_REAL *wsp=NULL, *ell_field=NULL;
+
+  CCTK_REAL *Mlin=NULL, *Nlin=NULL;
+
+  int Mstorage=0, Nstorage=0;
+
+
+  octant  = CCTK_Equals(domain,"octant");
+  verbose = CCTK_Equals(petsc_verbose,"yes") ||
+            CCTK_Equals(petsc_verbose,"debug");
+  debug   = CCTK_Equals(petsc_verbose,"debug");
+  reuse = 0;
+  matnormalize = 0;
+
+  /* FIXME: the TolAbs/TolRel will be evaluated here */
+  PetscTolStyle = 0;
+  tolerance = AbsTol[0];
+
+  /* Get the link to the pugh Extension */
+  pughGH = (pGH*)GH->extensions[CCTK_GHExtensionHandle("PUGH")];
+  if (!pughGH) CCTK_WARN(0,"INTERNAL ERROR: Cannot find PUGH Extension Handle\n");
+
+  /* Things to do on first iteration */
+  if (trips==0) {
+    int CCTK_GetCommandLine(char ***outargv);
+    int argc;
+    char **argv;
+
+#ifdef DEBUG
+    if (debug)
+      printf("PETSc: initial trip: %d \n",trips);
+#endif
+
+    argc = CCTK_GetCommandLine(&argv);
+    ierr = PetscSetCommWorld(pughGH->PUGH_COMM_WORLD); CHKERRA(ierr);
+    PetscInitialize(&argc,&argv,NULL,NULL);
+
+    CCTK_INFO("PETSc initialized");
+  }
+
+  trips++;
+
+  num_A = 1;
+  A = (Mat*) malloc(sizeof(Mat)*num_A);
+
+  /* Get the data ptr of these GFs. They all have to be
+     on the same timelevel */
+
+  /* derive the metric data pointer from the index array.
+     Note the ordering in the metric */
+  ell_field = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,*FieldIndex);
+
+  /* if we have a negative index, this GF is not needed,
+     therefore don't even look for it. when the index is positive,
+     set the flag Mstorage=1; ditto for N */
+  if (*MIndex>=0) {
+    Mlin = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,*MIndex);
+    Mstorage = 1;
+  }
+  if (*NIndex>=0) {
+    Nlin = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,*NIndex);
+    Nstorage = 1;
+  }
+  wsp = (CCTK_REAL*) CCTK_VarDataPtrI(GH,0,CCTK_VarIndex("ellpetsc::wsp"));
+
+  /* initialize the linear index lookup table; after it's
+     filled up (below), a -1 indicates a boundary */
+  for (i=0;i<pughGH->npoints;i++) wsp[i] = -1.0;
+
+  /* Get myproc from the CCTK, get the gridspacing */
+  myproc = pugh_MyProc(GH);
+  dx = GH->cctk_delta_space[0];
+  dy = GH->cctk_delta_space[1];
+  dz = GH->cctk_delta_space[2];
+
+  /* We fix the lower and upper boundary indices that are actually
+     "active" in the sense that they are not ghostzones: */
+  imin=((pughGH->neighbors[pughGH->myproc][XDM]<0) && !(octant) ? 1 :
+        pughGH->nghostzones);
+  imax=((pughGH->neighbors[pughGH->myproc][XDP]<0) ? pughGH->lnx-1 :
+        pughGH->lnx-GH->cctk_nghostzones[0]);
+
+  jmin=((pughGH->neighbors[pughGH->myproc][YDM]<0) && !(octant) ? 1 :
+        GH->cctk_nghostzones[1]);
+  jmax=((pughGH->neighbors[pughGH->myproc][YDP]<0) ? pughGH->lny-1 :
+        pughGH->lny-GH->cctk_nghostzones[1]);
+
+  kmin=((pughGH->neighbors[pughGH->myproc][ZDM]<0) && !(octant) ? 1 :
+        GH->cctk_nghostzones[2]);
+  kmax=((pughGH->neighbors[pughGH->myproc][ZDP]<0) ? pughGH->lnz-1 :
+        pughGH->lnz-GH->cctk_nghostzones[2]);
+
+  /* We need to get the global index of gridpoints (gps) owned on my proc.
+     For that we have to know how many gps there are before my processor
+     (i<myproc), so we count them here, minus the ghostzones
+     (we use PUGH's "rn" variables (number of gps on proc[#] in
+     direction [i]) and subtract the ghostzones if necessary). */
+
+  for (i=0;i<myproc;i++) {
+
+    nxs=pughGH->rnx[i];
+    nxs=((pughGH->neighbors[i][XDM]<0) && !(octant) ? nxs-1 :
+         nxs-GH->cctk_nghostzones[0]);
+    nxs=((pughGH->neighbors[i][XDP]<0) ? nxs-1 : nxs-GH->cctk_nghostzones[0]);
+
+    nys=pughGH->rny[i];
+    nys=((pughGH->neighbors[i][YDM]<0) && !(octant) ? nys-1 :
+         nys-GH->cctk_nghostzones[1]);
+    nys=((pughGH->neighbors[i][YDP]<0) ? nys-1 : nys-GH->cctk_nghostzones[1]);
+
+    nzs=pughGH->rnz[i];
+    nzs=((pughGH->neighbors[i][ZDM]<0) && !(octant) ? nzs-1 :
+         nzs-GH->cctk_nghostzones[2]);
+    nzs=((pughGH->neighbors[i][ZDP]<0) ? nzs-1 : nzs-GH->cctk_nghostzones[2]);
+
+    printf("PROC: %d nxyzs %d %d %d \n",i,nxs,nys,nzs);
+    startpoint += nxs*nys*nzs;
+  }
+
+#ifdef DEBUG
+  printf("STARTPOINT: %d \n",startpoint);
+#endif
+
+  /* ...we save that as a start... */
+  endpoint = startpoint;
+
+  /* ...and continue running over our own region to get our endpoint */
+  for (k=imin;k<kmax;k++)
+    for (j=jmin;j<jmax;j++)
+      for (i=imin;i<imax;i++)
+        wsp[DATINDEX(pughGH,i,j,k)] = endpoint++;
+#ifdef DEBUG
+  printf("ENDPOINT: %d \n",endpoint);
+#endif /* DEBUG */
+
+  /* So now each point has a unique index of its own. If we
+     do a sync, that means each processor knows the indices
+     in the ghost zones (eg, on its neighbors) in a FD stencil
+     of 1 */
+  retcode = pugh_SyncGroup(GH,"ellpetsc::petscworkspace");
+
+
+  /* So woohoo. Now for each point in our ijk space, we have
+     information about our row in the matrix (workspace->data[ijk])
+     as well as the column for the stencil of the neighbors
+     (workspace->data[DATINDEX(GH,i+1,j,k)] etc...). So onwards */
+
+  nxs = pughGH->nx-2;
+  nys = pughGH->ny-2;
+  nzs = pughGH->nz-2;
+
+#ifdef DEBUG
+  if (debug) {
+    printf("nxyzs %d %d %d \n",nxs,nys,nzs);
+    printf("lnxyz: %d %d %d \n",pughGH->lnx,pughGH->lny,pughGH->lnz);
+  }
+#endif
+
+  /* Suck off boundaries... */
+  rank = nxs*nys*nzs;
+
+
+  /* Create the petsc matrix A and vectors x and b efficiently.
+
+     Now for efficiency we have to be really careful here. By
+     setting up the index array, we are abandoning the 19-way
+     tri-diagonal system, so we can lay out our matrices on the
+     processors with the data. (On one proc we will still
+     get 19-way tridiagonal.) So we want each processor
+     to own only part of the matrix/vector, but we know exactly
+     what it is, eg, from startpoint to endpoint-1 is on our
+     local processor (that is, row-wise; all columns are local).
+
+     So we can use MatCreateMPIAIJ for exactly this, and
+     VecCreateMPI for the vectors.
+  */
+
+  /* FIXME: perhaps only set up on the first iteration; this was a hack
+     by PAUL, using some unofficial petsc code. check if this is
+     "official" yet */
+
+  /*$if (trips == 0 || reuse == 0) {$*/
+  if (verbose) CCTK_INFO("Creating Matrices and Vectors ....");
+  ierr = MatCreateMPIAIJ(pughGH->PUGH_COMM_WORLD,
+                         (endpoint-startpoint),  /* # of rows */
+                         (endpoint-startpoint),  /* This is confusing */
+                         rank,rank,              /* Matrix size */
+                         19,PETSC_NULL,          /* Diagonal */
+                         19,PETSC_NULL,          /* Off diagonal */
+                         &A[0]);                 /* The output */
+  CHKERRA(ierr);
+  ierr = VecCreateMPI(pughGH->PUGH_COMM_WORLD,(endpoint-startpoint),rank,&soln);
+  CHKERRA(ierr);
+  ierr = VecCreateMPI(pughGH->PUGH_COMM_WORLD,(endpoint-startpoint),rank,&b);
+  CHKERRA(ierr);
+  /*$}$*/
+  /*$else {$*/
+  /*   ierr = MatRestoreValuesMemory(A); */
+  /*$CCTK_WARN(0,"no reuse");
+     CHKERRA(ierr);
+     }$*/
+
+
+
+
+  /* Compare the PETSc layout to Cactus; this had better be a match */
+  ierr = VecGetOwnershipRange(soln,&pvstart,&pvend);
+  ierr = MatGetOwnershipRange(A[0],&pstart,&pend);
+  printf("CAC M-Layout: %d %d \n",startpoint, endpoint);
+  printf("PET M-Layout: %d %d \n",pstart, pend);
+  printf("PET V-Layout: %d %d \n",pvstart,pvend);
+  if (pstart != startpoint && pend != endpoint)
+    CCTK_WARN(1,"WARNING: PETSC and data layouts differ! (why??)\n");
+
+
+  for (k=kmin;k<kmax;k++) {
+    for (j=jmin;j<jmax;j++) {
+      for (i=imin;i<imax;i++) {
+
+        if (wsp[DATINDEX(pughGH,i,j,k)] >= 0) {
+
+          /* Set up indices */
+          int ijk;        /* The data point for the array */
+          int ig, jg, kg; /* The position in global space */
+          int I,J;        /* The col and row in the matrix */
+          CCTK_REAL rhsval = 0;
+
+          for (I=0;I<27;I++) a[I] = 0.0;
+          a( 0, 0, 0) = -10.38;
+          a( 1, 0, 0) =  1.56;  /* ae */
+          a(-1, 0, 0) =  1.56;  /* aw */
+          a( 0, 1, 0) =  1.56;  /* an */
+          a( 0,-1, 0) =  1.56;  /* as */
+          a( 0, 0, 1) = -1.56;  /* at */
+          a( 0, 0,-1) = -1.56;  /* ab */
+
+          /* these guys are easy */
+          ijk = DATINDEX(pughGH,i,j,k);  /* get linear index */
+          ig = i + GH->cctk_lbnd[0];
+          jg = j + GH->cctk_lbnd[1];
+          kg = k + GH->cctk_lbnd[2];
+
+          /* Get the row we are working on */
+          I = wsp[ijk];
+
+          /* M phi */
+          if (Mstorage)
+            a(0,0,0) = a(0,0,0) - Mlin[ijk];
+
+          /* Great, now set the values of the matrix. This is
+             really painful due to the boundaries (here we force
+             dirichlet). */
+          VecSetValues(soln,1,&I,&(ell_field[ijk]),INSERT_VALUES);
+
+          /* Put in the octant boundary conditions.
+
+             We do these by reflecting the -1 stencil if we are at a
+             boundary at -1. That is, imagine the 1D laplace equation
+             with the boundary condition a(0) = a(1). This means
+             the point 1 stencil (which is our first stencil in
+             the matrix) becomes
+
+               a(0) + a(2) - 2 a(1) = rho(i) * deltax ->
+               a(2) - 2 a(1) + a(1) = rho(i) * deltax
+
+             OK so think about this with a general stencil
+
+               S_0 a_0 + S_1 a_1 + S_2 a_2 = rho_1 ->
+               S_0 a_1 + S_1 a_1 + S_2 a_2 = rho_1 ->
+               (S_0 + S_1) a_1 + S_2 a_2 = rho_1
+
+             eg, S_0 -> 0 and S_1 -> S_1 + S_0
+
+             Great, so implement this in 3D. It is a bit trickier,
+             since we don't want to zero neighbors in the wrong
+             direction, but not impossible.
+          */
+
+          if (ijk==1918) {
+            for (l=-1;l<=1;l++)
+              for (m=-1;m<=1;m++)
+                for (n=-1;n<=1;n++) printf(" (%d %d %d): %f \n",l,m,n,a(l,m,n));
+            printf("\n");
+          }
+
+          if (octant)
+            for (l=-1;l<=0;l++)
+              for (m=-1;m<=0;m++)
+                for (n=-1;n<=0;n++)
+                  if (l*m*n == 0)
+                    if (wsp[DATINDEX(pughGH,i+l,j+m,k+n)] < 0 &&
+                        (ig == imin || jg == jmin || kg == kmin))
+                    {
+                      /* We are on an inner boundary point, eg,
+                         at an inner face */
+                      int ll,mm,nn;
+                      /* Only zero the guys at the boundaries */
+                      ll = (ig == imin ? 0 : l);
+                      mm = (jg == jmin ? 0 : m);
+                      nn = (kg == kmin ? 0 : n);
+                      a(ll,mm,nn) += a(l,m,n);
+                      a(l,m,n) = 0.0;
+                    }
+
+          /* renormalize */
+          if (matnormalize) {
+            ac = a(0,0,0);
+            for (J=0;J<27;J++) a[J] = a[J] / ac;
+          }
+          else ac = 1;
+
+
+          /* This is the new way-clever loop. Note it relies
+             heavily on the index array in the workspace, and
+             on multiple processors it will *NOT* make a
+             19-way banded matrix. */
+
+          for (l=-1;l<=1;l++)
+            for (m=-1;m<=1;m++)
+              for (n=-1;n<=1;n++) {
+                if (l*m*n == 0) {  /* This is the 19 point ... if none are
+                                    * zero, then we have no stencil here.
+                                    */
+                  if (wsp[DATINDEX(pughGH,i+l,j+m,k+n)] < 0) {
+                    /* This is a boundary. */
+                    rhsval += a(l,m,n) * ell_field[DATINDEX(pughGH,i+l,j+m,k+n)];
+                  }
+                  else {
+                    J = wsp[DATINDEX(pughGH,i+l,j+m,k+n)];
+                    ierr = MatSetValues(A[0],1,&I,1,&J,&(a(l,m,n)),INSERT_VALUES);
+                    CHKERRA(ierr);
+                  }
+                }
+              }
+
+          if (Nstorage)
+            rhsval = -rhsval + Nlin[ijk] / ac;
+
+          ierr = VecSetValues(b,1,&I,&rhsval,INSERT_VALUES);
+          CHKERRA(ierr);
+
+        }
+      }
+    }
+  }
+
+  if (verbose) CCTK_INFO("Assembling the vectors");
+  ierr = MatAssemblyBegin(A[0],MAT_FINAL_ASSEMBLY); CHKERRA(ierr);
+  ierr = VecAssemblyBegin(soln); CHKERRA(ierr);
+  ierr = VecAssemblyBegin(b); CHKERRA(ierr);
+  ierr = MatAssemblyEnd(A[0],MAT_FINAL_ASSEMBLY); CHKERRA(ierr);
+  ierr = VecAssemblyEnd(soln); CHKERRA(ierr);
+  ierr = VecAssemblyEnd(b); CHKERRA(ierr);
+  ierr = MatSetOption(A[0],MAT_NO_NEW_NONZERO_LOCATIONS); CHKERRA(ierr);
+
+  if (nabla_form == 3) {
+    if (verbose)
+      printf("Freeing upper metric storage\n");
+    free(uxx3);
+    free(uxy3);
+    free(uxz3);
+    free(uyy3);
+    free(uyz3);
+    free(uzz3);
+  }
+  if (trips==0)
+    OptionsSetValue("-ksp_monitor","");
+
+  CCTK_INFO("CREATING SLES");
+  ierr = SLESCreate(pughGH->PUGH_COMM_WORLD,&sles); CHKERRA(ierr);
+
+
+  /*
+     Set operators. Here the matrix that defines the linear system
+     also serves as the preconditioning matrix. At a later date
+     we can probably optimize this using SAME_NONZERO_PATTERN
+     and a static trip-through flag (eg, on the second trip
+     through do something different). Also we need to think about
+     using A to precondition itself. Since I don't know what this
+     means, we'll leave it for now. */
+
+  CCTK_INFO("CREATING SLES OPERATOR");
+  ierr = SLESSetOperators(sles,A[0],A[0],DIFFERENT_NONZERO_PATTERN); CHKERRA(ierr);
+
+  /* Set linear solver defaults for this problem. Later this
+     should be replaced/modified with appropriate parsing from the
+     parameter parser. For now it is not. These defaults are
+     reasonable, I hope. */
+  CCTK_INFO("SLESGet KSP/PC");
+  ierr = SLESGetKSP(sles,&ksp); CHKERRA(ierr);
+  ierr = SLESGetPC(sles,&pc); CHKERRA(ierr);
+
+  /* Get the PC Type */
+  if (CCTK_Equals(petsc_PC_type,"PCJACOBI"))
+    ierr = PCSetType(pc,PCJACOBI);
+  else if (CCTK_Equals(petsc_PC_type,"PCBJACOBI"))
+    ierr = PCSetType(pc,PCBJACOBI);
+  else if (CCTK_Equals(petsc_PC_type,"PCICC"))
+    ierr = PCSetType(pc,PCICC);
+  else if (CCTK_Equals(petsc_PC_type,"PCILU"))
+    ierr = PCSetType(pc,PCILU);
+  else if (CCTK_Equals(petsc_PC_type,"PCASM"))
+    ierr = PCSetType(pc,PCASM);
+  else if (CCTK_Equals(petsc_PC_type,"PCLU"))
+    ierr = PCSetType(pc,PCLU);
+  else if (CCTK_Equals(petsc_PC_type,"PCNONE"))
+    ierr = PCSetType(pc,PCNONE);
+  else {
+    CCTK_WARN(1,"Don't understand petsc_PC_type. Using PCNONE\n");
+    ierr = PCSetType(pc,PCNONE);
+  }
+  CHKERRA(ierr);
+
+
+  /* Now the same thing for the KSP Type */
+  if (CCTK_Equals(petsc_KSP_type,"KSPBCGS"))
+    ierr = KSPSetType(ksp,KSPBCGS);
+  else if (CCTK_Equals(petsc_KSP_type,"KSPRICHARDSON"))
+    ierr = KSPSetType(ksp,KSPRICHARDSON);
+  else if (CCTK_Equals(petsc_KSP_type,"KSPCHEBYCHEV"))
+    ierr = KSPSetType(ksp,KSPCHEBYCHEV);
+  else if (CCTK_Equals(petsc_KSP_type,"KSPCG"))
+    ierr = KSPSetType(ksp,KSPCG);
+  else if (CCTK_Equals(petsc_KSP_type,"KSPGMRES"))
+    ierr = KSPSetType(ksp,KSPGMRES);
+  else if (CCTK_Equals(petsc_KSP_type,"KSPCGS"))
+    ierr = KSPSetType(ksp,KSPCGS);
+  else if (CCTK_Equals(petsc_KSP_type,"KSPTFQMR"))
+    ierr = KSPSetType(ksp,KSPTFQMR);
+  else if (CCTK_Equals(petsc_KSP_type,"KSPTCQMR"))
+    ierr = KSPSetType(ksp,KSPTCQMR);
+  else if (CCTK_Equals(petsc_KSP_type,"KSPCR"))
+    ierr = KSPSetType(ksp,KSPCR);
+  else if (CCTK_Equals(petsc_KSP_type,"KSPLSQR"))
+    ierr = KSPSetType(ksp,KSPLSQR);
+  else {
+    CCTK_WARN(1,"I don't understand petsc_KSP_type. Using BiCGSTAB\n");
+    ierr = KSPSetType(ksp,KSPBCGS);
+  }
+  CHKERRA(ierr);
+
+
+
+  /* Set up tolerances */
+
+  if (PetscTolStyle == ELLCONV_ABSOLUTE) {
+    rtol = 1.0e-15;
+    atol = tolerance;
+  } else if (PetscTolStyle == ELLCONV_RELATIVE) {
+    rtol = tolerance;
+    atol = 1.0e-15;
+  } else if (PetscTolStyle == ELLCONV_EITHER) {
+    rtol = tolerance;
+    atol = tolerance;
+  } else {
+    printf("PETSC Solver: PetscTolStyle set incorrectly [%d]\n",
+           PetscTolStyle);
+  }
+
+  ierr = KSPSetTolerances(ksp,rtol,atol,PETSC_DEFAULT,
+                          PETSC_DEFAULT);
+  CHKERRA(ierr);
+
+
+  /* We are warned in the manual that
+
+     The default technique for orthogonalization of the Hessenberg matrix
+     in GMRES is the modified Gram-Schmidt method, which employs many
+     VecDot() operations and can thus be slow in parallel. A fast approach
+     is to use the unmodified Gram-Schmidt method, which can be set with
+
+       ierr = KSPGMRESSetOrthogonalization(KSP ksp,
+                KSPGMRESUnmodifiedGramSchmidtOrthogonalization);
+
+     or the options database command
+     -ksp_gmres_unmodifiedgramschmidt. Note that this algorithm is
+     numerically unstable, but may deliver much better speed
+     performance. One can also use unmodified Gram-Schmidt with
+     iterative refinement by setting the orthogonalization routine,
+     KSPGMRESIROrthog(), by using the command line option
+     -ksp_gmres_irorthog.
+
+     So this may not be a smart thing to do. But let's put it here
+     so we can turn it on or off later.
+  */
+  /*$ierr = KSPGMRESSetOrthogonalization(ksp,
+       KSPGMRESUnmodifiedGramSchmidtOrthogonalization);
+    CHKERRA(ierr);$*/
+
+  /* We also learn that ...
+
+     By default, KSP assumes an initial guess of zero by zeroing the
+     initial value for the solution vector that is given. To use a
+     nonzero initial guess, the user must call
+
+       ierr = KSPSetInitialGuessNonzero(KSP ksp);
+  */
+  CCTK_INFO("KSPSetInitialGuess\n");
+  ierr = KSPSetInitialGuessNonzero(ksp); CHKERRA(ierr);
+
+  /*
+     Set runtime options. Since we don't use PETSC runtime options
+     but rather use the CACTUS parameter parser, we do this before
+     we parse things out. But that may not be such a good idea.
+     So let's put it here.
+  */
+  ierr = SLESSetFromOptions(sles); CHKERRA(ierr);
+
+
+  /* OK so finally we are able to ... */
+
+  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+     Solve the linear system
+     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+  CCTK_INFO("SLES solve...");
+  ierr = SLESSolve(sles,b,soln,&its); CHKERRA(ierr);
+
+  /* Here we can form a "res = Ax - b" and find its norm to get
+     an idea of how well we've converged. Put this in later,
+     but we can do it using MatMultAdd() after we flip the sign
+     on b with VecScale() (into res, a new vector), then find the
+     norm of res with VecNorm().
+  */
+
+  /* Now since we have local layout, we can just get the values
+     from the soln vector no matter how many processors we are
+     on (eg, the VecCreateMPI has made the solution local) */
+
+  VecGetArray(soln,&values);
+  for (k=kmin;k<kmax;k++) {
+    for (j=jmin;j<jmax;j++) {
+      for (i=imin;i<imax;i++) {
+        /* Set up indices */
+        int ijk;        /* The data point for the array */
+        int ig, jg, kg; /* The position in global space */
+        int I,J;        /* The col and row in the matrix */
+
+        CCTK_REAL rhsval = 0;
+
+        /* these guys are easy */
+        ijk = DATINDEX(pughGH,i,j,k);
+        if (wsp[ijk] >= 0) {
+          ig = i + GH->cctk_lbnd[0];
+          jg = j + GH->cctk_lbnd[1];
+          kg = k + GH->cctk_lbnd[2];
+
+          /* Now this one. "Fortran-order" the matrix. But remember
+             we have stripped off the ghost zones. Hence ig-1 and nxs...
+          */
+          I = wsp[ijk]-startpoint;
+          ell_field[ijk] = values[I];
+        }
+      }
+    }
+  }
+
+  VecRestoreArray(soln,&values);
+
+  /* Sync var in CCTK speak. */
+  pugh_SyncGroup(GH,"ellpetsc::petscworkspace");
+
+  /* And finally free up the matrix memory FIXME
+     ierr = MatReleaseValuesMemory(A); CHKERRA(ierr); */
+
+  /* This code is not used anymore */
+  if (verbose) CCTK_INFO("Destroying Matrices");
+  ierr = SLESDestroy(sles); CHKERRA(ierr);
+  ierr = VecDestroy(soln); CHKERRA(ierr);
+  ierr = VecDestroy(b); CHKERRA(ierr);
+  ierr = MatDestroy(A[0]); CHKERRA(ierr);
+  free(A);
+
+  /*$PetscFinalize();$*/
+
+  if (verbose) CCTK_INFO("LEAVING ELLPETSC");
+
+}
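
A note on the stencil indexing in the diff above: the `a(i,j,k)` macro flattens the 3x3x3 neighborhood into a 27-entry array indexed by offsets in {-1,0,1}, and the assembly loops keep only the 19 offsets with at least one zero component (`l*m*n == 0`). A minimal standalone sketch of that mapping (illustrative only, not part of the commit; the macro arguments are parenthesized here for safety):

```c
#include <stdio.h>

/* Same flattening as the commit's macro: offsets in {-1,0,1}^3
   map to indices 0..26 of a flat array. */
#define a(i,j,k) a[(i)+1 + 3*((j)+1 + 3*((k)+1))]

int main(void) {
  double a[27] = {0.0};
  int l, m, n, points = 0;

  for (l = -1; l <= 1; l++)
    for (m = -1; m <= 1; m++)
      for (n = -1; n <= 1; n++)
        if (l*m*n == 0) {    /* skip the 8 corners: 27 - 8 = 19 */
          a(l, m, n) = 1.0;  /* member of the 19-point stencil */
          points++;
        }

  printf("stencil points: %d\n", points);  /* prints 19 */
  return 0;
}
```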
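The octant boundary handling follows the 1D derivation in the comment block above: with a reflection condition a_0 = a_1, the ghost-point coefficient is folded onto the boundary point, S_1 -> S_1 + S_0 and S_0 -> 0. A one-dimensional sketch of that fold (hypothetical helper, assuming a three-point stencil; not code from the commit):

```c
/* Fold a reflection boundary (a[i-1] == a[i]) into a 1D three-point
   stencil. S[0], S[1], S[2] multiply a[i-1], a[i], a[i+1]; after the
   fold the ghost coefficient is absorbed into the diagonal, exactly
   as the 3D loop does with a(ll,mm,nn) += a(l,m,n). */
static void fold_reflection(double S[3]) {
  S[1] += S[0];  /* (S_0 + S_1) a_1 */
  S[0]  = 0.0;   /* ghost point no longer referenced */
}
```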
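The comment after `SLESSolve` leaves the convergence check "res = Ax - b" as a TODO. Against a modern PETSc (3.x) build, one plausible way to write it is the sketch below; the helper name and the use of `MatMult`/`VecAXPY` in place of the suggested `MatMultAdd`/`VecScale` are assumptions, not code from the commit:

```c
#include <petscksp.h>

/* Compute ||A x - b||_2 as a convergence diagnostic (sketch). */
static PetscErrorCode ResidualNorm(Mat A, Vec x, Vec b, PetscReal *norm)
{
  Vec res;
  PetscErrorCode ierr;

  ierr = VecDuplicate(b, &res);       CHKERRQ(ierr);
  ierr = MatMult(A, x, res);          CHKERRQ(ierr);  /* res = A x     */
  ierr = VecAXPY(res, -1.0, b);       CHKERRQ(ierr);  /* res = A x - b */
  ierr = VecNorm(res, NORM_2, norm);  CHKERRQ(ierr);
  ierr = VecDestroy(&res);            CHKERRQ(ierr);
  return 0;
}
```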
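For readers tracing this code today: the SLES layer used throughout the file was absorbed into KSP in PETSc 2.2 and no longer exists. Under that assumption, a rough modern (PETSc 3.x) equivalent of the create/configure/solve/destroy sequence would look like the following sketch; it is not the commit's API, and `comm`, `A`, `b`, `soln`, `rtol`, `atol` stand in for the corresponding objects in the surrounding code:

```c
#include <petscksp.h>

static PetscErrorCode SolveFlat(MPI_Comm comm, Mat A, Vec b, Vec soln,
                                PetscReal rtol, PetscReal atol)
{
  KSP ksp;
  PC  pc;
  PetscInt its;
  PetscErrorCode ierr;

  ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr);  /* A preconditions itself */
  ierr = KSPSetType(ksp, KSPBCGS); CHKERRQ(ierr);    /* BiCGSTAB, the fallback above */
  ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr);
  ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr);
  ierr = KSPSetTolerances(ksp, rtol, atol, PETSC_DEFAULT, PETSC_DEFAULT); CHKERRQ(ierr);
  ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);  /* now takes a flag */
  ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr);
  ierr = KSPSolve(ksp, b, soln); CHKERRQ(ierr);
  ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
  ierr = PetscPrintf(comm, "converged in %d iterations\n", (int)its); CHKERRQ(ierr);
  ierr = KSPDestroy(&ksp); CHKERRQ(ierr);
  return 0;
}
```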