1 files changed, 1455 insertions, 0 deletions
diff --git a/src/shmgp.F77 b/src/shmgp.F77
new file mode 100644
index 0000000..1982591
--- /dev/null
+++ b/src/shmgp.F77
@@ -0,0 +1,1455 @@
+
+c----------------------------------------------------------------------
+      subroutine umgs2(
+     + ac,aw,as,ae,an,asw,ase,ane,anw,q,f,pu,pd,pc,ru,rd,rc,gam,np2,
+     + ifd59,ifmg,ncyc,tol,nman,im,jm,id5,id9,idi,m,iskip,rmax,
+     + ipc,irc,irurd)
+      implicit real*8(a-h,o-z)
+c----------------------------------------------------------------------
+cdir$ noinline
+c** SUBROUTINE UMGS2
+c
+c** COPYRIGHT: Ecodynamics Research Associates, Inc.
+c
+c** Date written: June, 1990
+c** Author:  Steve Schaffer
+c            Mathematics Department
+c            New Mexico Tech
+c            Socorro, NM  87801
+c            505-835-5811
+c
+c** DESCRIPTION:
+c     umgs2 is a black box symmetric matrix solver.  It is written
+c     in unsymmetric storage mode and can be used to solve mildly
+c     nonsymmetric problems.  The user provides a matrix and right hand
+c     side vector corresponding to a 5 or 9 point finite difference/
+c     finite volume discretization of a symmetric second order PDE.
+c     umgs2 will construct a sequence of coarse grids and coarse
+c     grid operators and then solve the matrix equation using a
+c     y-direction semi-coarsening multigrid algorithm and return
+c     the solution vector.  If a sequence of matrix problems are
+c     to be solved using the same matrix, computational time can
+c     be saved by skipping the construction of the coarse grid
+c     information in subsequent calls to umgs2. 
+c
+c     The matrix on the finest grid is stored in the arrays ac,aw,as,
+c     an,asw,ase,ane and anw.  The difference stencil at the point
+c     (i,j) given by
+c
+c         nw  n  ne        anw(i,j)   an(i,j)   ane(i,j)
+c          w  c  e    =    aw(i,j)    ac(i,j)   aw(i,j)
+c         sw  s  se        asw(i,j)   as(i,j)   ase(i,j)
+c
+c     If the difference stencil on the fine grid is a 5 point stencil
+c     then the arrays asw,ase,ane,anw are not used and the
+c     stencil is given by
+c
+c             n                     an(i,j)
+c          w  c  e    =    aw(i,j)  ac(i,j)   ae(i,j)
+c             s                     as(i,j)
+c
+c     However, asw,ase,ane,anw still need to be dimensioned (by id9)
+c     in the calling program as they are used in the coarse grid
+c     calculations.
+c
+c** STORAGE
+c     It is assumed that a set of ficticious points have been defined
+c     along the entire boundary.  These points have nothing to do with
+c     the solution and are used for programming convenience and
+c     vectorization purposes.  Storage is allocated for the stencil
+c     elements ac,aw,as,asw,ase, the solution vector, q, and the
+c     right hand side vector, f, at these ficticious points.  The
+c     stencils at these ficticious points and all stencil connections
+c     to them are set to zero in the subroutine useta which is called
+c     by umgs2.  The computational grid is depicted by
+c
+c         x   x   x   x              x   x   x   x
+c
+c         x   *   *   *              *   *   *   x
+c
+c         x   *   *   *              *   *   *   x
+c    .
+c    .
+c    .
+c         x   *   *   *              *   *   *   x
+c
+c         x   *   *   *              *   *   *   x
+c
+c         x   x   x   x              x   x   x   x
+c
+c     where x depicts the ficticious points and * depicts the interior
+c     points.  The total storage requirements for the fine grid problem
+c     is then 5*im*jm for 5 point stencils and 7*im*jm for 9 point
+c     stencils.  The total storage requirements for the multigrid
+c     solution is approximately 2 to 3 times that of the storage
+c     requirements of the fine grid problem. (See DIMENSION PARAMETERS).
+c     Note:  The first im*jm elements of the arrays ac,aw,as,[asw,ase],
+c     q and f correspond to the finest grid.
+c
+c** DIMENSION PARAMETERS
+c     The arrays ac,aw,ae,asw,ase,q,f,pu,pd and pc are dimensioned as one
+c     dimensional arrays in the calling program.  They are dimensioned
+c     as two dimensional arrays in the working subroutines.  The one
+c     dimensional storage of the arrays, say q, follows: n=(j-1)*jm+i,
+c     where n is the element location in the one dimensional storage of
+c     q corresponding to the (i,j)th element of the two dimensional
+c     storage of q and jm is the number of grid points in the j
+c     direction (including the two ficticious points).
+c
+c     The dimension parameters are id5, id9, idi and idg.  They can be
+c     determined by running the companion program MSS2DIM.F.
+c      id5 - Integer variable.
+c            Dimension of the arrays ac,aw,as,ae,an,q and f in the
+c            calling program.  id5 is the total number of grid points
+c            on the finest grid and all coarser grids.
+c      id9 - Integer variable.
+c            Dimension of the arrays asw,ase,ane,anw in the calling
+c            program.  If ifd59=5 then id9=idi.  If ifd59=9 then
+c            id9=id5.
+c      idi - Integer variable.
+c            Dimension of the work arrays pu and pd in the calling
+c            program.  idi is the total number of grid points on all
+c            of the coarser grids.
+c      idg - Integer variable.
+c            Dimension of the work array gam in the calling program.
+c            It is set to the value im, the number of grid points
+c            in the i-direction on the finest grid.
+c
+c** INPUT
+c            (Note: all variable types are set implicitly)
+c ac,aw,as
+c    ae,an - Real arrays.  Dimensioned (id5) in calling program.
+c            See comments in DESCRIPTION and DIMENSION PARAMETERS.
+c  asw,ase
+c  ane,anw - Real arrays.  Dimensioned (id9) in calling program.
+c            See comments in DESCRIPTION and DIMENSION PARAMETERS.
+c        f - Real array.  Dimensioned (id5) in calling program.
+c            f contains the right hand side vector of the matrix
+c            equation to be solved by umgs2.
+c        q - Real array.  Dimensioned (id5) in calling program.
+c            If ifmg=0, q contains the initial guess on the fine
+c            grid.  If ifmg=1, the initial guess on the fine grid
+c            is determined by the full multigrid process and the
+c            value of q on input to umgs2 not used.
+c   ifd59 -  Integer variable.
+c            =5 - means a 5-point finite difference stencil (ac,aw and
+c                 as) is defined on the finest grid by the user.
+c            =9 - means a 9-point finite difference stencil (ac,aw,as,
+c                 asw, ase) is defined on the finest grid by the user.
+c    ifmg - Integer variable.
+c           =0 - The full multigrid algorithm is not used to obtain a
+c                good initial guess on the fine grid.
+c           =1 - The full multigrid algorithm is used to obtain a good
+c                initial guess on the fine grid.
+c    ncyc - Integer variable.
+c           The maximum number of multigrid v-cycles to be used.
+c           If the maximum norm of the residual is not less than tol
+c           at the end of ncyc cycles, the algorithm is terminated.
+c     tol - Real variable.
+c           >0 - The maximum norm of the residual is calculated at the
+c           end of each multigrid cycle.  The algorithm is terminated
+c           when this maximum becomes less than tol or when the maximum
+c           number of iterations (see ncyc) is exceeded.  It is up to
+c           the user to provide a meaningfull tolerance criteria for
+c           the particular problem being solved.
+c           =0 - Perform ncyc multigrid cycles.  Calculate and print
+c           the maximum norm of the residual after each cycle.
+c           =-1. - Perform ncyc multigrid cycles.  The maximum norm of
+c           the final residual is calculated and returned in the
+c           variable rmax in the calling list of umgs2.
+c           =-2. - Perform ncyc multigrid cycles.  The maximum norm of
+c           the residual is not calculated.
+c   iskip - Integer variable.
+c           =0 - The coarse grid information, coarse grid operators
+c                and interpolation coefficients are calculated by
+c                umgs2.  This information is stored in the arrays
+c                ac, aw, as, asw, ase, pu, pd, np2 and the variable m
+c                and returned to the calling program.
+c           =1 - The calculation of the coarse grid information, coarse
+c                grid operators and interpolation coefficients is
+c                skipped.  This option would be used when umgs2 has
+c                been called with iskip=0 and is being called again
+c                to solve a system of equations with the same matrix.
+c                This would be the case in, say, parabolic problems
+c                with time independent coefficients.
+c           =-1 -The set up of pointers (ugrdfn) is skipped.  Coarse grid
+c                operators and interpolation coefficients are calculated
+c                and the given matrix equation is solved.  This option
+c                would be used when umgs2 has been called with iskip=0
+c                and is being called again to solve a system of
+c                equations with a different matrix of the same
+c                dimensions.  This would be the case for, say,
+c                parabolic problems with time dependent coefficients.
+c           =-2 -The set up of pointers (ugrdfn) is skipped.  Coarse grid
+c                operators and interpolation coefficients are calculated
+c                and returned to the calling program.  No matrix solve.
+c     ipc - Integer variable.
+c           =0 or 1.
+c           ipc is a multigrid parameter which determines the type of
+c           interpolation to be used.  Usually ipc=1 is best.  However, if
+c           the boundary contition equations have been absorbed into the
+c           interior equations then ipc=0 can be used which results in a
+c           slightly more efficient algorithm.
+c    nman - Integer variable.
+c           =0 usually.
+c           =1 signals that the fine grid equations are singular for
+c           the case when homogeneous Neumann boundary conditions are
+c           applied along the entire boundary.  In this case, the
+c           difference equations are singular and the condition that
+c           the integral of q over the domain be zero is added to the
+c           set of difference equations.  This condition is satisfied
+c           by adding the appropriate constant vector to q on the fine
+c           grid.  It is assumed, in this case, that a well-defined
+c           problem has been given to mgss2, i.e. the integral of f
+c           over the domain is zero.
+c      im - Integer variable.
+c           The number of grid points in the x-direction (including two
+c           ficticious points)
+c      jm - Integer variable.
+c           The number of grid points in the y-direction (including two
+c           ficticious points)
+c    lout - Integer variable.
+c           = unit number of output file into which the maximum norm
+c             of the residual after each multigrid v-cycle is printed.
+c             Use:  common /iout/ lout
+c
+c** INPUT/OUTPUT
+c       q - Real array.  Dimensioned (id5)
+c           On input, if ifmg=0, q contains the initial guess on the
+c           finest grid for umgs2.  On output, q contains the final
+c           solution on the finest grid.
+c  ac-anw - Real arrays.  See DIMENSION.
+c           On input, ac, aw, as, [asw and ase] contain the stencil
+c           coefficients for the difference operator on the finest
+c           grid.  When the iskip=1 option is used, these arrays
+c           also are assumed to contain the coarse grid difference
+c           stencil coeficients.
+c           On output, when the iskip=0 option is used, the coarse
+c           grid stencil coeficients are returned in ac - ase.
+c
+c ru,rd,rc - Real work arrays.  Dimensioned (idi)
+c
+c pu,pd,pc - Real work arrays.  Dimensioned (idi).
+c           On input, when the iskip=1 option is used, these arrays
+c           are assumed to contain the interpolation coefficients
+c           used in the semi-coarsening multigrid algorithm.
+c           On output, when the iskip=0 option is used, the
+c           interpolation coeficients are returned in pu and pd.
+c     np2 - Integer work array.  Dimensioned np2(20,8).
+c           On input, when the iskip=1,-1 or -2 option is used, np2 is
+c           assumed to contain the grid information for umgs2.
+c           On output, when the iskip=0 option is used, the grid
+c           information for umgs2 is returned in np2.
+c** OUTPUT
+c    rmax - If tol.ge.-1., the final residual norm is returned in rmax.
+c
+c** SUBROUTINES CALLED BY UMGS2
+c
+c    - ugrdfn, ukey, uintad, urelax, urscal, ursrhs, useta
+c
+c** END OF DESCRIPTION OF UMGS2
+c .....................................................................
+      real*8 ac(id5),aw(id5),as(id5),ae(id5),an(id5),asw(id9),
+     + ase(id9),ane(id9),anw(id9),q(id5),f(id5)
+      real*8 pu(idi),pd(idi),pc(idi),gam(im)
+      integer np2(20,8)
+      real*8 ru(idi),rd(idi),rc(idi)
+      real*8 resid(0:40),confac(0:40)
+      common /io/ linp,lout
+c
+c-time           tsu0=second()
+         if(iskip.eq.0) then
+      call ugrdfn(m,ifd59,is5,is9,isi,np2,im,jm)
+         iquit=0
+           if(m.gt.20) then
+      iquit=1
+      write(lout,*) ' m=',m,' > 20 - np2 is dimensioned np2(m=20,8)'
+           endif
+           if(is5.gt.id5) then
+      iquit=1
+      write(lout,*) ' id5=',id5,' too small.  Should be set to',is5
+           endif
+           if(is9.gt.id9) then
+      iquit=1
+      write(lout,*) ' id9=',id9,' too small.  Should be set to',is9
+           endif
+           if(isi.gt.idi) then
+      iquit=1
+      write(lout,*) ' idi=',idi,' too small.  Should be set to',isi
+            endif
+            if(is5.lt.2*im*jm) then
+      iquit=1
+      write(lout,*) ' id5.lt.2*im*jm can cause problems in useta'
+      write(lout,*) ' this can be remedied by setting id5 larger'
+            endif
+      if(iquit.eq.1) return
+         endif
+         if(iskip.le.0) then
+c     ----------  interpolation and coarse grid operators -----------
+      do 5 k=m-1,1,-1
+cdir$ inline 
+      call ukey(k+1,np2,n5,n9,ni,jm,i9,j9,ifd,jr)
+      call ukey(k,np2,n5c,n9c,nic,jmc,i9c,j9c,ifdc,jrc)
+cdir$ noinline 
+      if(k.eq.m-1) n5cqf=n5c
+    5 call useta(
+     + ac(n5),aw(n5),as(n5),ae(n5),an(n5),asw(n9),ase(n9),
+     + ane(n9),anw(n9),ac(n5c),aw(n5c),as(n5c),ae(n5c),an(n5c),asw(n9c),
+     + ase(n9c),ane(n9c),anw(n9c),pu(nic),pd(nic),pc(nic),ru(nic),
+     + rd(nic),rc(nic),q(n5cqf),f(n5cqf),gam,
+     + im,jm,jmc,ifd,i9,j9,nman,k+1,m,jr,ipc,irc,irurd)
+         endif
+      if(iskip.eq.-2) return
+c
+         if(ifmg.ge.1) then
+      do 6 k=m-1,1,-1
+cdir$ inline 
+      call ukey(k+1,np2,n5,n9,ni,jm,i9,j9,ifd,jr)
+      call ukey(k,np2,n5c,n9c,nic,jmc,i9c,j9c,ifdc,jrc)
+cdir$ noinline 
+    6 call ursrhs(f(n5),f(n5c),pu(nic),pd(nic),pc(nic),ru(nic),
+     + rd(nic),rc(nic),im,jm,jmc,m,k+1,jr,irc)
+         endif
+c-time            tsu1=second()
+c-time            write(lout,*) ' time for setup =',tsu1-tsu0
+      l=1
+      if(ifmg.eq.0) l=m
+      k=l
+      mcyc=0
+      rmaxo=1.
+c     ----------   begin multigrid cycling  ----------------------------
+c
+      if(l.eq.1) go to 20
+cdir$ inline 
+   10 call ukey(k,np2,n5,n9,ni,jm,i9,j9,ifd,jr)
+cdir$ noinline 
+      call urelax(
+     + ac(n5),aw(n5),as(n5),ae(n5),an(n5),asw(n9),ase(n9),
+     + ane(n9),anw(n9),f(n5),q(n5),gam,
+     + im,jm,i9,j9,ifd,nman,k,m,jr,0,0)
+cdir$ inline 
+      call ukey(k-1,np2,n5c,n9c,nic,jmc,i9c,j9c,ifdc,jrc)
+cdir$ noinline 
+      call urscal(
+     + ac(n5),aw(n5),as(n5),ae(n5),an(n5),asw(n9),ase(n9),
+     + ane(n9),anw(n9),q(n5),f(n5),f(n5c),q(n5c),rc(nic),
+     + im,jm,jmc,ifd,i9,j9,k,m,jr,tol,rmax,ipc,irc)
+      if(k.eq.m.and.rmax.lt.tol) go to 60
+         if(k.eq.m.and.tol.ge.-.5) then
+      if(rmaxo.ne.0.) rate=rmax/rmaxo
+      rmaxo=rmax
+      if(mcyc.eq.0) rmax0=rmax
+      resid(mcyc)=rmax
+      confac(mcyc)=rate
+         endif
+      if(tol.eq.-.5) write(lout,*) ' down ',k,rmax
+      k=k-1
+      if(k.gt.1) go to 10
+c     ---------  solve coarsest grid  ----------------------------------
+c
+cdir$ inline 
+   20 call ukey(1,np2,n5,n9,ni,jm,i9,j9,ifd,jr)
+cdir$ noinline 
+      call urelax(
+     + ac(n5),aw(n5),as(n5),ae(n5),an(n5),asw(n9),ase(n9),
+     + ane(n9),anw(n9),f(n5),q(n5),gam,
+     + im,jm,i9,j9,ifd,nman,k,m,jr,0,0)
+         if(l.eq.1) go to 40
+c     ----------  interpolate correction to next finer grid  -----------
+c
+   30 k=k+1
+cdir$ inline 
+      call ukey(k,np2,n5,n9,ni,jm,i9,j9,ifd,jr)
+      call ukey(k-1,np2,n5c,n9c,nic,jmc,i9c,j9c,ifdc,jrc)
+cdir$ noinline 
+      call uintad(
+     + q(n5),q(n5c),pu(nic),pd(nic),im,jm,jmc,1,jr,ipc)
+      call urelax(
+     + ac(n5),aw(n5),as(n5),ae(n5),an(n5),asw(n9),ase(n9),
+     + ane(n9),anw(n9),f(n5),q(n5),gam,
+     + im,jm,i9,j9,ifd,nman,k,m,jr,0,0)
+         if(tol.eq.-.5) then
+      call urscal(
+     + ac(n5),aw(n5),as(n5),ae(n5),an(n5),asw(n9),ase(n9),
+     + ane(n9),anw(n9),q(n5),f(n5),f(n5c),q(n5c),rc(nic),
+     + im,jm,jmc,ifd,i9,j9,k,m,jr,tol,rmax,ipc,irc)
+      write(lout,*) ' up   ',k,rmax
+         endif
+      if(k.lt.l) go to 30
+      if(l.eq.m) go to 50
+c     ----------  interpolate solution to new finest grid l+1 in fmg  ----
+c
+   40 l=l+1
+      k=l
+cdir$ inline 
+      call ukey(l,np2,n5,n9,ni,jm,i9,j9,ifd,jr)
+      call ukey(l-1,np2,n5c,n9c,nic,jmc,i9c,j9c,ifdc,jrc)
+cdir$ noinline 
+      call uintad(
+     + q(n5),q(n5c),pu(nic),pd(nic),im,jm,jmc,0,jr,0)
+      go to 10
+c
+   50 if(nman.eq.1) call uneuman(q(n5),im,jm)
+      mcyc=mcyc+1
+c     ----------  Cycle ncyc times on grid m  ----------------------------
+      if(mcyc.lt.ncyc) go to 10
+c-time            tmg1=second()
+c-time            write(lout,*) ' time in ',ncyc,' cycles =',tmg1-tsu1
+c
+c     ----------  print out final residual and work units  ---------------
+        if(tol.ge.-1.) then
+cdir$ inline 
+      call ukey(m,np2,n5,n9,ni,jm,i9,j9,ifd,jr)
+      call ukey(m-1,np2,n5c,n9c,nic,jmc,i9c,j9c,ifdc,jrc)
+cdir$ noinline 
+      call urscal(
+     + ac(n5),aw(n5),as(n5),ae(n5),an(n5),asw(n9),ase(n9),
+     + ane(n9),anw(n9),q(n5),f(n5),f(n5c),q(n5c),rc(nic),
+     + im,jm,jmc,ifd,i9,j9,k,m,jr,1.,rmax,ipc,irc)
+      resid(mcyc)=rmax
+      confac(mcyc)=rmax/rmaxo
+      nb=0
+      ne=min0(6,mcyc)
+ 2029 write(lout,2033) (mc,mc=nb,ne)
+      write(lout,2032) (resid(mc),mc=nb,ne)
+      write(lout,2031) (confac(mc),mc=nb,ne)
+      nb=ne+1
+      ne=ne+min0(6,mcyc-ne)
+      if(nb.le.ne) go to 2029
+      fconfac=(rmax/rmax0)**(1./float(mcyc))
+      write(lout,2034) fconfac
+ 2034 format(30x,6(1h*)/,' average convergence factor =',f7.3,/,
+     + 30x,6(1h*))
+ 2033 format(7(4x,i2,4x))
+ 2031 format(7(1x,f9.3))
+ 2032 format(7(1x,e9.3))
+         endif
+      return
+   60 write(lout,1003) mcyc,resid(mcyc),tol
+      return
+ 1003 format(' cyc=',i2,' max(res)=',1pe8.2/
+     + ' tolerance condition tol=',1pe8.2,' satisfied')
+      end
+c----------------------------------------------------------------------
+      subroutine ugrdfn(m,ifd59,is5,is9,isi,np2,imx,jmx)
+      implicit real*8(a-h,o-z)
+c----------------------------------------------------------------------
+cdir$ noinline
+c  Given imx, jmx and ifd59 (See comments in mgss2), ugrdfn calculates
+c  the number of grids that will be needed.  Pointers into the arrays
+c  ac, aw, as, asw, ase, q, f, pu, pd, pc, ru, rd and rc and the size
+c  of each grid is calculated and stored in the array np2.  The 
+c  subroutine ukey is called to retrieve the grid information.
+c .....................................................................
+      parameter(n5=1,n9=2,ni=3,jm=4,i9=5,j9=6,ifd=7,jred=8)
+      integer np2(20,8)
+      common /cs/ icorstr,iprint
+      iq5=1
+      iq9=1
+      iqi=1
+      m=1
+      np2(m,1)=jmx
+      np2(m,2)=3
+   10 if(np2(m,1).le.3) go to 20
+      m=m+1
+      np2(m,1)=np2(m-1,1)/2+1
+      if(np2(m-1,2).eq.2.and.mod(np2(m-1,1),2).eq.1)
+     + np2(m,1)=np2(m,1)+1
+      np2(m,2)=2
+      go to 10
+   20 do 30 k=1,m
+      np2(m-k+1,jm)=np2(k,1)
+   30 np2(m-k+1,jred)=np2(k,2)
+      do 40 k=m,1,-1
+      ktot=imx*np2(k,jm)
+      np2(k,n5)=iq5
+      iq5=iq5+ktot
+      np2(k,n9)=iq9
+      if(k.lt.m.or.ifd59.eq.9) iq9=iq9+ktot
+      np2(k,ni)=iqi
+   40 if(k.lt.m) iqi=iqi+ktot
+      do 50 k=1,m
+      np2(k,i9)=imx
+      np2(k,j9)=np2(k,jm)
+   50 np2(k,ifd)=9
+         if(ifd59.eq.5) then
+      np2(m,i9)=1
+      np2(m,j9)=1
+      np2(m,ifd)=5
+         endif
+      is5=iq5-1
+      is9=iq9-1
+      isi=iqi-1
+      return
+      end
+c----------------------------------------------------------------------
+      subroutine ukey(k,np2,nn5,nn9,nni,jjm,ii9,jj9,iifd,jjred)
+      implicit real*8(a-h,o-z)
+c----------------------------------------------------------------------
+c  Returns the grid pointers and dimension variables for grid k.  The
+c  information is stored in the array np2.
+c......................................................................
+      parameter(n5=1,n9=2,ni=3,jm=4,i9=5,j9=6,ifd=7,jred=8)
+      integer np2(20,8)
+      nn5=np2(k,n5)
+      nn9=np2(k,n9)
+      nni=np2(k,ni)
+      jjm=np2(k,jm)
+      ii9=np2(k,i9)
+      jj9=np2(k,j9)
+      iifd=np2(k,ifd)
+      jjred=np2(k,jred)
+      return
+      end
+c----------------------------------------------------------------------
+      subroutine uintad(q,qc,pu,pd,im,jm,jmc,iadd,jred,ipc)
+      implicit real*8 (a-h,o-z)
+c----------------------------------------------------------------------
+c  iadd=1:
+c  Interpolates and adds the coarse grid (kf-1) correction, qc, to the
+c  fine grid (kf) approximation, q, at the black y-lines.
+c  iadd=0:
+c  In the full multigrid algorithm, the solution to the coarse grid
+c  (kf-1) difference equation is interpolated to the fine grid (kf)
+c  to be used as the initial guess vector for kf=2,3,...,m.
+c  Interpolation is at black y-lines only.
+c .....................................................................
+      real*8 q(im,jm),qc(im,jmc),pu(im,jmc),pd(im,jmc)
+      im1=im-1
+      jm1=jm-1
+      jblack=5-jred
+c                                    add correction to next finer grid
+ 1000    if(iadd.eq.1) then
+      jc=3-jred
+      do 10 j=jblack,jm1,2
+      jc=jc+1
+      do 10 i=2,im1
+   10 q(i,j)=q(i,j)+pd(i,jc)*qc(i,jc)+pu(i,jc)*qc(i,jc+1)
+c
+c                       interpolate solution to next finer grid in fmg
+ 1001    else
+      jc=3-jred
+      do 40 j=jblack,jm1,2
+      jc=jc+1
+      do 40 i=2,im1
+   40 q(i,j)=pd(i,jc)*qc(i,jc)+pu(i,jc)*qc(i,jc+1)
+ 1002    endif
+      return
+      end
+c----------------------------------------------------------------------
+      subroutine uneuman(q,im,jm)
+      implicit real*8 (a-h,o-z)
+c----------------------------------------------------------------------
+c  For problems with homogeneous Neumann boundary contitions, the
+c  condition that the integral of q over the domain be zero is added
+c  to the set of difference equations in order to obtain a unique
+c  solution.
+c......................................................................
+      real*8 q(im,jm)
+      im1=im-1
+      jm1=jm-1
+      con=0.
+      do 10 j=2,jm1
+      do 10 i=2,im1
+   10 con=con+q(i,j)
+      con=con/((im-2)*(jm-2))
+      do 20 j=2,jm1
+      do 20 i=2,im1
+   20 q(i,j)=q(i,j)-con
+      return
+      end
+c----------------------------------------------------------------------
+      subroutine urelax(ac,aw,as,ae,an,asw,ase,ane,anw,f,q,gam,
+     + im,jm,i9,j9,ifd,nman,k,m,jred,ipc,iprcud)
+      implicit real*8 (a-h,o-z)
+c----------------------------------------------------------------------
+c  Performs red/black x-line relaxation.  The Thomas algorithm is used
+c  to solve the tridiagonal matrices.
+c** INPUT -
+c       ac-anw=  finite difference operator coeficients
+c            q=  initial approximation
+c            f=  right hand side vector
+c        im,jm=  the number of grid points in the x,y-directions
+c        i9,j9=  the i,j-dimensions of the arrays asw,ase
+c          ifd=  5 or 9 - the size of the stencil
+c         nman-  =0 usually.
+c                =1 signals that the fine grid equations are singular
+c                for the case when Neumann boundary conditions are
+c                applied along the entire boundary.  In this case, the
+c                equations on the coarsest grid (consisting of a single
+c                line of unknowns) is a singular tridiagonal system
+c                and the Thomas algorithm is modified on this grid to
+c                obtain a solution with an arbitrary constant vector
+c                component.  This constant vector is removed on the
+c                finest grid by the call to subroutine uneuman.
+c** OUTPUT -
+c            q=  final approximation after a red/black relaxation sweep
+c .....................................................................
+      real*8 ac(im,jm),aw(im,jm),as(im,jm),ae(im,jm),an(im,jm),
+     + asw(i9,j9),ase(i9,j9),ane(i9,j9),anw(i9,j9)
+      real*8 f(im,jm),q(im,jm),gam(im)
+      jm1=jm-1
+      im1=im-1
+      im2=im-2
+      jblack=5-jred
+c                                              usual red/black relaxatio
+      nrel=2
+      jrb=jred
+c                                             ipc ..brbr relaxation swee
+ 1000    if(iprcud.eq.1) then
+      nrel=ipc
+      if(mod(ipc,2).eq.0) jrb=jblack
+c                                     1 black relax for calc g pu,pd,ru,
+ 1001    elseif(iprcud.eq.2) then
+      nrel=1
+      jrb=jblack
+ 1002    endif
+c
+c
+      do 109 nrr=1,nrel
+ 5000    if(jrb.eq.jblack) then
+c                                                             black rela
+ 6000    if(jblack.le.jm1) then
+ 1400    if(iprcud.ne.2) then
+c
+      do 110 j=jblack,jm1,2
+      do 110 i=2,im1
+  110 q(i,j)=f(i,j)-as(i,j)*q(i,j-1)-an(i,j)*q(i,j+1)
+ 7000    if(ifd.eq.9) then
+      do 120 j=jblack,jm1,2
+      do 120 i=2,im1
+  120 q(i,j)=q(i,j)-asw(i,j)*q(i-1,j-1)-ase(i,j)*q(i+1,j-1)-
+     + anw(i,j)*q(i-1,j+1)-ane(i,j)*q(i+1,j+1)
+ 7001    endif
+ 1401    endif
+c                                                black tridiagonal solve
+c**
+c**  Moved calculation of loop 129 from loop 130 for vectorization
+c**  on vector machines (ie. Cray)
+c**  By: John Towns 2/6/92
+c**
+      do 129 j=jblack,jm1,2        
+  129 q(2,j)=q(2,j)/ac(2,j)
+
+c**
+c**  Changed bet=(quantity) to bet=1./(quantity) to trade two divisions
+c**  for one division and two multiplies (more efficient on all
+c**  machines)
+c**  By: John Towns 2/6/92
+c**
+
+c**
+c**  Cray compiler directives to parallelize tridiagonal solve.
+c**  By: John Towns 4/13/92
+c**
+
+cmic$ parallel  private(bet,gam,i)
+cmic$1shared(ac,ae,aw,q,jblack,jm1,im1,im2)
+cmic$ do parallel
+      do 130 j=jblack,jm1,2
+      bet=1./ac(2,j)
+      do 140 i=3,im1
+      gam(i)=ae(i-1,j)*bet
+      bet=1./(ac(i,j)-aw(i,j)*gam(i))
+  140 q(i,j)=(q(i,j)-aw(i,j)*q(i-1,j))*bet
+      do 150 i=im2,2,-1
+  150 q(i,j)=q(i,j)-gam(i+1)*q(i+1,j)
+  130 continue
+cmic$ end do
+cmic$ end parallel
+ 6001    endif
+c                                                             red relax
+ 5001    else
+c
+      do 210 j=jred,jm1,2
+      do 210 i=2,im1
+  210 q(i,j)=f(i,j)-as(i,j)*q(i,j-1)-an(i,j)*q(i,j+1)
+ 1100    if(ifd.eq.9) then
+      do 220 j=jred,jm1,2
+      do 220 i=2,im1
+  220 q(i,j)=q(i,j)-asw(i,j)*q(i-1,j-1)-ase(i,j)*q(i+1,j-1)-
+     + anw(i,j)*q(i-1,j+1)-ane(i,j)*q(i+1,j+1)
+ 1101    endif
+c                                                      tridiagonal solve
+c                          nman=1 ==> avoid singularity on coarsest grid
+      imm=im1
+          if(nman.eq.1.and.k.eq.1) then
+      imm=im-2
+      q(im1,2)=0.
+      gam(im1)=0.
+          endif
+c
+c**
+c**  Moved calculation of loop 229 from loop 230 for vectorization
+c**  on vector machines (ie. Cray)
+c**  By: John Towns 2/6/92
+c**
+      do 229 j=jred,jm1,2
+  229 q(2,j)=q(2,j)/ac(2,j)
+
+c**
+c**  Changed bet=(quantity) to bet=1./(quantity) to trade two divisions
+c**  for one division and two multiplies (more efficient on all
+c**  machines)
+c**  By: John Towns 2/6/92
+c**
+
+c**
+c**  Cray compiler directives to parallelize tridiagonal solve.
+c**  By: John Towns 4/13/92
+c**
+
+cmic$ parallel  private(bet,gam,i) 
+cmic$1shared(ac,ae,aw,q,jred,jm1,im2,imm)
+cmic$ do parallel
+      do 230 j=jred,jm1,2
+      bet=1./ac(2,j)
+      do 240 i=3,imm
+      gam(i)=ae(i-1,j)*bet
+      bet=1./(ac(i,j)-aw(i,j)*gam(i))
+  240 q(i,j)=(q(i,j)-aw(i,j)*q(i-1,j))*bet
+      do 250 i=im2,2,-1
+  250 q(i,j)=q(i,j)-gam(i+1)*q(i+1,j)
+  230 continue
+cmic$ end do
+cmic$ end parallel
+ 5002    endif
+      jrb=5-jrb
+  109 continue
+      return
+      end
+c----------------------------------------------------------------------
+      subroutine urscal(
+     + ac,aw,as,ae,an,asw,ase,ane,anw,q,f,fc,qc,rc,
+     + im,jm,jmc,ifd,i9,j9,kf,m,jred,tol,rmax,ipc,irc)
+      implicit real*8 (a-h,o-z)
+c----------------------------------------------------------------------
+c  Defines the grid kf-1 right hand side, fc, as the restriction of the
+c  grid kf residual.  The restriction operator is the transpose of the
+c  interpolation operator.  Note:  The grid kf residual is zero at the
+c  black lines (j-direction) as a result of red/black relaxation.
+c  Thus, the restriction is simple injection.  The initial guess, qc,
+c  for the coarse grid correction equation is set to zero.  The
+c  maximum norm of the residual is calculated and returned in rmax.
+c......................................................................
+      real*8 ac(im,jm),aw(im,jm),as(im,jm),ae(im,jm),an(im,jm),
+     + asw(i9,j9),ase(i9,j9),ane(i9,j9),anw(i9,j9)
+      real*8 f(im,jm),q(im,jm),fc(im,jmc),qc(im,jmc)
+      real*8 rc(im,jmc)
+      rmax=0.
+      im1=im-1
+      jm1=jm-1
+      jmc1=jmc-1
+      jc=1
+      do 10 j=jred,jm1,2
+      jc=jc+1
+      do 10 i=2,im1
+   10 fc(i,jc)=f(i,j)-as(i,j)*q(i,j-1)-an(i,j)*q(i,j+1)-
+     + aw(i,j)*q(i-1,j)-ae(i,j)*q(i+1,j)-ac(i,j)*q(i,j)
+ 1000    if(ifd.eq.9) then
+      jc=1
+      do 20 j=jred,jm1,2
+      jc=jc+1
+      do 20 i=2,im1
+   20 fc(i,jc)=fc(i,jc)-asw(i,j)*q(i-1,j-1)-ane(i,j)*q(i+1,j+1)-
+     + ase(i,j)*q(i+1,j-1)-anw(i,j)*q(i-1,j+1)
+ 1001    endif
+c                                           zero out qc as initial guess
+      do 25 jc=1,jmc
+      do 25 i=1,im
+   25 qc(i,jc)=0.
+c                                        if kf=m calculate residual norm
+ 2000    if((kf.eq.m.and.tol.ge.0.).or.tol.eq.-.5) then
+      do 30 jc=2,jmc1
+      do 30 i=2,im1
+      resmax=abs(fc(i,jc))
+   30 if(resmax.gt.rmax) rmax=resmax
+ 2001    endif
+c                                                 weight rhs if irc.ge.1
+ 3000    if(irc.eq.1.and.ipc.ge.1) then
+      do 40 jc=2,jmc1
+      do 40 i=2,im1
+   40 fc(i,jc)=rc(i,jc)*fc(i,jc)
+ 3001    endif
+c
+      return
+      end
+c----------------------------------------------------------------------
+      subroutine ursrhs(f,fc,ru,rd,rc,im,jm,jmc,m,kf,jred,irc)
+      implicit real*8 (a-h,o-z)
+c----------------------------------------------------------------------
+c  Restricts the right hand side vector on grid kf onto grid kf-1 when
+c  the full multigrid (ifmg>0) option is used.  The restriction operator
+c  is NOT necessarily the transpose of the interpolation operator.
+c......................................................................
+      real*8 f(im,jm),fc(im,jmc),ru(im,jmc),rd(im,jmc),rc(im,jmc)
+      jm1=jm-1
+      im1=im-1
+      jc=1
+ 1000    if(irc.eq.0) then
+      do 10 j=jred,jm1,2
+      jc=jc+1
+      do 10 i=2,im1
+   10 fc(i,jc)=ru(i,jc-1)*f(i,j-1)+rd(i,jc)*f(i,j+1)+f(i,j)
+ 1001    else
+      do 20 j=jred,jm1,2
+      jc=jc+1
+      do 20 i=2,im1
+   20 fc(i,jc)=ru(i,jc-1)*f(i,j-1)+rd(i,jc)*f(i,j+1)+
+     + rc(i,jc)*f(i,j)
+ 1002    endif
+      return
+      end
+c----------------------------------------------------------------------
+      subroutine useta(
+     + ac,aw,as,ae,an,asw,ase,ane,anw,acc,awc,asc,aec,
+     + anc,aswc,asec,anec,anwc,pu,pd,pc,ru,rd,rc,qw,fw,gam,
+     + im,jm,jmc,ifd,i9,j9,nman,kf,m,jred,ipc,irc,irurd)
+      implicit real*8 (a-h,o-z)
+c----------------------------------------------------------------------
+cdir$ noinline
+c     Calculates the interpolation coefficients from grid kf-1 to
+c     grid kf and the coarse grid operator on grid kf-1.
+c** INPUT -
+c    ac - anw = fine grid (kf) array stencil coeficients
+c            m=  total number of grids
+c           kf=  grid number of the fine grid
+c          ifd=  the size of the fine grid stencil (= 5 or 9)
+c        i9,j9=  the i,j-dimensions of the arrays asw,ase
+c        qw,fw=  coarse grid portions of q and f used for work arrays here
+c   (See comments in MGSS2 for details)
+c** OUTPUT -
+c  acc - anwc = coarse grid (kf-1) array stencil coeficients
+c        pu,pd=  arrays of interpolation coefficients from grid kf-1
+c                to grid kf
+c .....................................................................
+      real*8 ac(im,jm),aw(im,jm),as(im,jm),ae(im,jm),an(im,jm),
+     + asw(i9,j9),ase(i9,j9),ane(i9,j9),anw(i9,j9),
+     + ru(im,jmc),rd(im,jmc),rc(im,jmc),
+     + pu(im,jmc),pd(im,jmc),pc(im,jmc),gam(im)
+      real*8 acc(im,jmc),awc(im,jmc),asc(im,jmc),aec(im,jmc),
+     + anc(im,jmc),aswc(im,jmc),asec(im,jmc),anec(im,jmc),anwc(im,jmc)
+      real*8 qw(im,jm),fw(im,jm)
+      common /io/ linp,lout
+      common /prsol/ iprsol
+c
+      pcscale=.001
+c
+      im1=im-1
+      jm1=jm-1
+      jmc1=jmc-1
+      jblack=5-jred
+c                           zeroing out connections to fictitious points
+      do 1 j=1,jm
+      do 2 i=1,im,im1
+      ac(i,j)=0.
+      aw(i,j)=0.
+      as(i,j)=0.
+      ae(i,j)=0.
+    2 an(i,j)=0.
+      aw(2,j)=0.
+    1 ae(im1,j)=0.
+      do 3 i=1,im
+      do 4 j=1,jm,jm1
+      ac(i,j)=0.
+      aw(i,j)=0.
+      as(i,j)=0.
+      ae(i,j)=0.
+    4 an(i,j)=0.
+      as(i,2)=0.
+    3 an(i,jm1)=0.
+ 1000    if(ifd.eq.9) then
+      do 5 j=1,jm
+      do 6 i=1,im,im1
+      asw(i,j)=0.
+      ase(i,j)=0.
+      ane(i,j)=0.
+    6 anw(i,j)=0.
+      asw(2,j)=0.
+      anw(2,j)=0.
+      ase(im1,j)=0.
+    5 ane(im1,j)=0.
+      do 7 i=1,im
+      do 8 j=1,jm,jm1
+      asw(i,j)=0.
+      ase(i,j)=0.
+      ane(i,j)=0.
+    8 anw(i,j)=0.
+      ase(i,2)=0.
+      asw(i,2)=0.
+      ane(i,jm1)=0.
+    7 anw(i,jm1)=0.
+ 1001 endif
+c
+      do 9 jc=1,jmc
+      do 9 i=1,im
+      pc(i,jc)=0.
+      pu(i,jc)=0.
+      pd(i,jc)=0.
+      rc(i,jc)=0.
+      ru(i,jc)=0.
+    9 rd(i,jc)=0.
+c
+c                               calculation of interpolation coeficients
+c
+
+c                                                              define pc
+ 2000    if(ipc.ge.1) then
+      do 20 j=2,jm1
+      do 20 i=2,im1
+      fw(i,j)=0.
+   20 qw(i,j)=1.
+c
+      call urelax(ac,aw,as,ae,an,asw,ase,ane,anw,fw,qw,gam,im,jm,
+     + i9,j9,ifd,nman,kf,m,jred,ipc,1)
+c                                                              scale pc
+      pcmax=0.
+      jc=1
+      do 40 j=jred,jm1,2
+      jc=jc+1
+      do 40 i=2,im1
+      pc(i,jc)=qw(i,j)
+   40 pcmax=max(pcmax,abs(qw(i,j)))
+      do 50 jc=2,jmc1
+      do 50 i=2,im1
+      if(pc(i,jc).eq.0.) pc(i,jc)=pcscale
+   50 pc(i,jc)=pc(i,jc)/pcmax
+c
+ 2001    else
+      do 55 jc=2,jmc1
+      do 55 i=2,im1
+   55 pc(i,jc)=1.
+ 2002    endif
+c
+c                                                              define pu
+      jc=3-jred
+      do 60 j=jblack,jm1,2
+      jc=jc+1
+ 4000    if(ipc.eq.0) then
+      do 70 i=2,im1
+   70 qw(i,j)=-an(i,j)
+ 5000    if(ifd.eq.9) then
+      do 80 i=2,im1
+   80 qw(i,j)=qw(i,j)-ane(i,j)-anw(i,j)
+ 5001    endif
+ 4001    else
+      do 90 i=2,im1
+   90 qw(i,j)=-an(i,j)*pc(i,jc+1)
+ 6000    if(ifd.eq.9) then
+      do 100 i=2,im1
+  100 qw(i,j)=qw(i,j)-ane(i,j)*pc(i+1,jc+1)-anw(i,j)*pc(i-1,jc+1)
+ 6001    endif
+ 4002    endif
+   60 continue
+c                                                          solve for pu
+      call urelax(ac,aw,as,ae,an,asw,ase,ane,anw,fw,qw,gam,im,jm,
+     + i9,j9,ifd,nman,kf,m,jred,ipc,2)
+c
+
+      jc=3-jred
+      do 102 j=jblack,jm1,2
+      jc=jc+1
+ 3020    if(j.lt.jm1) then
+      do 103 i=2,im1
+  103 pu(i,jc)=qw(i,j)
+ 3021    endif
+  102 continue
+c
+c                                                              define pd
+      jc=3-jred
+      do 106 j=jblack,jm1,2
+      jc=jc+1
+ 8000    if(ipc.eq.0) then
+      do 130 i=2,im1
+  130 qw(i,j)=-as(i,j)
+ 9000    if(ifd.eq.9) then
+      do 140 i=2,im1
+  140 qw(i,j)=qw(i,j)-ase(i,j)-asw(i,j)
+ 9001    endif
+c
+ 8001    else
+c
+      do 150 i=2,im1
+  150 qw(i,j)=-as(i,j)*pc(i,jc)
+ 1100    if(ifd.eq.9) then
+      do 160 i=2,im1
+  160 qw(i,j)=qw(i,j)-ase(i,j)*pc(i+1,jc)-asw(i,j)*pc(i-1,jc)
+ 1101    endif
+ 8002    endif
+  106 continue
+c                                                          solve for pd
+      call urelax(ac,aw,as,ae,an,asw,ase,ane,anw,fw,qw,gam,im,jm,
+     + i9,j9,ifd,nman,kf,m,jred,ipc,2)
+c
+      jc=3-jred
+      do 105 j=jblack,jm1,2
+      jc=jc+1
+ 7010    if(j.gt.2) then
+      do 104 i=2,im1
+  104 pd(i,jc)=qw(i,j)
+ 7011    endif
+  105 continue
+c
+c                                            define restriction operator
+c
+c                                                              define rc
+ 1200    if(irc.eq.1) then
+      do 500 jc=2,jmc1
+      do 500 i=2,im1
+  500 rc(i,jc)=pc(i,jc)
+         else
+      do 502 jc=2,jmc1
+      do 502 i=2,im1
+  502 rc(i,jc)=1.
+ 1201    endif
+c   
+c                                           compute qw = -Cb(inv) * eb*
+ 1300    if(irurd.ge.1) then
+      jc=3-jred
+ 3300    if(irurd.eq.1) then
+      do 560 j=jblack,jm1,2
+      jc=jc+1
+      do 560 i=2,im1
+  560 qw(i,j)=1.
+ 3301    elseif(irurd.eq.2) then
+      do 561 j=jblack,jm1,2
+      jc=jc+1
+      do 561 i=2,im1
+  561 qw(i,j)=(pd(i,jc)*pc(i,jc)+pu(i,jc)*pc(i,jc+1))
+ 3302    endif
+c
+      call urelax(ac,aw,as,ae,an,asw,ase,ane,anw,fw,qw,gam,im,jm,
+     + i9,j9,ifd,nman,kf,m,jred,ipc,2)
+c
+      jc=3-jred
+      do 566 j=jblack,jm1,2
+      jc=jc+1
+c                                           compute ru = -b(j+1) * qw
+ 1400    if(j.lt.jm1) then
+      do 570 i=2,im1
+  570 ru(i,jc)=-as(i,j+1)*qw(i,j)
+ 1500    if(ifd.eq.9) then
+      do 580 i=2,im1
+  580 ru(i,jc)=ru(i,jc)-ase(i,j+1)*qw(i+1,j)-asw(i,j+1)*qw(i-1,j)
+ 1501    endif
+ 1401    endif
+c                             compute rd = -a(j-1) * c(j)(inv) * qw
+ 1600    if(j.gt.2) then
+      do 650 i=2,im1
+  650 rd(i,jc)=-an(i,j-1)*qw(i,j)
+ 1700    if(ifd.eq.9) then
+      do 660 i=2,im1
+  660 rd(i,jc)=rd(i,jc)-ane(i,j-1)*qw(i+1,j)-anw(i,j-1)*qw(i-1,j)
+ 1701    endif
+ 1601    endif
+  566 continue
+c
+ 1301    else
+c                                              else set ru=pu and rd=pd
+      jc=3-jred
+      do 670 j=jblack,jm1,2
+      jc=jc+1
+      do 670 i=2,im1
+      ru(i,jc)=pu(i,jc)
+  670 rd(i,jc)=pd(i,jc)
+ 1303    endif
+c
+c                                   calculating the coarse grid operator
+c
+ 1800    if(ipc+irc+irurd.eq.0) then
+      j=jred-2
+      do 200 jc=2,jmc1
+      j=j+2
+      do 200 i=2,im1
+      acc(i,jc)=ac(i,j)+an(i,j-1)*pu(i,jc-1)+as(i,j+1)*pd(i,jc)+
+     + pu(i,jc-1)*(as(i,j)+ac(i,j-1)*pu(i,jc-1))+
+     + pd(i,jc)*(an(i,j)+ac(i,j+1)*pd(i,jc))
+      awc(i,jc)=aw(i,j)+pd(i-1,jc)*aw(i,j+1)*pd(i,jc)+pu(i-1,jc-1)*
+     + aw(i,j-1)*pu(i,jc-1)
+      asc(i,jc)=as(i,j)*pd(i,jc-1)+pu(i,jc-1)*(as(i,j-1)+
+     + ac(i,j-1)*pd(i,jc-1))
+      aec(i,jc)=ae(i,j)+pd(i+1,jc)*ae(i,j+1)*pd(i,jc)+pu(i+1,jc-1)*
+     + ae(i,j-1)*pu(i,jc-1)
+      anc(i,jc)=an(i,j)*pu(i,jc)+pd(i,jc)*(an(i,j+1)+
+     + ac(i,j+1)*pu(i,jc))
+      aswc(i,jc)=pd(i-1,jc-1)*aw(i,j-1)*pu(i,jc-1)
+      asec(i,jc)=pd(i+1,jc-1)*ae(i,j-1)*pu(i,jc-1)
+      anec(i,jc)=pu(i+1,jc)*ae(i,j+1)*pd(i,jc)
+  200 anwc(i,jc)=pu(i-1,jc)*aw(i,j+1)*pd(i,jc)
+ 1900    if(ifd.eq.9) then
+      j=jred-2
+      do 210 jc=2,jmc1
+      j=j+2
+      do 210 i=2,im1
+      awc(i,jc)=awc(i,jc)+asw(i,j+1)*pd(i,jc)+anw(i,j-1)*pu(i,jc-1)+
+     + pd(i-1,jc)*anw(i,j)+pu(i-1,jc-1)*asw(i,j)
+      aec(i,jc)=aec(i,jc)+ase(i,j+1)*pd(i,jc)+ane(i,j-1)*pu(i,jc-1)+
+     + pd(i+1,jc)*ane(i,j)+pu(i+1,jc-1)*ase(i,j)
+      aswc(i,jc)=aswc(i,jc)+asw(i,j-1)*pu(i,jc-1)+pd(i-1,jc-1)*asw(i,j)
+      asec(i,jc)=asec(i,jc)+ase(i,j-1)*pu(i,jc-1)+pd(i+1,jc-1)*ase(i,j)
+      anec(i,jc)=anec(i,jc)+ane(i,j+1)*pd(i,jc)+pu(i+1,jc)*ane(i,j)
+  210 anwc(i,jc)=anwc(i,jc)+anw(i,j+1)*pd(i,jc)+pu(i-1,jc)*anw(i,j)
+ 1901    endif
+c
+ 1801    else
+c
+      j=jred-2
+      do 300 jc=2,jmc1
+      j=j+2
+      do 300 i=2,im1
+      acc(i,jc)=rc(i,jc)*(ac(i,j)*pc(i,jc)+
+     +                    as(i,j)*pu(i,jc-1)+
+     +                    an(i,j)*pd(i,jc))+
+     +        ru(i,jc-1)*(ac(i,j-1)*pu(i,jc-1)+
+     +                    an(i,j-1)*pc(i,jc))+
+     +          rd(i,jc)*(ac(i,j+1)*pd(i,jc)+
+     +                    as(i,j+1)*pc(i,jc))
+      awc(i,jc)=rc(i,jc)*aw(i,j)*pc(i-1,jc)+
+     +          rd(i,jc)*aw(i,j+1)*pd(i-1,jc)+
+     +        ru(i,jc-1)*aw(i,j-1)*pu(i-1,jc-1)
+      asc(i,jc)=rc(i,jc)*as(i,j)*pd(i,jc-1)+
+     +       ru(i,jc-1)*(ac(i,j-1)*pd(i,jc-1)+
+     +                   as(i,j-1)*pc(i,jc-1))
+      aec(i,jc)=rc(i,jc)*ae(i,j)*pc(i+1,jc)+
+     +          rd(i,jc)*ae(i,j+1)*pd(i+1,jc)+
+     +        ru(i,jc-1)*ae(i,j-1)*pu(i+1,jc-1)
+      anc(i,jc)=rc(i,jc)*an(i,j)*pu(i,jc)+
+     +         rd(i,jc)*(ac(i,j+1)*pu(i,jc)+
+     +                   an(i,j+1)*pc(i,jc+1))
+      aswc(i,jc)=ru(i,jc-1)*aw(i,j-1)*pd(i-1,jc-1)
+      asec(i,jc)=ru(i,jc-1)*ae(i,j-1)*pd(i+1,jc-1)
+      anec(i,jc)=rd(i,jc)*ae(i,j+1)*pu(i+1,jc)
+  300 anwc(i,jc)=rd(i,jc)*aw(i,j+1)*pu(i-1,jc)
+ 2100    if(ifd.eq.9) then
+      j=jred-2
+      do 310 jc=2,jmc1
+      j=j+2
+      do 310 i=2,im1
+      awc(i,jc)=awc(i,jc)+(rd(i,jc)*asw(i,j+1)+
+     +                   ru(i,jc-1)*anw(i,j-1))*pc(i-1,jc)+
+     +                    rc(i,jc)*(anw(i,j)*pd(i-1,jc)+
+     +                              asw(i,j)*pu(i-1,jc-1))
+      aec(i,jc)=aec(i,jc)+(rd(i,jc)*ase(i,j+1)+
+     +                   ru(i,jc-1)*ane(i,j-1))*pc(i+1,jc)+
+     +                    rc(i,jc)*(ane(i,j)*pd(i+1,jc)+
+     +                              ase(i,j)*pu(i+1,jc-1))
+      aswc(i,jc)=aswc(i,jc)+ru(i,jc-1)*asw(i,j-1)*pc(i-1,jc-1)+
+     +                        rc(i,jc)*asw(i,j)*pd(i-1,jc-1)
+      asec(i,jc)=asec(i,jc)+ru(i,jc-1)*ase(i,j-1)*pc(i+1,jc-1)+
+     +                        rc(i,jc)*ase(i,j)*pd(i+1,jc-1)
+      anec(i,jc)=anec(i,jc)+rd(i,jc)*ane(i,j+1)*pc(i+1,jc+1)+
+     +                      rc(i,jc)*ane(i,j)*pu(i+1,jc)
+  310 anwc(i,jc)=anwc(i,jc)+rd(i,jc)*anw(i,j+1)*pc(i-1,jc+1)+
+     +                      rc(i,jc)*anw(i,j)*pu(i-1,jc)
+ 2101    endif
+ 1802    endif
+cxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+         if(iprsol.eq.2.and.kf.eq.m.and.ifd.eq.5) then
+      do 111 j=2,jm1
+      do 111 i=2,im1
+               write(lout,1010) im,jm,an(i,j)
+               write(lout,1012) i,j,aw(i,j),ac(i,j),ae(i,j)
+ 111           write(lout,1011) as(i,j)
+ 1010 format(2(1x,i2),14x,f12.5)
+ 1011 format(20x,f12.5)
+ 1012 format(2(1x,i2),3(1x,f12.5))
+          endif
+         if(iprsol.eq.2.and.kf.eq.m.and.ifd.eq.9) then
+      do 115 j=2,jm1
+      do 115 i=2,im1
+               write(lout,1017) im,jm,anw(i,j),an(i,j),ane(i,j)
+               write(lout,1017) i,j,aw(i,j),ac(i,j),ae(i,j)
+ 115           write(lout,1016) asw(i,j),as(i,j),ase(i,j)
+ 1016  format(6x,3(1x,f12.5))
+ 1017  format(2(1x,i2),3(1x,f12.5))
+          endif
+cxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+      if(iprsol.eq.2.or.iprsol.eq.3) then
+      write(lout,*) ' coarse grid kf-1 ==',kf-1
+      do 211 jc=2,jmc1
+      do 211 i=2,im1
+      write(lout,2015) im,jmc,anwc(i,jc),anc(i,jc),anec(i,jc),pd(i,jc),
+     + rd(i,jc)
+      write(lout,2013) i,jc,awc(i,jc),acc(i,jc),aec(i,jc),pc(i,jc),
+     + rc(i,jc)
+      write(lout,2014) aswc(i,jc),asc(i,jc),asec(i,jc),pu(i,jc-1),
+     + ru(i,jc-1)
+ 211  write(lout,*) '                          m, kf-1=',m,kf-1
+ 2014 format(9x,3(1x,f11.5),3x,2(f11.5,1x))
+ 2013 format(3x,2(1x,i2),3(1x,f11.5),3x,2(f11.5,1x))
+ 2015 format(3x,2(1x,i2),3(1x,f11.5),3x,2(f11.5,1x))
+          endif
+cxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+      return
+      end
+c**********************************************************************
+      subroutine uoutpt(q,im,jm)
+      implicit real*8 (a-h,o-z)
+c**********************************************************************
+c  Sample output subroutine.  Prints out the values of q at the
+c  interior points of the finest grid.
+c**********************************************************************
+      common /io/ linp,lout
+      real*8 q(im,jm)
+      im1=im-1
+      jm1=jm-1
+      ie=1
+ 20   ib=ie+1
+      ie=ib+min0(5,im1-ib)
+      do 10 j=jm1,2,-1
+   10 write(lout,100) j,(q(i,j),i=ib,ie)
+      if(ie.lt.im1) go to 20
+  100 format(1x,i2,1x,6(1x,f10.4))
+      return
+      end
+c**********************************************************************
+      subroutine uputf(ac,aw,as,ae,an,f,nx,ny,
+     + lo,nxd,nyd,i32su)
+      implicit real*8 (a-h,o-z)
+c**********************************************************************
+      real*8 ac(lo:nxd,lo:nyd),aw(lo:nxd,lo:nyd),
+     + as(lo:nxd,lo:nyd),ae(lo:nxd,lo:nyd),
+     + an(lo:nxd,lo:nyd),f(lo:nxd,lo:nyd)
+      real*8 a(20),b(20),ab(20)
+      integer il(20),ir(20),jb(20),jt(20)
+      integer ibc(4)
+      common /io/ linp,lout
+c
+c dcell is the value assigned to the diagonal element of a dead
+c cell, i.e. a cell that has 0 conections to all its neighbors.
+c icendif determines the differencing scheme for the first order terms
+c icendif=0 - central differencing, =1 - forward differencing.
+c
+      dcell = 1.
+      do 321 j=lo,nyd
+      do 321 i=lo,nxd
+      ac(i,j)=0.
+      aw(i,j)=0.
+      as(i,j)=0.
+      ae(i,j)=0.
+ 321  an(i,j)=0.
+c
+      nx1=nx+1
+      ny1=ny+1
+      read(linp,*) icendif
+      read(linp,*) ibc(1),ibc(2),ibc(3),ibc(4)
+      read(linp,*) nreg
+      hx=1./nx
+      hy=1./ny
+      hx2=hx*hx
+      hy2=hy*hy
+      hxy2=hx2*hy2
+      dcell=hxy2*dcell
+      write(lout,1011) ibc(1),ibc(2),ibc(3),ibc(4),icendif,dcell,hx,hy
+ 1011 format(' ibc_l ibc_b ibc_r ibc_t   icendiff   dcell   hx      hy'/
+     + 4x,i1,5x,i1,5x,i1,5x,i1,7x,i1,6x,e8.2,2x,f6.4,2x,f6.4/)
+c
+      do 10 irg=1,nreg
+      read(linp,*) il(irg),ir(irg),jb(irg),jt(irg)
+      read(linp,*) xk,yk,sreg,freg
+      read(linp,*) a(irg),b(irg),ab(irg)
+      write(lout,1000) il(irg),ir(irg),jb(irg),jt(irg),xk,yk,sreg,freg
+ 1000 format(1x,i3,',',i3,' X ',i3,',',i3,2x,1pe8.2,1x,1pe8.2,2x,
+     + 1pe8.1,1x,1pe8.1)
+      write(lout,1001) a(irg),b(irg),ab(irg)
+ 1001 format(17x,' a=',1pe10.3,'  b=',1pe10.3,' ab=',1pe10.3)
+      xk=xk*hy2
+      yk=yk*hx2
+      sreg=sreg*hxy2
+      freg=freg*hxy2
+      a(irg)=a(irg)*hx2*hy
+      b(irg)=b(irg)*hx*hy2
+      ab(irg)=ab(irg)*hx*hy
+         if(icendif.eq.0) then
+      a(irg)=a(irg)/2.
+      b(irg)=b(irg)/2.
+      ab(irg)=ab(irg)/4.
+         endif
+      if(il(irg).eq.1) il(irg)=0
+      if(ir(irg).eq.nx) ir(irg)=nx1
+      if(jb(irg).eq.1) jb(irg)=0
+      if(jt(irg).eq.ny) jt(irg)=ny1
+      do 20 i=il(irg),ir(irg)
+      do 20 j=jb(irg),jt(irg)
+      aw(i,j)=xk
+      as(i,j)=yk
+      ac(i,j)=sreg
+   20 f(i,j)=freg
+   10 continue
+      write(lout,*) ' - - - - - - - - - - - - - - - - - - - - - - - - -'
+c                             defining coeficients by harmonic averaging
+      do 30 i=1,nx
+      asio=as(i,0)
+      do 30 j=1,ny1
+      aa=as(i,j)*asio
+         if(aa.gt.0.) then
+      t=2.*aa/(as(i,j)+asio)
+      asio=as(i,j)
+      as(i,j)=t
+         else
+      asio=as(i,j)
+      as(i,j)=0.
+         endif
+   30 continue
+      do 40 j=1,ny
+      awoj=aw(0,j)
+      do 40 i=1,nx1
+      aa=aw(i,j)*awoj
+         if(aa.gt.0.) then
+      t=2.*aa/(aw(i,j)+awoj)
+      awoj=aw(i,j)
+      aw(i,j)=t
+         else
+      awoj=aw(i,j)
+      aw(i,j)=0.
+         endif
+   40 continue
+      do 45 i=0,nx
+      do 45 j=0,ny
+      ae(i,j)=aw(i+1,j)
+   45 an(i,j)=as(i,j+1)
+      do 50 i=1,nx
+      do 50 j=1,ny
+      ac(i,j)=ac(i,j)-aw(i,j)-as(i,j)-ae(i,j)-
+     + an(i,j)
+   50 if(ac(i,j).eq.0.) ac(i,j)=dcell
+c                                        adding on the unsymmetric terms
+      do 51 irg=1,nreg
+c                                           icendif=0 ==> central diff g
+         if(icendif.eq.0) then
+      do 52 i=il(irg),ir(irg)
+      do 52 j=jb(irg),jt(irg)
+      aw(i,j)=aw(i,j)-a(irg)
+      ae(i,j)=ae(i,j)+a(irg)
+      an(i,j)=an(i,j)+b(irg)
+   52 as(i,j)=as(i,j)-b(irg)
+c                                        icendif=1 ==> upstream diff s
+         elseif(icendif.eq.1) then
+      do 54 i=il(irg),ir(irg)
+      do 54 j=jb(irg),jt(irg)
+      ac(i,j)=ac(i,j)-a(irg)
+      ae(i,j)=ae(i,j)+a(irg)
+      an(i,j)=an(i,j)+b(irg)
+   54 ac(i,j)=ac(i,j)-b(irg)
+         endif
+   51 continue
+c                           set boundary conditions for 5 point operator
+      do 60 j=1,ny
+c                                                          left boundary
+      ae(0,j)=aw(1,j)
+         if(ibc(1).eq.1) then
+      ac(0,j)=aw(1,j)
+      f(0,j)=2.*aw(1,j)*0.0
+         elseif(ibc(1).eq.2) then
+      ac(0,j)=-aw(1,j)
+      f(0,j)=hx*aw(1,j)*0.0
+         endif
+         if(ac(0,j).eq.0.) then
+      ae(0,j)=0.
+      ac(0,j)=dcell
+      f(0,j)=0.
+         endif
+c                                                         right boundary
+      aw(nx1,j)=ae(nx,j)
+         if(ibc(3).eq.1) then
+      ac(nx1,j)=ae(nx,j)      
+      f(nx1,j)=2.*ae(nx,j)*0.
+         elseif(ibc(3).eq.2) then
+      ac(nx1,j)=-ae(nx,j)
+      f(nx1,j)=hx*ae(nx,j)*0.
+         endif
+         if(ac(nx1,j).eq.0.) then
+      aw(nx1,j)=0.
+      ac(nx1,j)=dcell
+      f(nx1,j)=0.
+         endif
+   60 continue
+c
+      do 80 i=1,nx
+c                                                         lower boundary
+      an(i,0)=as(i,1)
+         if(ibc(2).eq.1) then
+      ac(i,0)=as(i,1)
+      f(i,0)=2.*as(i,1)*0.
+         elseif(ibc(2).eq.2) then
+      ac(i,0)=-as(i,1)
+      f(i,0)=hy*as(i,1)*0.
+         endif
+         if(ac(i,0).eq.0.) then
+      an(i,0)=0.
+      ac(i,0)=dcell
+      f(i,0)=0.
+         endif
+c                                                         upper boundary
+      as(i,ny1)=an(i,ny)
+         if(ibc(4).eq.1) then
+      ac(i,ny1)=an(i,ny)
+      f(i,ny1)=2.*an(i,ny)*0.
+         elseif(ibc(4).eq.2) then
+      ac(i,ny1)=-an(i,ny)
+      f(i,ny1)=2.*an(i,ny)*0.
+         endif
+         if(ac(i,ny1).eq.0.) then
+      as(i,ny1)=0.
+      ac(i,ny1)=dcell
+      f(i,ny1)=0.
+         endif
+   80 continue
+c                     connections between ghost boundary points zeroed
+      do 83 j=1,ny1
+      as(0,j)=0.
+      an(0,j-1)=0.
+      as(nx1,j)=0.
+   83 an(nx1,j-1)=0.
+      do 86 i=1,nx1
+      aw(i,0)=0.
+      ae(i-1,0)=0.
+      aw(i,ny1)=0.
+   86 ae(i-1,ny1)=0.
+c                                        corner stencils and rhs defined
+         if(i32su.eq.32) then
+      do 90 j=0,ny1,ny1
+      do 90 i=0,nx1,nx1
+      ac(i,j)=dcell
+      aw(i,j)=0.
+      ae(i,j)=0.
+      as(i,j)=0.
+      an(i,j)=0.
+   90 f(i,j)=0.
+         endif
+c                                i32su=22 - boundary conditions absorbed
+         if(i32su.eq.22) then
+      do 100 j=1,ny
+      awac=aw(1,j)/ac(0,j)
+      ac(1,j)=ac(1,j)-awac*ae(0,j)
+      aw(1,j)=0.
+      f(1,j)=f(1,j)-awac*f(0,j)
+      ac(0,j)=0.
+      ae(0,j)=0.
+      f(0,j)=0
+      awac=aw(nx1,j)/ac(nx1,j)
+      ac(nx,j)=ac(nx,j)-awac*ae(nx1,j)
+      ae(nx,j)=0.
+      f(nx,j)=f(nx,j)-awac*f(nx1,j)
+      ac(nx1,j)=0.
+      aw(nx1,j)=0.
+  100 f(nx1,j)=0.
+c
+      do 110 i=1,nx
+      asac=as(i,1)/ac(i,0)
+      ac(i,1)=ac(i,1)-asac*an(i,0)
+      as(i,1)=0.
+      f(i,1)=f(i,1)-asac*f(i,0)
+      ac(i,0)=0.
+      an(i,0)=0.
+      f(i,0)=0.
+      anac=an(i,ny)/ac(i,ny1)
+      ac(i,ny)=ac(i,ny)-anac*as(i,ny1)
+      an(i,ny)=0.
+      f(i,ny)=f(i,ny)-anac*f(i,ny1)
+      ac(i,ny1)=0.
+      as(i,ny1)=0.
+  110 f(i,ny1)=0.
+         endif
+      return
+      end