-rw-r--r-- | README                       |   19
-rw-r--r-- | interface.ccl                |    6
-rw-r--r-- | param.ccl                    |   27
-rw-r--r-- | schedule.ccl                 |    7
-rw-r--r-- | src/Comm.c                   |   88
-rw-r--r-- | src/GHExtension.c            |   44
-rw-r--r-- | src/SetupPGH.c               | 1072
-rw-r--r-- | src/Startup.c                |   67
-rw-r--r-- | src/include/pGArray.h        |   20
-rw-r--r-- | src/include/pGF.h            |  100
-rw-r--r-- | src/include/pGH.h            |  132
-rw-r--r-- | src/include/pugh.h           |  150
-rw-r--r-- | src/include/pughDriver.h     |   68
-rw-r--r-- | src/include/pughProblem.h    |   53
-rw-r--r-- | src/include/pugh_constants.h |   64
-rw-r--r-- | src/make.code.defn           |    8
-rw-r--r-- | src/pugh/SetupPGArray.c      |   84
-rw-r--r-- | src/pugh/SetupPGF.c          |  438
-rw-r--r-- | src/pugh/SetupSliceCenter.c  |  137
-rw-r--r-- | src/pugh/SyncGroupGF.c       |  271
-rw-r--r-- | src/pugh/pGF_FinishRecv.c    |   81
-rw-r--r-- | src/pugh/pGF_PostRecv.c      |   69
-rw-r--r-- | src/pugh/pGF_PostSend.c      |  116
-rw-r--r-- | src/pugh/pGF_Reduction.c     |  198
-rw-r--r-- | src/pugh_Comm.h              |   52
-rw-r--r-- | src/pugh_extension.h         |   30
26 files changed, 3401 insertions, 0 deletions
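
One piece of the decomposition logic below rewards a closer look before reading the diff: ProcTop in src/SetupPGH.c factors the processor count np into a near-cubic nprocx x nprocy x nprocz topology by taking the largest divisor of np no larger than np^(1/3) as one factor, then splitting the remainder the same way with sqrt. A minimal standalone sketch of that heuristic (proc_top and the test driver are illustrative re-creations, not code from this commit; compile with -lm):

#include <math.h>
#include <stdio.h>

/* Near-cubic factorisation np = x*y*z, following the heuristic
   described in ProcTop (src/SetupPGH.c). */
static void proc_top(int np, int *npx, int *npy, int *npz)
{
  int x, y, z, r;

  z = (int)(pow(np, 1.0/3.0) + 0.0001);   /* largest divisor <= cbrt(np) */
  while (np % z != 0) z--;
  r = np / z;
  y = (int)(sqrt((double)r) + 0.0001);    /* largest divisor <= sqrt(r)  */
  while (r % y != 0) y--;
  x = r / y;

  /* PUGH reverses the factors for better alignment of comm groups. */
  *npz = x;
  *npy = y;
  *npx = z;
}

int main(void)
{
  int nx, ny, nz;
  proc_top(12, &nx, &ny, &nz);
  printf("12 procs -> %d x %d x %d\n", nx, ny, nz);  /* 2 x 2 x 3 */
  return 0;
}
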
@@ -0,0 +1,19 @@
+Cactus Code Thorn pugh
+Authors    : ...
+Managed by : ... <...@...........>
+Version    : ...
+CVS info   : $Header: /usr/users/cactus/CCTK/lib/make/new_thorn.pl,v 1.1 1999/02/03 17:00:50 goodale Exp $
+--------------------------------------------------------------------------
+
+1. Purpose of the thorn
+
+This thorn does ...
+
+2. Dependencies of the thorn
+
+This thorn additionally requires thorns ...
+
+3. Thorn distribution
+
+This thorn is available to ...
+
+4. Additional information
diff --git a/interface.ccl b/interface.ccl
new file mode 100644
index 0000000..cd3d94f
--- /dev/null
+++ b/interface.ccl
@@ -0,0 +1,6 @@
+# Interface definition for thorn pugh
+# $Header$
+
+implements:driver
+inherits: Cactus
+
diff --git a/param.ccl b/param.ccl
new file mode 100644
index 0000000..6dcb5d7
--- /dev/null
+++ b/param.ccl
@@ -0,0 +1,27 @@
+# Parameter definitions for thorn pugh
+# $Header$
+
+public:
+
+INTEGER nx "The size of the grid in the x direction"
+{
+  *:-1 :: "Fixed size of minus this per processor"
+  1:*  :: "Grid of this size distributed across all processors"
+} 10
+
+INTEGER ny "The size of the grid in the y direction"
+{
+  *:-1 :: "Fixed size of minus this per processor"
+  1:*  :: "Grid of this size distributed across all processors"
+} 10
+
+INTEGER nz "The size of the grid in the z direction"
+{
+  *:-1 :: "Fixed size of minus this per processor"
+  1:*  :: "Grid of this size distributed across all processors"
+} 10
+
+INTEGER ghost_size "The width of the ghost zone"
+{
+  1:* :: "Must be a positive integer"
+} 1
diff --git a/schedule.ccl b/schedule.ccl
new file mode 100644
index 0000000..7731f24
--- /dev/null
+++ b/schedule.ccl
@@ -0,0 +1,7 @@
+# Schedule definitions for thorn pugh
+# $Header$
+
+schedule pugh_Startup at STARTUP
+{
+  LANG: C
+} "Pugh startup routine."
diff --git a/src/Comm.c b/src/Comm.c
new file mode 100644
index 0000000..69f1e9f
--- /dev/null
+++ b/src/Comm.c
@@ -0,0 +1,88 @@
+ /*@@
+   @file      Comm.c
+   @date      Thu Feb 4 11:34:29 1999
+   @author    Tom Goodale
+   @desc
+   Pugh communication functions.
+   @enddesc
+ @@*/
+
+#include <stdarg.h>
+#include "flesh.h"
+
+int pugh_SyncGroup(cGH *GH, const char *group)
+{
+  return 0;
+}
+
+int pugh_EnableGroupStorage(cGH *GH, const char *group)
+{
+  return 0;
+}
+
+int pugh_DisableGroupStorage(cGH *GH, const char *group)
+{
+  return 0;
+}
+
+int pugh_EnableGroupComm(cGH *GH, const char *group)
+{
+  return 0;
+}
+
+int pugh_DisableGroupComm(cGH *GH, const char *group)
+{
+  return 0;
+}
+
+int pugh_Barrier(cGH *GH)
+{
+  return 0;
+}
+
+int pugh_Reduce(cGH *GH,
+                const char *operation,
+                int n_infields,
+                int n_outfields,
+                int out_type,
+                void **outarray,
+                ...)
+{
+  return 0;
+}
+
+int pugh_Interp(cGH *GH,
+                const char *operation,
+                int n_coords,
+                int n_infields,
+                int n_outfields,
+                int n_points,
+                int type,
+                ...)
+{
+  return 0;
+}
+
+int pugh_ParallelInit(cGH *GH)
+{
+  return 0;
+}
+
+int pugh_Exit(cGH *GH)
+{
+  return 0;
+}
+
+int pugh_Abort(cGH *GH)
+{
+  return 0;
+}
diff --git a/src/GHExtension.c b/src/GHExtension.c
new file mode 100644
index 0000000..f36f6ee
--- /dev/null
+++ b/src/GHExtension.c
@@ -0,0 +1,44 @@
+ /*@@
+   @file      GHExtension.c
+   @date      Thu Feb 4 10:47:14 1999
+   @author    Tom Goodale
+   @desc
+   Pugh GH extension stuff.
+   @enddesc
+ @@*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "flesh.h"
+#include "pugh.h"
+#include "declare_parameters.h"
+
+static char *rcsid = "$Header$";
+
+void *pugh_SetupGH(tFleshConfig *config,
+                   int convergence_level,
+                   cGH *GH)
+{
+  DECLARE_PARAMETERS
+  pGH *newGH;
+
+  newGH = SetupPGH(nx, ny, nz, ghost_size, PUGH_NO_STAGGER);
+
+  if(!newGH)
+  {
+    fprintf(stderr, "PUGH: Failed to allocate memory for a pGH !\n");
+    exit(2);
+  }
+
+  return newGH;
+}
+
+int pugh_InitGH(cGH *GH)
+{
+  return 0;
+}
+
+int pugh_rfrTraverseGH(cGH *GH, int rfrpoint)
+{
+  return 0;
+}
diff --git a/src/SetupPGH.c b/src/SetupPGH.c
new file mode 100644
index 0000000..76d1fd5
--- /dev/null
+++ b/src/SetupPGH.c
@@ -0,0 +1,1072 @@
+ /*@@
+   @file      SetupPGH.c
+   @date      Fri Feb 21 10:18:17 1997
+   @author    Paul Walker
+   @desc
+   Initializes the pGH. You should really consult the
+   pGH description (@seeheader pGH.h) in order to clearly
+   understand what is going on.
+   @enddesc
+ @@*/
+
+#include "pugh.h"
+
+/* Old Id: SetupPGH.c,v 1.29 1999/01/12 08:23:35 bruegman Exp */
+
+static char *rcsid = "$Id$";
+
+ /*@@
+   @routine   SetupPGH
+   @date      Fri Feb 21 10:21:36 1997
+   @author    Paul Walker
+   @desc
+   Sets up the pGH distribution based on processor number and
+   problem size. This amounts to:
+   <ul>
+     <li> Allocating and copying some stuff
+     <li> Figuring out an optimal domain decomposition
+     <li> Setting up ghosts and overlaps.
+   </ul>
+   Of course the latter two are hard, so some more comments
+   are in order.
+   <p>
+   The second is in @seeroutine ProcTop now.
+   <p>
+   The third should be obvious ;-)
+   @enddesc
+   @calls     ProcTop
+   @history
+   @hauthor Gabrielle Allen @hdate 26 Sep 1998
+   @hdesc Changed IO parameter names
+   @hauthor Gabrielle Allen @hdate 17 Oct 1998
+   @hdesc Added initialisation of IO structure
+   @hauthor Gabrielle Allen @hdate 16 Nov 1998
+   @hdesc Added initialisation of GH->forceSync
+   @hdate Thu Feb 4 11:13:37 1999 @hauthor Tom Goodale
+   @hdesc Modified for Cactus 4.0
+ @@*/
+
+pGH *SetupPGH(int nx, int ny, int nz, int stencil_width, int staggertype)
+{
+  pGH *GH;
+
+  void pGH_SetupBasics(pGH *, int, int, int, int, int);
+  void pGH_SetupBounds(pGH *, int, int, int);
+  void pGH_SetupOwnership(pGH *);
+
+  /* Allocate for myself */
+  GH = (pGH *)malloc(sizeof(pGH));
+
+  pGH_SetupBasics(GH, nx, ny, nz, stencil_width, staggertype);
+  pGH_SetupBounds(GH, nx, ny, nz);
+  pGH_SetupOwnership(GH);
+
+  /* Slice center not yet set: -1 */
+  GH->spxyz[0] = -1;
+  GH->spxyz[1] = -1;
+  GH->spxyz[2] = -1;
+
+  /* And we're done... */
+  return GH;
+}
+
+
+void pGH_SetupBasics(pGH *GH, int nx, int ny, int nz,
+                     int stencil_width, int staggertype)
+{
+  int i;
+  int nprocx, nprocy, nprocz;
+
+  pGHList *ltemp;
+
+  void ProcTop(int, int *, int *, int *);
+
+  /* MPI goobits */
+#ifdef MPI
+  /* Set up my communicator. This would allow us to run pugh on
+     a subset of processors at a later date if, for instance, we
+     were using Panda or whatnot.
+   */
+  CACTUS_MPI_ERROR(MPI_Comm_dup(MPI_COMM_WORLD, &(GH->PUGH_COMM_WORLD)));
+
+  CACTUS_MPI_ERROR(MPI_Comm_size(GH->PUGH_COMM_WORLD, &(GH->nprocs)));
+  CACTUS_MPI_ERROR(MPI_Comm_rank(GH->PUGH_COMM_WORLD, &(GH->myproc)));
+#else
+  GH->nprocs = 1;
+  GH->myproc = 0;
+#endif
+
+  /* Activate GH (convergence grids can be inactivated by the NaN check
+     so the evolution will carry on) */
+  GH->active = 1;
+
+  /* Figure out the processor topology first so we can have constant
+     work per processor.
+ */ + ProcTop(GH->nprocs,&nprocx,&nprocy,&nprocz); + GH->nprocx = nprocx; + GH->nprocy = nprocy; + GH->nprocz = nprocz; + + + /* Set up the global size */ + /* Try this exceptionally cheap method */ + if (nx < 0 && GH->nprocx > 1) { + GH->nx = (GH->nprocx-2) * (-nx - 2*stencil_width) + + 2 * (-nx - stencil_width); + if (staggertype == PUGH_STAGGER) GH->nx -= nprocx-1; + } else { + GH->nx = abs(nx); + } + + if (ny < 0 && GH->nprocy > 1) { + GH->ny = (GH->nprocy-2) * (-ny - 2*stencil_width) + + 2 * (-ny - stencil_width); + if (staggertype == PUGH_STAGGER) GH->ny -= nprocy-1; + } else { + GH->ny = abs(ny); + } + + if (nz < 0 && GH->nprocz > 1) { + GH->nz = (GH->nprocz-2) * (-nz - 2*stencil_width) + + 2 * (-nz - stencil_width); + if (staggertype == PUGH_STAGGER) GH->nz -= nprocz-1; + } else { + GH->nz = abs(nz); + } + + GH->stencil_width = stencil_width; + + + /* + if (Contains("commmodel","loc") != 0) { + GH->commmodel = PUGH_ALLOCATEDBUFFERS; + } else { + GH->commmodel = PUGH_DERIVEDTYPES; + } + */ + GH->commmodel = PUGH_ALLOCATEDBUFFERS; + +#ifdef 0 + /* Set up the IO Processor. */ + GH->IO.outpfx[0] = '\0'; + + if (Contains("iomode","proc")) { + GH->IO.ioproc = GH->myproc; + GH->IO.nioprocs = GH->nprocs; + GH->IO.ioproc_every = 1; + } else if (Contains("iomode","np")) { + if (Geti("ioproc_every") > GH->nprocs) { + GH->IO.ioproc_every = GH->nprocs; + printf("Reducing ioproc_every to %d\n",GH->IO.ioproc_every); + } else { + GH->IO.ioproc_every = Geti("ioproc_every"); + } + GH->IO.nioprocs = (GH->nprocs) / GH->IO.ioproc_every + + (GH->nprocs % GH->IO.ioproc_every ? 1 : 0); + GH->IO.ioproc = GH->myproc - GH->myproc % GH->IO.ioproc_every; + } else if (Contains_bounded("iomode","onefile")) { + GH->IO.ioproc = 0; + GH->IO.nioprocs = 1; + GH->IO.ioproc_every = GH->nprocs; + } else { + printf ("I don't understand iomode setting. Using one file\n"); + GH->IO.ioproc = 0; + GH->IO.nioprocs = 1; + GH->IO.ioproc_every = GH->nprocs; + } + + GH->IO.downsample = Geti("out_downsample");; + GH->IO.onefileperslice = Contains("onefileperslice","yes"); + + /* At the moment only have unchunked for a single output file */ + if (Contains("iomode","unchunked")) { + if (GH->IO.ioproc_every >= GH->nprocs) + GH->IO.unchunkedresult = 1; + else { + printf("Unchunked out not currently supported for multiple\n"); + printf("output file ... 
output will be chunked\n"); + GH->IO.unchunkedresult = 0; + } + } else { + GH->IO.unchunkedresult = 0; + } + + /* See if we should be verbose or not */ + if (Contains("IO_verbose","yes")) { + GH->IO.verbose = 1; + } else { + GH->IO.verbose = 0; + } + + /* See what we should output */ + if (Contains("IO_datestamp","no")) { + GH->IO.datestamp = 0; + } else { + GH->IO.datestamp = 1; + } + if (Contains("IO_parameters","no")) { + GH->IO.parameters = 0; + } else { + GH->IO.parameters = 1; + } + if (Contains("IO_scalars","no")) { + GH->IO.scalars = 0; + } else { + GH->IO.scalars = 1; + } + if (Contains("IO_structures","no")) { + GH->IO.structures = 0; + } else { + GH->IO.structures = 1; + } + + /* Check if Panda is on */ +#ifdef THORN_PANDA + GH->IO.panda_on = Contains("panda_active","yes"); +#else + GH->IO.panda_on = 0; +#endif + +#endif /* 0 */ + + /* set staggering flag */ + GH->stagger = staggertype; + + if (Contains("bound","periodic")) { + GH->periodic = 1; + } else { + GH->periodic = 0; + } + + /* Grid function list */ + GH->ngridFuncs = 0; + GH->gridFuncs = NULL; + + /* Array Llist */ + GH->nArrays = 0; + GH->Arrays = NULL; + + /* Allocate the bounds memory */ + GH->lb = (int **)malloc(GH->nprocs*sizeof(int *)); + GH->ub = (int **)malloc(GH->nprocs*sizeof(int *)); + GH->neighbors = (int **)malloc(GH->nprocs*sizeof(int *)); + + GH->lb[0] = (int *)malloc(GH->nprocs*3*sizeof(int)); + for (i=1;i<GH->nprocs;i++) + GH->lb[i] = GH->lb[0] + i*3; + + GH->ub[0] = (int *)malloc(GH->nprocs*3*sizeof(int)); + for (i=1;i<GH->nprocs;i++) + GH->ub[i] = GH->ub[0] + i*3; + + GH->neighbors[0] = (int *)malloc(GH->nprocs*6*sizeof(int)); + for (i=1;i<GH->nprocs;i++) + GH->neighbors[i] = GH->neighbors[0] + i*6; + + /* Remote neighbor points */ + GH->rnpoints = (int *)malloc(GH->nprocs*sizeof(int)); + GH->rnx = (int *)malloc(GH->nprocs*sizeof(int)); + GH->rny = (int *)malloc(GH->nprocs*sizeof(int)); + GH->rnz = (int *)malloc(GH->nprocs*sizeof(int)); + + /* In general - we will noodle this later.*/ + GH->level = 0; + GH->mglevel = 0; + GH->convlevel = 0; + GH->forceSync = 0; + + + /* Finally add myself to the pGH List */ + /* + ltemp = (pGHList *)malloc(sizeof(pGHList)); + ltemp->next = GHList; + ltemp->GH = GH; + GHList = ltemp; + */ +} + + +void pGH_SetupBounds(pGH *GH,int nx,int ny,int nz) { + int i,j,k,l,dir,staglcv; + int xstep, ystep, zstep; + int lnx, lny, lnz; + int *dtindexes, *one, maxs, dirs[3]; + int maxpoints, minpoints; + Double avgpoints; + int istart,jstart,kstart,iend,jend,kend; + int nprocx,nprocy,nprocz; + int staggertype; + + staggertype = GH->stagger; + + nprocx = GH->nprocx; + nprocy = GH->nprocy; + nprocz = GH->nprocz; + + /* Now distribute the points among the processors. */ + + /* fixed work per proc? 
*/ + if (nx < 0) { + for (i=0;i<nprocx;i++) + for (j=0;j<nprocy;j++) + for (k=0;k<nprocz;k++) { + int pnum = i + nprocx*(j+nprocy*k); + + /* Bounding box information */ + + if (i == 0) GH->lb[pnum][0] = 0; + else {GH->lb[pnum][0] = GH->ub[pnum-1][0] + 1 - + 2*(GH->stencil_width); + if(staggertype == PUGH_STAGGER) GH->lb[pnum][0] --; + } + GH->ub[pnum][0] = GH->lb[pnum][0] + abs(nx) - 1; + + if (j == 0) GH->lb[pnum][1] = 0; + else {GH->lb[pnum][1] = GH->ub[pnum-nprocx][1] + 1 - + 2*(GH->stencil_width); + if(staggertype == PUGH_STAGGER) GH->lb[pnum][1] --; + } + GH->ub[pnum][1] = GH->lb[pnum][1] + abs(ny) - 1; + + if (k == 0) GH->lb[pnum][2] = 0; + else {GH->lb[pnum][2] = GH->ub[pnum-nprocx*nprocy][2] + 1 - + 2*(GH->stencil_width); + if(staggertype == PUGH_STAGGER) GH->lb[pnum][2] --; + } + GH->ub[pnum][2] = GH->lb[pnum][2] + abs(nz) - 1; + + } + } + else + { + +#ifdef THORN_BAM_ELLIPTIC + /* BB: for multigrid, boundary points of octants are treated as ghosts + put them on single processor! + + 1. we use the standard algorithm on a grid reduced by stencil_width + in every direction to guarantee the proper grid alignment + */ + if (Contains("grid", "BAM")) + { if( Contains("grid", "octant")) + { GH->nx -= GH->stencil_width; + GH->ny -= GH->stencil_width; + GH->nz -= GH->stencil_width; + } + if( Contains("grid", "bitant")) + GH->nz -= GH->stencil_width; + } +#endif + + xstep = (GH->nx-1) / nprocx; + ystep = (GH->ny-1) / nprocy; + zstep = (GH->nz-1) / nprocz; + + for (i=0;i<nprocx;i++) + for (j=0;j<nprocy;j++) + for (k=0;k<nprocz;k++) { + int pnum = i + nprocx*(j+nprocy*k); + + /* Bounding box information */ + if (i == 0) GH->lb[pnum][0] = 0; + else {GH->lb[pnum][0] = i*xstep +1 - GH->stencil_width; + if(staggertype == PUGH_STAGGER) + GH->lb[pnum][0] --; + } + if (i == nprocx-1) GH->ub[pnum][0] = GH->nx-1; + else GH->ub[pnum][0] = (i+1)*xstep + GH->stencil_width; + + if (j == 0) GH->lb[pnum][1] = 0; + else {GH->lb[pnum][1] = j*ystep +1 - GH->stencil_width; + if(staggertype == PUGH_STAGGER) + GH->lb[pnum][1] --; + } + if (j == nprocy-1) GH->ub[pnum][1] = GH->ny-1; + else GH->ub[pnum][1] = (j+1)*ystep + GH->stencil_width; + + if (k == 0) GH->lb[pnum][2] = 0; + else {GH->lb[pnum][2] = k*zstep +1 - GH->stencil_width; + if(staggertype == PUGH_STAGGER) + GH->lb[pnum][2] --; + } + if (k == nprocz-1) GH->ub[pnum][2] = GH->nz-1; + else GH->ub[pnum][2] = (k+1)*zstep + GH->stencil_width; + + } + +#ifdef THORN_BAM_ELLIPTIC + /* 2. 
add ghosts to grids at octant boundary using uniform shift */ + + if (Contains("grid", "BAM")) + { if (Contains("grid", "octant")) + { GH->nx += GH->stencil_width; + GH->ny += GH->stencil_width; + GH->nz += GH->stencil_width; + for (i = 0; i < GH->nprocs; i++) { + for (j = 0; j < 3; j++) { + if (GH->lb[i][j]) GH->lb[i][j] += GH->stencil_width; + GH->ub[i][j] += GH->stencil_width; + } + } + } + if (Contains("grid", "bitant")) + { GH->nz += GH->stencil_width; + for (i = 0; i < GH->nprocs; i++) { + for (j = 2; j < 3; j++) { + if (GH->lb[i][j]) GH->lb[i][j] += GH->stencil_width; + GH->ub[i][j] += GH->stencil_width; + } + } + } + } +#endif + + } +} + + + + +void pGH_SetupOwnership(pGH *GH) { + int i,j,k,l,dir,staglcv; + int xstep, ystep, zstep; + int lnx, lny, lnz; + int *dtindexes, *one, maxs, dirs[3]; + int maxpoints, minpoints; + Double avgpoints; + int istart,jstart,kstart,iend,jend,kend; + int nprocx,nprocy,nprocz; + + maxpoints = 0; minpoints = GH->nx*GH->ny*GH->nz; avgpoints = 0; + + nprocx = GH->nprocx; + nprocy = GH->nprocy; + nprocz = GH->nprocz; + + for (i=0;i<nprocx;i++) + for (j=0;j<nprocy;j++) + for (k=0;k<nprocz;k++) { + int pnum = i + nprocx*(j+nprocy*k); + + /* Neighbors are easy! Just recall that i dir is x dir etc ...*/ + if (i == nprocx-1) + if (GH->periodic) + /* Use the i = 0 processor */ + GH->neighbors[pnum][XDP] = 0 + nprocx*(j+nprocy*k); + else + GH->neighbors[pnum][XDP] = -1; + else + GH->neighbors[pnum][XDP] = (i+1) + nprocx*(j+nprocy*k); + + if (i == 0) + if (GH->periodic) + /* Use the nprocx processor */ + GH->neighbors[pnum][XDM] = nprocx-1 + nprocx*(j+nprocy*k); + else + GH->neighbors[pnum][XDM] = -1; + else + GH->neighbors[pnum][XDM] = (i-1) + nprocx*(j+nprocy*k); + + if (j == nprocy-1) + if (GH->periodic) + /* Use the 0 processor */ + GH->neighbors[pnum][YDP] = (i) + nprocx*(0+nprocy*k); + else + GH->neighbors[pnum][YDP] = -1; + else + GH->neighbors[pnum][YDP] = (i) + nprocx*(j+1+nprocy*k); + + if (j == 0) + if (GH->periodic) + /* Use the nprocy-1 processor */ + GH->neighbors[pnum][YDM] = (i) + nprocx*(nprocy-1+nprocy*k); + else + GH->neighbors[pnum][YDM] = -1; + else + GH->neighbors[pnum][YDM] = (i) + nprocx*(j-1+nprocy*k); + + if (k == nprocz-1) + if (GH->periodic) + /* Use the 0 processor */ + GH->neighbors[pnum][ZDP] = (i) + nprocx*(j+nprocy*0); + else + GH->neighbors[pnum][ZDP] = -1; + else + GH->neighbors[pnum][ZDP] = (i) + nprocx*(j+nprocy*(k+1)); + + if (k == 0) + if (GH->periodic) + /* Use the nprocz-1 processor */ + GH->neighbors[pnum][ZDM] = (i) + nprocx*(j+nprocy*(nprocz-1)); + else + GH->neighbors[pnum][ZDM] = -1; + else + GH->neighbors[pnum][ZDM] = (i) + nprocx*(j+nprocy*(k-1)); + + /* And save the number of points on each processor + * so we can get at it later ... + */ + GH->rnx[pnum] = (GH->ub[pnum][0]-GH->lb[pnum][0]+1); + GH->rny[pnum] = (GH->ub[pnum][1]-GH->lb[pnum][1]+1); + GH->rnz[pnum] = (GH->ub[pnum][2]-GH->lb[pnum][2]+1); + GH->rnpoints[pnum] = GH->rnx[pnum] * GH->rny[pnum] * GH->rnz[pnum]; + if (GH->rnpoints[pnum] > maxpoints) maxpoints = GH->rnpoints[pnum]; + if (GH->rnpoints[pnum] < minpoints) minpoints = GH->rnpoints[pnum]; + avgpoints += GH->rnpoints[pnum]; + + } + avgpoints = (1.0 * avgpoints) / (GH->nprocs); + + /* Local information */ + GH->npoints = GH->rnpoints[GH->myproc]; + GH->lnx = GH->rnx[GH->myproc]; + GH->lny = GH->rny[GH->myproc]; + GH->lnz = GH->rnz[GH->myproc]; + GH->maxskew = (maxpoints-minpoints)/avgpoints * 100.0; + + + /* So now set up the ownership, ghosts, and overlap */ + + /* Ownership is pretty easy. 
Remember ownership is indexed as + [stagger][ijk][min/max]. See pGF_Reduction for a use of this, + among others. + Note: Ownership is same for staggered and non-staggered grids + */ + GH->ownership[PUGH_VERTEXCTR][0][0] = (GH->lb[GH->myproc][0] == 0 ? + 0 : GH->stencil_width); + GH->ownership[PUGH_VERTEXCTR][1][0] = (GH->lb[GH->myproc][1] == 0 ? + 0 : GH->stencil_width); + GH->ownership[PUGH_VERTEXCTR][2][0] = (GH->lb[GH->myproc][2] == 0 ? + 0 : GH->stencil_width); + + GH->ownership[PUGH_VERTEXCTR][0][1]=(GH->ub[GH->myproc][0] == GH->nx-1 ? + GH->lnx : GH->lnx - GH->stencil_width); + GH->ownership[PUGH_VERTEXCTR][1][1]=(GH->ub[GH->myproc][1] == GH->ny-1 ? + GH->lny : GH->lny - GH->stencil_width); + GH->ownership[PUGH_VERTEXCTR][2][1]=(GH->ub[GH->myproc][2] == GH->nz-1 ? + GH->lnz : GH->lnz - GH->stencil_width); + /* correct for periodic identification : Tue Jun 17 08:40:15 CDT 1997 */ + if(GH->periodic) + { + GH->ownership[PUGH_VERTEXCTR][0][0] = GH->stencil_width; + GH->ownership[PUGH_VERTEXCTR][1][0] = GH->stencil_width; + GH->ownership[PUGH_VERTEXCTR][2][0] = GH->stencil_width; + GH->ownership[PUGH_VERTEXCTR][0][1] = GH->lnx - GH->stencil_width; + GH->ownership[PUGH_VERTEXCTR][1][1] = GH->lny - GH->stencil_width; + GH->ownership[PUGH_VERTEXCTR][2][1] = GH->lnz - GH->stencil_width; + } + + + if (GH->nx == 1) { + GH->ownership[PUGH_VERTEXCTR][0][0] = 0; + GH->ownership[PUGH_VERTEXCTR][0][1] = 1; + } + if (GH->ny == 1) { + GH->ownership[PUGH_VERTEXCTR][1][0] = 0; + GH->ownership[PUGH_VERTEXCTR][1][1] = 1; + } + if (GH->nz == 1) { + GH->ownership[PUGH_VERTEXCTR][2][0] = 0; + GH->ownership[PUGH_VERTEXCTR][2][1] = 1; + } + + /* Great now set up the overlap region. This is the region we own, + but which is covered by our neighbors ghost. So this is what we + want to SEND + */ + for (dir=0;dir<6;dir++) { + int tmp; + /* We want to send at the dir - 1 */ + istart = 0; iend = GH->lnx; + jstart = 0; jend = GH->lny; + kstart = 0; kend = GH->lnz; + + /* Correct to direction speicif sweeps ... */ + if (dir == 0) { + /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */ + istart = GH->stencil_width; + iend = istart + GH->stencil_width; + } + if (dir == 1) { + /* Want to go from nx - 2 sw to nx - sw + (eg nx-2 to < nx-1, nx-4 to < nx-2) + Luckily iend is aready set to nx ... */ + tmp = iend; + istart = tmp - 2 * GH->stencil_width; + iend = tmp - GH->stencil_width; + } + + if (dir == 2) { + /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */ + jstart = GH->stencil_width; + jend = jstart + GH->stencil_width; + } + if (dir == 3) { + /* Want to go from nx - 2 sw to nx - sw + (eg nx-2 to < nx-1, nx-4 to < nx-2) + Luckily iend is aready set to nx ... */ + tmp = jend; + jstart = tmp - 2 * GH->stencil_width; + jend = tmp - GH->stencil_width; + } + + if (dir == 4) { + /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */ + kstart = GH->stencil_width; + kend = kstart + GH->stencil_width; + } + if (dir == 5) { + /* Want to go from nx - 2 sw to nx - sw + (eg nx-2 to < nx-1, nx-4 to < nx-2) + Luckily iend is aready set to nx ... */ + tmp = kend; + kstart = tmp - 2 * GH->stencil_width; + kend = tmp - GH->stencil_width; + } + GH->overlap[PUGH_VERTEXCTR][dir][0][0] = istart; + GH->overlap[PUGH_VERTEXCTR][dir][0][1] = iend; + GH->overlap[PUGH_VERTEXCTR][dir][1][0] = jstart; + GH->overlap[PUGH_VERTEXCTR][dir][1][1] = jend; + GH->overlap[PUGH_VERTEXCTR][dir][2][0] = kstart; + GH->overlap[PUGH_VERTEXCTR][dir][2][1] = kend; + } + + /* And finally the ghost indices. 
This used to be in + pGF_FinishRecv.c, but now it is here. + */ + for (dir=0;dir<6;dir++) { + int tmp; + /* We want to send at the dir - 1 */ + istart = 0; iend = GH->lnx; + jstart = 0; jend = GH->lny; + kstart = 0; kend = GH->lnz; + + /* Correct to direction speicif sweeps ... */ + /* Remember this is SW less than the other way... */ + if (dir == 0) { + /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */ + istart = 0; + iend = GH->stencil_width; + } + if (dir == 1) { + /* Want to go from nx - 2 sw to nx - sw + (eg nx-2 to < nx-1, nx-4 to < nx-2) + Luckily iend is aready set to nx ... */ + tmp = iend; + istart = tmp - GH->stencil_width; + iend = tmp; + } + + if (dir == 2) { + /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */ + jstart = 0; + jend = GH->stencil_width; + } + if (dir == 3) { + /* Want to go from nx - 2 sw to nx - sw + (eg nx-2 to < nx-1, nx-4 to < nx-2) + Luckily iend is aready set to nx ... */ + tmp = jend; + jstart = tmp - GH->stencil_width; + jend = tmp; + } + + if (dir == 4) { + /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */ + kstart = 0; + kend = GH->stencil_width; + } + if (dir == 5) { + /* Want to go from nx - 2 sw to nx - sw + (eg nx-2 to < nx-1, nx-4 to < nx-2) + Luckily iend is aready set to nx ... */ + tmp = kend; + kstart = tmp - GH->stencil_width; + kend = tmp; + } + + GH->ghosts[PUGH_VERTEXCTR][dir][0][0] = istart; + GH->ghosts[PUGH_VERTEXCTR][dir][0][1] = iend; + GH->ghosts[PUGH_VERTEXCTR][dir][1][0] = jstart; + GH->ghosts[PUGH_VERTEXCTR][dir][1][1] = jend; + GH->ghosts[PUGH_VERTEXCTR][dir][2][0] = kstart; + GH->ghosts[PUGH_VERTEXCTR][dir][2][1] = kend; + } + + /* if staggering, set up ownership, overlaps, ghosts */ + if(GH->stagger == PUGH_STAGGER) { + /* copy ownership from PUGH_VERTEXCTR */ + for(i=1;i<PUGH_NSTAGGER;i++) + for(j=0;j<3;j++) + for(k=0;k<2;k++) + GH->ownership[i][j][k] = GH->ownership[PUGH_VERTEXCTR][j][k]; + + /* correct ownership for staggered grids */ + for(i=0;i<PUGH_NSTAGGER;i++) + { + /* x-dir */ + if(GH->ub[GH->myproc][0] != GH->nx-1) + GH->ownership[i][0][1] --; + else + if(i == PUGH_FACECTR_X) + GH->ownership[i][0][1] --; + /* y-dir */ + if(GH->ub[GH->myproc][1] != GH->ny-1) + GH->ownership[i][1][1] --; + else + if(i == PUGH_FACECTR_Y) + GH->ownership[i][1][1] --; + /* z-dir */ + if(GH->ub[GH->myproc][2] != GH->nz-1) + GH->ownership[i][2][1] --; + else + if(i == PUGH_FACECTR_Z) + GH->ownership[i][2][1] --; + } + + /* set overlaps for staggered grids */ + for(staglcv=0;staglcv<PUGH_NSTAGGER;staglcv++) + for(dir=0;dir<6;dir++) + { + istart = 0; iend = GH->lnx; + jstart = 0; jend = GH->lny; + kstart = 0; kend = GH->lnz; + if(staglcv == PUGH_FACECTR_X) iend --; + if(staglcv == PUGH_FACECTR_Y) jend --; + if(staglcv == PUGH_FACECTR_Z) kend --; + + if(dir == 0) + { + istart = GH->stencil_width; + iend = 2 * GH->stencil_width; + if(staglcv != PUGH_FACECTR_X) + { + istart ++; + iend ++; + } + } + if(dir == 1) + { + istart = GH->lnx - 2 * GH->stencil_width -1; + iend = istart + GH->stencil_width; + } + if(dir == 2) + { + jstart = GH->stencil_width; + jend = 2 * GH->stencil_width; + if(staglcv != PUGH_FACECTR_Y) + { + jstart ++; + jend ++; + } + } + if(dir == 3) + { + jstart = GH->lny - 2 * GH->stencil_width -1; + jend = jstart + GH->stencil_width; + } + if(dir == 4) + { + kstart = GH->stencil_width; + kend = 2 * GH->stencil_width; + if(staglcv != PUGH_FACECTR_Z) + { + kstart ++; + kend ++; + } + } + if(dir == 5) + { + kstart = GH->lnz - 2 * GH->stencil_width -1; + kend = kstart + GH->stencil_width; + } + 
GH->overlap[staglcv][dir][0][0] = istart; + GH->overlap[staglcv][dir][0][1] = iend; + GH->overlap[staglcv][dir][1][0] = jstart; + GH->overlap[staglcv][dir][1][1] = jend; + GH->overlap[staglcv][dir][2][0] = kstart; + GH->overlap[staglcv][dir][2][1] = kend; + } + + + /* set ghosts for staggered grids */ + for(staglcv=0;staglcv<PUGH_NSTAGGER;staglcv++) + for(dir=0;dir<6;dir++) + { + istart = 0; iend = GH->lnx; + jstart = 0; jend = GH->lny; + kstart = 0; kend = GH->lnz; + if(staglcv == PUGH_FACECTR_X) iend --; + if(staglcv == PUGH_FACECTR_Y) jend --; + if(staglcv == PUGH_FACECTR_Z) kend --; + + if(dir == 0) + { + istart = 0; + iend = GH->stencil_width; + } + if(dir == 1) + { + istart = GH->lnx - GH->stencil_width; + iend = GH->lnx; + if(staglcv == PUGH_FACECTR_X) + { + istart --; + iend --; + } + } + if(dir == 2) + { + jstart = 0; + jend = GH->stencil_width; + } + if(dir == 3) + { + jstart = GH->lny - GH->stencil_width; + jend = GH->lny; + if(staglcv == PUGH_FACECTR_Y) + { + jstart --; + jend --; + } + } + if(dir == 4) + { + kstart = 0; + kend = GH->stencil_width; + } + if(dir == 5) + { + kstart = GH->lnz - GH->stencil_width; + kend = GH->lnz; + if(staglcv == PUGH_FACECTR_Z) + { + kstart --; + kend --; + } + } + GH->ghosts[staglcv][dir][0][0] = istart; + GH->ghosts[staglcv][dir][0][1] = iend; + GH->ghosts[staglcv][dir][1][0] = jstart; + GH->ghosts[staglcv][dir][1][1] = jend; + GH->ghosts[staglcv][dir][2][0] = kstart; + GH->ghosts[staglcv][dir][2][1] = kend; + } + } + + +#ifdef MPI + /* Great, now we have made these, lets set up the derived data types */ + for (i=0;GH->commmodel==PUGH_DERIVEDTYPES && i<PUGH_NSTAGGER;i++) { + int *dtindices, *one, dir; + int ii,jj,kk,ll; + + /* OK so now lets set up the communications datatypes */ + dirs[0] = GH->lny * GH->lnz; + dirs[1] = GH->lnx * GH->lnz; + dirs[2] = GH->lnx * GH->lny; + + maxs = dirs[0]; + maxs = (dirs[1] > maxs ? dirs[1] : maxs); + maxs = (dirs[2] > maxs ? dirs[2] : maxs); + + maxs *= GH->stencil_width; + + + dtindices = (int *)malloc(maxs*sizeof(int)); + one = (int *)malloc(maxs*sizeof(int)); + + for (dir=0;dir<6;dir++) { + /* Great so now calculate. Sends first */ + ll = 0; + for (kk = GH->overlap[i][dir][2][0]; + kk < GH->overlap[i][dir][2][1]; + kk ++) + for (jj = GH->overlap[i][dir][1][0]; + jj < GH->overlap[i][dir][1][1]; + jj ++) + for (ii = GH->overlap[i][dir][0][0]; + ii < GH->overlap[i][dir][0][1]; + ii ++) { + dtindices[ll] = DATINDEX(GH,ii,jj,kk); + one[ll] = 1; + ll++; + } + MPI_Type_indexed(ll,one,dtindices,PUGH_MPI_TYPE, + &(GH->send_dt[i][dir])); + CACTUS_MPI_ERROR(MPI_Type_commit(&(GH->send_dt[i][dir]))); + /* And recieves */ + ll = 0; + for (kk = GH->ghosts[i][dir][2][0]; + kk < GH->ghosts[i][dir][2][1]; + kk ++) + for (jj = GH->ghosts[i][dir][1][0]; + jj < GH->ghosts[i][dir][1][1]; + jj ++) + for (ii = GH->ghosts[i][dir][0][0]; + ii < GH->ghosts[i][dir][0][1]; + ii ++) { + dtindices[ll] = DATINDEX(GH,ii,jj,kk); + one[ll] = 1; + ll++; + } + MPI_Type_indexed(ll,one,dtindices,PUGH_MPI_TYPE, + &(GH->recv_dt[i][dir])); + CACTUS_MPI_ERROR(MPI_Type_commit(&(GH->recv_dt[i][dir]))); + } + + free(dtindices); + free(one); + } +#endif + +} + +#ifdef 0 + + /*@@ + @routine DestroyPGH + @date Thu Aug 21 11:38:10 1997 + @author Paul Walker + @desc + Destroys a GH object. + @enddesc +@@*/ +void DestroyPGH(pGH **GHin) { + /* First remove me from the list. 
*/ + pGHList *tmp, *x; + pGH *GH; + int i; + int didit; + + GH = *GHin; + +#ifdef MPI + CACTUS_MPI_ERROR(MPI_Comm_free(&(GH->PUGH_COMM_WORLD))); +#endif + + didit = 0; + if (GHList->GH == GH) { + x = GHList; + GHList = GHList->next; + didit = 1; + } else { + for (tmp = GHList; tmp->next && tmp->next->GH != GH; tmp = tmp->next) { + /* Do nothing */ + } + if (tmp->next) { + x = tmp->next; + tmp->next = tmp->next->next; + didit = 1; + } + } + if (!didit) { + printf ("Can't find GH in list\n"); + } else { + free(x); + } + + /* Great. Now go about the work of destroying me. */ + for (i=0;i<GH->ngridFuncs;i++) + DestroyPGF(GH, &(GH->gridFuncs[i])); + + for (i=0;i<GH->nArrays;i++){ + fflush(stdout); + DestroyPGArray(GH,&(GH->Arrays[i])); + } + + free(GH->gridFuncs); + free(GH->Arrays); + + free(GH->lb[0]); + free(GH->lb); + + free(GH->ub[0]); + free(GH->ub); + + free(GH->rnpoints); + free(GH->rnx); + free(GH->rny); + free(GH->rnz); + + free(GH->neighbors[0]); + free(GH->neighbors); + + free(GH); + *GHin = NULL; + + fflush(stdout); +} + +#endif + + /*@@ + @routine ProcTop + @date Thu May 1 10:13:40 1997 + @author Paul Walker + @desc + Figures out a reasonable processor topology for a square + grid on n processors. That is, it figures out a,b,c such + that a*b*c = n and a,b, and c are in some senses minimal. + <p> + This algorithm will not guarantee the minimal condition, + or at least I can't prove it does, but it seems to on + a wide variety of processor numbers. So stop being a + weenie - not everything needs a proof! + <p> + Thie came to me in the shower, and is much better than + the old way I was doing it. I'm considering having my + working environment be the shower for the rest of time, + but there are some problems waterproofing my workstation. + @enddesc + @calledby SetupPGH +@@*/ + +void ProcTop(int np, int *npx, int *npy, int *npz) { + int x,y,z,r; + + if (Contains("proc_topology","manual")) { + x = Geti("proc_top_nx"); + y = Geti("proc_top_ny"); + z = Geti("proc_top_nz"); + if (x * y * z != np) { + printf ("Manual processor topology error. %d x %d x %d != %d\n", + x,y,z,np); +#ifdef MPI + CACTUS_MPI_ERROR(MPI_Finalize()); +#endif + exit(0); + } + } else { + z = (int)(pow(np,1.0/3.0)+0.0001); + while (np % z != 0) z--; + r = np / z; + y = (int)(sqrt(r)+0.0001); + while (r % y != 0) y--; + x = r / y; + +#ifdef THORN_CARTOON_2D + if (Contains("cartoon_active","yes")) { + z = (int)(sqrt(np)+0.0001); + while (np % z != 0) z--; + y = 1; + x = np / z; + } +#endif + + if (x * y * z != np) { + printf ("FATAL ERROR in Processor Topology\n"); + printf ("This should never happen. Please report ASAP!\n"); + exit(0); + } + } + + /* NB: PW reverses these for better alignment of comm groups */ + *npz = x; + *npy = y; + *npx = z; + +} diff --git a/src/Startup.c b/src/Startup.c new file mode 100644 index 0000000..1f2f25a --- /dev/null +++ b/src/Startup.c @@ -0,0 +1,67 @@ + /*@@ + @file Startup.c + @date Wed Feb 3 23:10:19 1999 + @author Tom Goodale + @desc + Startup routines for pugh. + @enddesc + @@*/ + +#include <stdio.h> + +#include "flesh.h" +#include "GHExtensions.h" + +#include "pugh_extension.h" +#include "pugh_Comm.h" + +static char *rcsid="$Header$"; + +/* Store the handle in a global variable for the moment. */ +int PUGH_GHExtension; + + + /*@@ + @routine pugh_Startup + @date Wed Feb 3 23:14:38 1999 + @author Tom Goodale + @desc + The startup registration routine for PUGH. 
+ @enddesc + @calls + @calledby + @history + + @endhistory + +@@*/ +void pugh_Startup(void) +{ + printf("Registering PUGH GH extension\n"); + + /* Register the stuff in the GH extension. */ + PUGH_GHExtension = CCTK_RegisterGHExtension("PUGH"); + + CCTK_RegisterGHExtensionSetupGH(PUGH_GHExtension, pugh_SetupGH); + + CCTK_RegisterGHExtensionInitGH(PUGH_GHExtension, pugh_InitGH); + + CCTK_RegisterGHExtensionrfrTraverseGH(PUGH_GHExtension, pugh_rfrTraverseGH); + + /* Overload some functions */ + CCTK_OverloadSyncGroup(pugh_SyncGroup); + CCTK_OverloadEnableGroupStorage(pugh_EnableGroupStorage); + CCTK_OverloadDisableGroupStorage(pugh_EnableGroupStorage); + + CCTK_OverloadEnableGroupComm(pugh_EnableGroupComm); + CCTK_OverloadDisableGroupComm(pugh_EnableGroupComm); + + CCTK_OverloadBarrier(pugh_Barrier); + CCTK_OverloadReduce(pugh_Reduce); + CCTK_OverloadInterp(pugh_Interp); + + CCTK_OverloadParallelInit(pugh_ParallelInit); + CCTK_OverloadExit(pugh_Exit); + CCTK_OverloadAbort(pugh_Abort); + +} diff --git a/src/include/pGArray.h b/src/include/pGArray.h new file mode 100644 index 0000000..a30b04b --- /dev/null +++ b/src/include/pGArray.h @@ -0,0 +1,20 @@ + /*@@ + @file pGArray.h + @date + @author + @desc + @enddesc + @version $Id$ + @@*/ + +typedef struct PGARRAY { + char *name; + int pgano; + Double *data; + + int npoints; + struct PGH *parentGH; + + int do_arrayio; + IOFile IEEEfile; +} pGArray; diff --git a/src/include/pGF.h b/src/include/pGF.h new file mode 100644 index 0000000..d8dd2fa --- /dev/null +++ b/src/include/pGF.h @@ -0,0 +1,100 @@ + /*@@ + @header pGF.h + @date Fri Feb 21 10:16:32 1997 + @author Paul Walker + @desc + The pugh Grid Function structure. This is the heart of + the data storage and the like. You can't have a GF + without a GH, so you'd better see @seefile pGH.h + also. + <p> + One wonderfully new feature that a pGF has is that it contains + both a Double* padddata and a Double* data. What is the difference? + Well data is the base address of the fortran data set. It points + into padddata. This allows us, if we want to, to explicitly move + the starting point of the array to align with cache lines. + Important on some architectures. So the deal is if we are + in padding mode (see @seeroutine EnableGFDataStorage fro more + on that) padddata will be bigger than data, and data will point + to an address insinde padddata and such that lnx * lny * lnz + further down the data pointer is still in padddata. If we are + not in padding mode, data = padddata as pointers. + <p> + The only place we explicitly reference padddata is when we + create or de-allocate data. <b>All other operatoins shold + be done on *data</a> + @enddesc + @version $Id$ + @@*/ + +#define DATINDEX(GH,i,j,k) ((i) + GH->lnx*((j)+GH->lny*(k))) +#define DI(GH,i,j,k) ((i) + GH->lnx*((j)+GH->lny*(k))) +#define GDATINDEX(GH,i,j,k) ((i) + GH->nx*((j)+GH->ny*(k))) + +#ifndef IEEEIO +typedef long IOFile; +#endif + +typedef struct PGF { + char *name; /* The name of the grid function */ + int gfno; /* My ID number in my GH parent. */ + Double *padddata; /* Storage for the data. */ + Double *data; /* See the note above. */ + int buffer_sz[6]; /* Size of the face ghost zones */ + Double **send_buffer; /* Storage for buffered Comm if */ + Double **recv_buffer; /* we don't use derived types */ + int commflag; /* What is the comm flag set to? */ + int docomm[6]; /* Do we do comm or not? */ + int storage; /* Do we have storage or not? */ + int stagger; /* Only Vertex Centered now... 
*/ + + struct PGH *parentGH; /* The GH to which I belong */ + /* Note this is struct PGH whic is + typedeffed to pGH in pGH.h, but + that is included AFTER this so we + need the full name for the lookahead + structure reference thingy. + */ + + /* IO Related things */ + FILE *convfile; /* File for convergence information */ + FILE **xgfile; /* x,y,z and dline file pointers */ + FILE **zerodfile; /* min,max,norm1,norm2 file pointers */ + IOFile IEEEfile; /* File for IEEEIO */ + char IEEEfname[512]; /* IEEEIO 3D File name (to preserve handles + we will calculate this once and then + perhaps reopen and close it). + */ + IOFile IEEEfile_2d[3]; /* Two-D IEEEIO File in each dir */ + int do_3dio; /* Do 3D I/O state flag */ + int do_1dio; /* Do 1D I/O state flag */ + int do_2dio; /* Do 2D I/O state flag */ + int do_0dio; /* Do 0D (eg, min max) I/O */ + int show_maxval; /* Show runtime maxval state flag */ + int do_conv; /* Convegence test this function? */ + int lastio_it[4]; /* Last iteration for I/O at 1, 2, and 3D. + We need this since we don't want to + multiply write data, which we can do + in convergence mode. So rather than + be clever we just store the iteration + when we write, and then don't write at + the same iteration twice. + */ + + Double maxval, minval; /* To store in case we toggle storage */ + Double norm1, norm2; +#ifdef MPI + MPI_Request sreq[6], rreq[6]; /* Comm requests and statuses. */ + MPI_Status ms; +#endif + +#ifdef 0 +#include "pGF_Extensions.h" + +#ifdef THORN_EXACT /* thorn Comp and thorn Exact merge */ +#include "thorn_exact/src/include/pGF_ComparisonExtensions.h" +#endif + +#endif + +} pGF; diff --git a/src/include/pGH.h b/src/include/pGH.h new file mode 100644 index 0000000..4318612 --- /dev/null +++ b/src/include/pGH.h @@ -0,0 +1,132 @@ + /*@@ + @header pGH.h + @date Fri Feb 21 10:12:58 1997 + @author + @desc + The Pugh Grid Hierarchy structure. There isn't much point in having + a GridHierarchy without GridFunctions, so you'd better see + @seefile pGF.h also. + @enddesc + @history + @hauthor Gabrielle Allen + @hdate Sep 10 @hdesc Added the iteration number on a GH + @endhistory + @history + @hauthor Gabrielle Allen @hdate 26 Sep 1998 + @desc Changed IO parameter names, removed GH->iomode, added t_iopars + structure + @hauthor Gabrielle Allen @hdate 17 Oct 1998 + @desc Added more IO parameters to IO structure + structure + @hauthor Gabrielle Allen @hdate 3 Nov 1998 + @desc Added forceSync + @endhistory + @version $Id$ + @@*/ + + +typedef struct PGH { + +#ifdef MPI + MPI_Comm PUGH_COMM_WORLD; /* A MPIcommunicator */ +#endif + + /* Size of the processor group */ + int nprocs; /* Number of processors */ + int myproc; /* My processor */ + int nprocx,nprocy,nprocz; /* Processor topology */ + int commmodel; /* Comm model is PUGH_ALLOCATEDBUFFERS */ + /* or PUGH_DERIVEDTYPES. Currently unused */ + + /* Size of the problems */ + int ngridFuncs; /* # of Grid Functions */ + pGF **gridFuncs; /* Pointers to them */ + int nx,ny,nz; /* Physical size of the Problem */ + int stencil_width; /* Width of ghost zone */ + + int nArrays; /* # of pGArrays */ + pGArray **Arrays; /* Pointers to them */ + + /* Processor group layouts */ + Double maxskew; /* Maximum point skew */ + int periodic; /* Is the system periodic? */ + int **lb; /* Lower bound (nprocs X 3) for each proc */ + int **ub; /* Upper bound (same sizes) */ + int lnx, lny, lnz; /* Size on this processor */ + int npoints; /* LOCAL number of points on this proc. 
*/ + int *rnpoints; /* Number of points on each proc */ + int *rnx, *rny, *rnz; /* Number of points on each proc */ + int **neighbors; /* An array of size nprocs X 6 of the + * neighbors on each side. The entry in + * this array is the processor number of + * the neighbor. + */ + int stagger; /* Is staggering turned on? */ + int spxyz[3]; /* From what index to start slicing: 1 for octant, */ + /* nx/2 for box, e.g. Set in SetupSlicingCenter, */ + /* used in Write3D and modfied in in BoxInBox.c ! */ + int forceSync; /* force synchronisation of GFs with storage */ + + int sp2dxyz[3]; /* Slice point for two d slices. */ + + /* Information about ghosts and overlaps. */ + int ownership[PUGH_NSTAGGER][3][2]; /* The box owned in each direction. */ + /* Indexing here is [stagger][ijk][min/max]*/ + + int ghosts[PUGH_NSTAGGER][6][3][2]; /* The ghost zones on each face. */ + /* Indexing here is */ + /* [stagger][face][ijk][min/max] */ + + int overlap[PUGH_NSTAGGER][6][3][2]; /* The overlap region owned on each face. */ + /* Indexing here is */ + /* [stagger][face][ijk][min/max] */ + + + /* Coordinate information */ + Double cx0, cy0, cz0; /* Origin of our system */ + Double dx0, dy0, dz0, dt0; /* Delta of our system */ + Double lx0, ly0, lz0; /* Processor-Local coordinate origins */ + Double phys_time; /* physical time */ + int GHiteration; /* iteration number on processor */ + + + + + /* currently, all GHs are listed in a single, global GHList */ + /* they are distinguished by a unique combination of level numbers */ + int level; /* level in Berger-Oliger stack */ + int mglevel; /* level in multigrid stack */ + int convlevel; /* level in convergence stack */ + + int active; /* 1 -> GH active; 0 -> GH is skipped for conv.test */ + /* is used in NanCheckGH.c */ +#ifdef MPI + /* Derived data types for communication */ + MPI_Datatype send_dt[PUGH_NSTAGGER][6], recv_dt[PUGH_NSTAGGER][6]; +#endif + + /* Extension of this structure can be brought in by thorns */ + /* +#include "pGH_Extensions.h" + */ +} pGH; + + +/* This defines a single linked list of grid structures */ +/* the first item has level=0, next level=1, etc.. */ + +typedef struct pGHLIST { + pGH *GH; + struct pGHLIST *next; +} pGHList; + + + +#define XDP 1 +#define XDM 0 +#define YDP 3 +#define YDM 2 +#define ZDP 5 +#define ZDM 4 + + diff --git a/src/include/pugh.h b/src/include/pugh.h new file mode 100644 index 0000000..673eea0 --- /dev/null +++ b/src/include/pugh.h @@ -0,0 +1,150 @@ +/*@@ + @header pugh.h + @author Paul Walker + @date March 1997 + @desc + This is just a bunch of includes and some simple definitions. + You should really see @seefile pGF.h @seefile pGH.h and + @seefile pughProtos.h for the pugh bit, and @seefile + pughProblem.h for the cactus-specific bits. + @enddesc + @version $Id$ +@@*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <malloc.h> +#include <assert.h> + +/* 3 ways up shutting down: */ +#define STOP cshutdown(0); +#define ABORT cabort(0); +#define CORE ccore(); + +/* Two ways to shut down with a non-zero exit code. 
*/ +#define ERROR_STOP(xerrcode) cshutdown(xerrcode); +#define ERROR_ABORT(xerrcode) cabort(xerrcode); + + +/* We have our own Cactus memory routines in pughutils.c */ + +#if defined(SGI) || defined(T3E) +#undef malloc +#define malloc(x) cactus_malloc(x) +void *cactus_malloc(size_t); +#undef free +#define free(x) cactus_free(x) +void cactus_free(void *); +#endif + +#ifdef MPI +#include "mpi.h" +#endif + +#ifdef HDF +#include "hdf.h" +#endif + +#ifdef IEEEIO +#include "IEEEIO.h" +#endif + +#ifdef Panda_IO +#include "PandaIO.h" +#endif + +#ifdef TCP +#include "TCP++/DataConnection.h" +#include "TCP++/CommandProcessor.h" +#endif + +/* Handle precision */ +#ifdef SINGLE_PRECISION +#define PUGH_MPI_TYPE MPI_FLOAT +typedef float Double; +#else +#define PUGH_MPI_TYPE MPI_DOUBLE +typedef double Double; +#endif + +#include "pugh_constants.h" + +#ifdef 0 +/* We need to include active thorns here so that the thorns can + modify the grid function and grid hierarchy (as box in box does). + */ +#include "pughActiveThorns.h" + +#ifdef THORN_BOXINBOX +#include "AMRwriter.h" +#endif + +#endif + +#include "pGF.h" +#include "pGArray.h" +#include "pGH.h" + + +#ifdef 0 +/* Timing stuff */ +#include "pughTimers.h" + +#include "pughProtos.h" + + + +#ifndef WIN32 +#define BOLDON "\033[1m" +#define BOLDOFF "\033[0m" +#else +#define BOLDON "" +#define BOLDOFF "" +#endif + +#if (defined(RS6000) || defined(SPX)||defined(HP)) +#define FORTRAN_NAME(x,y,z) z +#else +#if (defined(nCUBE2) || defined(CRAYC90) || defined(CRAYT3D) || defined (CRAYT3E)) +#define FORTRAN_NAME(x,y,z) y +#else +#if (defined(WIN32)) +#define FORTRAN_NAME(x,y,z) y +#else +#define FORTRAN_NAME(x,y,z) x +#endif +#endif +#endif + +#endif /*0*/ + +#ifdef MPI + +#define CACTUS_MPI_ERROR(xf) do {int errcode; \ + if((errcode = xf) != MPI_SUCCESS) \ + { \ + char mpi_error_string[MPI_MAX_ERROR_STRING+1]; \ + int resultlen; \ + MPI_Error_string(errcode, mpi_error_string, &resultlen);\ + fprintf(stderr, "MPI Call %s returned error code %d (%s)\n", \ + #xf, errcode, mpi_error_string); \ + fprintf(stderr, "At line %d of file %s\n", \ + __LINE__, __FILE__); \ + } \ + } while (0) +#endif + + +#include "pughDriver.h" + +#ifdef 0 +/* Runtime function repository */ +#include "rfr.h" + +/* Scalar Manipulation Functions. */ + +#include "CactusScalarProtos.h" + +#endif diff --git a/src/include/pughDriver.h b/src/include/pughDriver.h new file mode 100644 index 0000000..73415ea --- /dev/null +++ b/src/include/pughDriver.h @@ -0,0 +1,68 @@ +/*@@ + @header pughDriver.h + @author Paul Walker + @date March 1997 + @desc + This is stuff having to do with the driver, namely + the problem and globals, as well as definitions of what + the fort interfaces are. Also see @seefile pughProblem.h + for what the cactus-specific stuff is. 
The macros FORT_ARGS + and FORT_ARGS_PROTO passed from c routines must match + FORT_GH_DECOMP and FORT_GHDECOMP_DECL received in fortran + routines defined in @seefile pughFortran.h + @enddesc + @history + @hauthor Gabrielle Allen + @hdate Sep 10 @hdesc Added the GH iteration number to fortran macros + @hauthor Gabrielle Allen @hdate 29 Sep 1998 + @hdesc Removed levfac + @endhistory + @version $Id$ + @@*/ + +#ifdef 0 +extern pGHList *GHList; + +extern int nx0, ny0, nz0; +extern int lb[3],ub[3],sh[3],bbox[6]; +extern int convergence; + +/* Timing variables */ +extern double commt[PUGH_NTIMERS]; +extern double elltime[PUGH_NTIMERS]; + +/* How long to run */ +extern int itfirst, itlast, itout; /* Iteration runs */ +extern int itout3, itout2, itout1, itout0, itoutarr, itinfo; + +extern int _show_storage; + +/* termination flags: pe-local and global: */ +extern int cactus_terminate; + +/* FORT_ARGS must match FORT_ARGS_PROTO below, and FORT_GH_DECOMP, + FORT_GHDECOMP_DECL in pughFortran.h */ +#define FORT_ARGS(xGH) xGH, \ + &(xGH->lnx),&(xGH->lny), &(xGH->lnz),\ + lb,ub,sh,bbox, \ + &(xGH->nx),&(xGH->ny),&(xGH->nz),\ + &(xGH->cx0),&(xGH->cy0),&(xGH->cz0), \ + &(xGH->dx0),&(xGH->dy0),&(xGH->dz0),&(xGH->dt0), &(xGH->phys_time), \ + &(xGH->level), &(xGH->nprocs), &(xGH->myproc), \ + &(xGH->convlevel), &(xGH->stencil_width), &(xGH->GHiteration) + +/* FORT_ARGS_PROTO must match FORT_ARGS above, and FORT_GH_DECOMP, + FORT_GHDECOMP_DECL in pughFortran.h */ +#define FORT_ARGS_PROTO pGH *, \ + int *, int *, int *, \ + int *, int *, int *, int *, \ + int *, int *, int *, \ + Double *, Double *, Double *, \ + Double *, Double *, Double *, Double *, Double *, \ + int *, int *, int *, \ + int *, int *, int * + + +#include "pughProblem.h" + +#endif diff --git a/src/include/pughProblem.h b/src/include/pughProblem.h new file mode 100644 index 0000000..2024a09 --- /dev/null +++ b/src/include/pughProblem.h @@ -0,0 +1,53 @@ +/*@@ + @header pughProblem.h + @author Paul Walker + @date March 1997 + @desc + This routine declares all the problem-specific globals + and teaches the driver how to make the grid functions + with the right names, and pass them to fortran. It + isn't that exciting, but it's pretty damned important + if you need to add new fields. + @enddesc + @version $Id$ + @@*/ + +#ifdef 0 +#include "gfIndices.h" +#include "pughProblemDefs.h" + +/* Variables that ALL codes use. */ +#define USER_VARS \ + int *_one; + +/* Main externals */ +extern int *_one; + +/* Historical: Two macros for the same thing. PW. */ +#define PASS_FORT_FIELDS(xGH) \ + PASS_FORT_FIELDS_SUBSET(xGH) + + +/* If you add items to PASS_FORT_FIELDS make sure */ +/* you add the right type at the right position */ +/* in PROTO_FORT_FIELDS */ +#define PROTO_FORT_FIELDS \ + PROTO_FORT_FIELDS_SUBSET + + +#ifdef WIN32 +#define FMODIFIER __stdcall +#else +#define FMODIFIER +#endif + +#ifdef THORN_CHECKPOINT + /* Is it possible to ifdef this? Or perhaps to rfr it? 
*/ +#define f_recover FORTRAN_NAME(recover_,RECOVER,recover) +void FMODIFIER f_recover(FORT_ARGS_PROTO,PROTO_FORT_FIELDS); +#endif + +#include "cactus_constants.h" + +#endif + diff --git a/src/include/pugh_constants.h b/src/include/pugh_constants.h new file mode 100644 index 0000000..f8bc138 --- /dev/null +++ b/src/include/pugh_constants.h @@ -0,0 +1,64 @@ + /*@@ + @file pugh_constants.h + @date + @author + @desc + @enddesc + @version $Id$ + @@*/ + +/* Pre-processor constants start with PUGH_ */ + +/* Constants for the comm directions */ +#define PUGH_NOCOMM 0 +#define PUGH_ALLCOMM 1 +#define PUGH_XCOMM 2 +#define PUGH_YCOMM 3 +#define PUGH_ZCOMM 4 +#define PUGH_PLUSFACESCOMM 5 +#define PUGH_MINUSFACESCOMM 6 + +/* Constants for the storage */ +#define PUGH_NOSTORAGE 0 +#define PUGH_STORAGE 1 + +/* Constants for the GF Type. Only one now ... */ +/* Number of staggerings available in the code */ +#define PUGH_NSTAGGER 4 + +/* The index for each staggering type */ +/* This classifies each PGF member */ +#define PUGH_VERTEXCTR 0 +#define PUGH_FACECTR_X 1 +#define PUGH_FACECTR_Y 2 +#define PUGH_FACECTR_Z 3 + +/* The index for each staggering type */ +/* This determines the PGH should be set up */ +#define PUGH_NO_STAGGER 0 +#define PUGH_STAGGER 1 + +/* Comm types available */ +#define PUGH_NOCOMMBUFFERS 0 +#define PUGH_ALLOCATEDBUFFERS 1 +#define PUGH_DERIVEDTYPES 2 + +/* Termination flags */ +#define TERMINATION_NOT_RAISED 0 /* no termination flag, inital setting */ +#define TERMINATION_RAISED_LOCAL 1 /* raised on one pe, has to be broadcasted */ +#define TERMINATION_RAISED_BRDCAST 2 /* flag now available on all pes: termiante */ + +/* IO precision */ +#ifdef SINGLE_PRECISION +#define IO_FLOAT FLOAT32 +#else +#define IO_FLOAT FLOAT64 +#endif + +/* IO precision */ +#ifdef SINGLE_PRECISION +#define CACTUS_FLOAT FLOAT32 +#else +#define CACTUS_FLOAT FLOAT64 +#endif + diff --git a/src/make.code.defn b/src/make.code.defn new file mode 100644 index 0000000..640faaa --- /dev/null +++ b/src/make.code.defn @@ -0,0 +1,8 @@ +# Main make.code.defn file for thorn pugh +# : /usr/users/cactus/CCTK/lib/make/new_thorn.pl,v 1.1 1999/02/03 17:00:50 goodale Exp n +# Source files in this directory +SRCS = Startup.c GHExtension.c Comm.c SetupPGH.c + +# Subdirectories containing source files +SUBDIRS = + diff --git a/src/pugh/SetupPGArray.c b/src/pugh/SetupPGArray.c new file mode 100644 index 0000000..9a5cf71 --- /dev/null +++ b/src/pugh/SetupPGArray.c @@ -0,0 +1,84 @@ + /*@@ + @file SetupArray.c + @date Fri Feb 21 11:04:20 1997 + @author + @desc + Sets up (eg allocates) the pugh Array. This is trivial. + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine SetupPGArray + @date Fri Feb 21 11:05:04 1997 + @author + @desc + Note that npoints is a GLOBAL measure. 
+ @enddesc + @calls DisableGFDataStorage, EnableGFDataStorage +@@*/ + + +pGArray *SetupPGArray(pGH *GH, char *name, int npoints) { + pGArray *res; /* The result */ + pGArray **rlist; /* A temporary to add to the GH */ + int lnpoints, i; + + /* Space for the struct */ + res = (pGArray *)malloc(sizeof(pGArray)); + + /* Set my parent GH */ + res->parentGH = GH; + + /* Set up the name */ + res->name = (char *)malloc(strlen(name)*sizeof(char)); + strcpy(res->name,name); + + /* Allocate my memory */ + if (npoints > 0) { + lnpoints = npoints/GH->nprocs; + if (GH->myproc == GH->nprocs - 1) { + lnpoints = npoints - lnpoints * (GH->nprocs-1); + } + res->npoints = lnpoints; + res->data = (Double *)malloc(lnpoints*sizeof(Double)); + } else { + res->npoints = 1; + res->data = (Double *)malloc(sizeof(Double)); + } + + /* Setup IO stuff to default no IO with space for xgraph files.*/ + res->do_arrayio = 0; + + res->IEEEfile = NULL; + + /* Finally add this to the list of grid funcs in the gh */ + GH->nArrays ++; + rlist = (pGArray **)malloc(GH->ngridFuncs * sizeof(pGArray *)); + if (GH->nArrays > 1) { + for (i=0;i<GH->nArrays-1;i++) + rlist[i] = GH->Arrays[i]; + free(GH->Arrays); + } + res->pgano = GH->nArrays - 1; + rlist[GH->nArrays-1] = res; + GH->Arrays = rlist; + + /* And return myself... */ + return res; +} + +void DestroyPGArray(pGH *GH, pGArray **pGAin) { + pGArray *pGA; + pGA = *pGAin; + + free(pGA->name); + free(pGA->data); + + free(pGA); + *pGAin=NULL; +} diff --git a/src/pugh/SetupPGF.c b/src/pugh/SetupPGF.c new file mode 100644 index 0000000..acec587 --- /dev/null +++ b/src/pugh/SetupPGF.c @@ -0,0 +1,438 @@ + /*@@ + @file SetupPGF.c + @date Fri Feb 21 11:04:20 1997 + @author + @desc + Sets up (eg allocates) the pugh Grid Function. This is + crucial to understaiding the grid function struct. + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine SetupPGF + @date Fri Feb 21 11:05:04 1997 + @author + @desc + Allocates the Grid Func structure (I wanted to say object ...) + There are several steps to this + <ul> + <li> Set up the name + <li> Set up the docomm flag and IO flags + <li> Allocate output file pointer arrays + <li> Allocate the data buffer + <li> Allocate the send buffer + <li> Add myself to the Grid Hierarchy + </ul> + But this is pretty straightforward code nonetheless. + <p> + Note that the input variable "storage" determines whether to + allocate the 3-D data or not. You can toggle this later + with @seeroutine DisableGFDataStorage and @seeroutine + EnableGFDataStorage + @enddesc + @calls DisableGFDataStorage, EnableGFDataStorage +@@*/ + + +pGF *SetupPGF(pGH *GH, char *name, int docomm, int storage, int stagger) { + pGF *res; /* The result */ + pGF **rlist; /* A temporary to add to the GH */ + int i, dir, dirp1, dirp2, sz; /* Counter thingies */ + + /* Space for the struct */ + res = (pGF *)malloc(sizeof(pGF)); + + /* Set my parent GH */ + res->parentGH = GH; + + /* Set up the name */ + res->name = (char *)malloc((1+strlen(name))*sizeof(char)); + strcpy(res->name,name); + + res->stagger = stagger; + if( (GH->stagger == PUGH_NO_STAGGER) && + (res->stagger != PUGH_VERTEXCTR) ) + { printf ("FATAL ERROR! Cannot have staggered grids inside a GH with \n"); + printf (" designation PUGH_NO_STAGGER. 
\n"); + STOP; + } + + + /* Setup IO stuff to default no IO with space for xgraph files.*/ + res->do_3dio = 0; + res->do_2dio = 0; + res->do_1dio = 0; + res->do_0dio = 0; + res->do_conv = 0; + + res->convfile = NULL; + + res->xgfile = (FILE **)malloc(5*sizeof(FILE *)); + for (i=0;i<5;i++) + res->xgfile[i] = NULL; + + res->zerodfile = (FILE **)malloc(4*sizeof(FILE *)); + for (i=0;i<4;i++) + res->zerodfile[i] = NULL; + + res->IEEEfile = NULL; + + for (i=0;i<3;i++) + res->IEEEfile_2d[i] = NULL; + + res->show_maxval = 0; + + res->lastio_it[0] = -1; res->lastio_it[1] = -1; + res->lastio_it[2] = -1; res->lastio_it[3] = -1; + + SetGFComm(res, docomm); + + + /* Finally add this to the list of grid funcs in the gh */ + GH->ngridFuncs ++; + rlist = (pGF **)malloc(GH->ngridFuncs * sizeof(pGF *)); + if (GH->ngridFuncs > 1) { + for (i=0;i<GH->ngridFuncs-1;i++) + rlist[i] = GH->gridFuncs[i]; + free(GH->gridFuncs); + } + res->gfno = GH->ngridFuncs - 1; + rlist[GH->ngridFuncs-1] = res; + GH->gridFuncs = rlist; + /* Set up the comm layer */ + res->padddata = NULL; + res->data = NULL; + res->send_buffer = NULL; + res->recv_buffer = NULL; + + if (storage == PUGH_NOSTORAGE) { + res->storage = PUGH_STORAGE; /* Fake it out... */ + DisableGFDataStorage(GH, res); + } else if (storage == PUGH_STORAGE) { + res->storage = PUGH_NOSTORAGE; /* Fake it out... */ + EnableGFDataStorage(GH, res); + } else { + printf ("WARNING: Storage keyword set incorrectly for %s [%d]\n",res->name,storage); + printf (" Perhaps misspelled in GridFuncList? Defaulting to storage active.\n"); + EnableGFDataStorage(GH, res); + } + +#ifdef MPI + /* Null my send and recieve requests */ + for (i=0;i<6;i++) { + res->sreq[i] = MPI_REQUEST_NULL; + res->rreq[i] = MPI_REQUEST_NULL; + } +#endif + + /* And return myself... */ + return res; +} + + /*@@ + @routine DestroyPGF + @date Thu Aug 21 11:44:22 1997 + @author Paul Walker + @desc + Destroys a GF object. + @enddesc +@@*/ + +void DestroyPGF(pGH *GH, pGF **GFin) { + pGF *GF; + GF = *GFin; + + if (GF->storage) + DisableGFDataStorage(GH, GF); + + free(GF->name); + free(GF->padddata); + + free(GF->xgfile); + free(GF->zerodfile); + + GF->data = NULL; + + free(GF); + *GFin = NULL; + +} + + + /*@@ + @routine EnableGFDataStorage + @date Thu Apr 3 12:44:38 1997 + @author Paul Walker + @desc + This routine toggles the data storage to the "on" + position. That means that data points to a 3D + array, rather than a single Double, and the + storage flag is set to one. This is used quite + a lot by thorns which need to toggle memory. for + instance, see the trK Driver in the util thron. + @enddesc +@@*/ + +/* Statics for padding */ +static int padding_active = -1; +static int padding_cacheline_bits, padding_size, padding_address_spacing; + +int zero_memory = -1; + +void EnableGFDataStorage(pGH *GH, pGF *GF) { + int i, dir, dirp1, dirp2, sz; + int special_pad, cache_size, start; + + if (zero_memory < 0) { + zero_memory = Contains("zero_memory","yes"); + if (zero_memory) { + printf ("Zeroing memory for allocated GFs at alloc time\n"); + } + } + + + if (padding_active == -1) { + padding_active = Contains("padding_active","yes"); + padding_cacheline_bits = Geti("padding_cacheline_bits"); + padding_size = Geti("padding_size"); + padding_address_spacing = Geti("padding_address_spacing"); + if (padding_active) { + printf ("PUGH Memory padding active. 
+ /*@@
+   @routine EnableGFDataStorage
+   @date    Thu Apr 3 12:44:38 1997
+   @author  Paul Walker
+   @desc
+   This routine toggles the data storage to the "on"
+   position. That means that data points to a 3D
+   array, rather than a single Double, and the
+   storage flag is set to one. This is used quite
+   a lot by thorns which need to toggle memory; for
+   instance, see the trK driver in the util thorn.
+   @enddesc
+@@*/
+
+/* Statics for padding */
+static int padding_active = -1;
+static int padding_cacheline_bits, padding_size, padding_address_spacing;
+
+int zero_memory = -1;
+
+void EnableGFDataStorage(pGH *GH, pGF *GF) {
+  int i, dir, dirp1, dirp2, sz;
+  int special_pad, cache_size, start;
+
+  if (zero_memory < 0) {
+    zero_memory = Contains("zero_memory","yes");
+    if (zero_memory) {
+      printf ("Zeroing memory for allocated GFs at alloc time\n");
+    }
+  }
+
+  if (padding_active == -1) {
+    padding_active = Contains("padding_active","yes");
+    padding_cacheline_bits = Geti("padding_cacheline_bits");
+    padding_size = Geti("padding_size");
+    padding_address_spacing = Geti("padding_address_spacing");
+    if (padding_active) {
+      printf ("PUGH Memory padding active. Stats:\n");
+      printf ("   cacheline_bits : %d\n", padding_cacheline_bits);
+      printf ("   size           : %d\n", padding_size);
+      printf ("   spacing        : %d\n", padding_address_spacing);
+    }
+  }
+
+  if (GF->storage) {
+    printf ("WARNING: Tried to enable %s when already enabled\n",
+            GF->name);
+    return;
+  }
+
+  /* Set up the send and receive buffers, one per face
+     with a live neighbor. */
+  assert(!GF->send_buffer);
+  GF->send_buffer = (Double **)malloc(6*sizeof(Double *));
+
+  assert(!GF->recv_buffer);
+  GF->recv_buffer = (Double **)malloc(6*sizeof(Double *));
+
+  for (i=0;i<6;i++) {
+    dir   = i/2;
+    dirp1 = (i/2+1)%3;
+    dirp2 = (i/2+2)%3;
+
+    if (dir == 0) {
+      sz = GH->stencil_width * GH->lny * GH->lnz;
+    } else if (dir == 1) {
+      sz = GH->stencil_width * GH->lnx * GH->lnz;
+    } else {
+      sz = GH->stencil_width * GH->lnx * GH->lny;
+    }
+
+    if (GH->neighbors[GH->myproc][i] >= 0) {
+      GF->buffer_sz[i] = sz;
+      GF->send_buffer[i] = (Double *)malloc(sz*sizeof(Double));
+      GF->recv_buffer[i] = (Double *)malloc(sz*sizeof(Double));
+      assert(GF->recv_buffer[i]);
+      assert(GF->send_buffer[i]);
+    } else {
+      GF->buffer_sz[i] = 0;
+      GF->send_buffer[i] = NULL;
+      GF->recv_buffer[i] = NULL;
+    }
+  }
+
+  /* Set up the storage */
+  if (GF->padddata) free (GF->padddata);
+  if (!padding_active) {
+    GF->padddata = (Double *)malloc((GH->npoints)*sizeof(Double));
+    GF->data = GF->padddata;   /* No padding case */
+    if (zero_memory) {
+      for (i=0;i<GH->npoints;i++)
+        GF->padddata[i] = 0.0;
+    }
+  } else {
+    GF->padddata = (Double *)malloc((GH->npoints + padding_size)
+                                    *sizeof(Double));
+
+    /* Align the actual starting address on a cache line.
+       More specifically, align on a unique cache line for
+       each field. */
+    cache_size = 2 << (padding_cacheline_bits - 1);
+    start = ((long) (GF->padddata) / sizeof(Double))%cache_size;
+
+    special_pad = ((GF->gfno) * padding_address_spacing
+                   + cache_size - start)%cache_size;
+    GF->data = (GF->padddata) + special_pad;
+
+    if (special_pad > padding_size) {
+      printf("FATAL ERROR: padding size not large enough for cache type specified %d %d %d\n",special_pad,padding_size,cache_size);
+      STOP;
+    }
+    if (zero_memory) {
+      for (i=0;i<GH->npoints+padding_size;i++)
+        GF->padddata[i] = 0.0;
+    }
+  }
+
+  if (!GF->padddata) {
+    printf ("FATAL ERROR: Cannot allocate data for %s [%d]\n",
+            GF->name, GF->gfno);
+    STOP;
+  }
+  GF->storage = 1;
+}
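The alignment arithmetic is the subtle part of this routine. A sketch with made-up parameter values (cacheline bits 5, address spacing 3) that mirrors the computation; Double and the variable names follow this file:

    /* Sketch of the cache-line alignment in EnableGFDataStorage,
       assuming padding_cacheline_bits = 5 and
       padding_address_spacing = 3 (illustrative values). */
    void padding_sketch(Double *padddata, int gfno, Double **data)
    {
      int cache_size, start, special_pad;

      cache_size = 2 << (5 - 1);        /* 2^5 = 32 Doubles per line */
      /* Word offset of the raw allocation within a cache line: */
      start = (int)(((long)padddata / sizeof(Double)) % cache_size);
      /* Slide field gfno to its own offset within the line,
         3 words apart per field, so different GFs do not all
         start on the same cache-line offset: */
      special_pad = (gfno * 3 + cache_size - start) % cache_size;
      *data = padddata + special_pad;   /* requires special_pad <= padding_size */
    }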
+ /*@@
+   @routine DisableGFDataStorage
+   @date    Thu Apr 3 12:45:34 1997
+   @author  Paul Walker
+   @desc
+   This routine disables the grid function storage.
+   That is, it un-allocates the 3D array and in its
+   place allocates a single Double with the value
+   0.0. This allows us to still have something to
+   pass around (an array of size (1,1,1) in Fortran
+   speak) but also to not need all our 3D arrays
+   "on" all the time.
+   @enddesc
+@@*/
+
+void DisableGFDataStorage(pGH *GH, pGF *GF) {
+  int i;
+
+  if (!GF->storage) {
+    printf ("WARNING: Tried to disable %s when already disabled\n",
+            GF->name);
+    return;
+  }
+
+  if (GF->padddata) {
+    free(GF->padddata);
+    GF->padddata = NULL;
+    GF->data = NULL;
+  }
+
+  if (GF->send_buffer) {
+    for (i=0;i<6;i++)
+      if (GF->send_buffer[i]) {
+#ifdef MPI
+        if (GF->sreq[i] != MPI_REQUEST_NULL) {
+          CACTUS_MPI_ERROR(MPI_Request_free(&(GF->sreq[i])));
+        }
+#endif
+        free(GF->send_buffer[i]);
+        GF->send_buffer[i] = NULL;
+      }
+    free(GF->send_buffer);
+    GF->send_buffer = NULL;
+  }
+
+  if (GF->recv_buffer) {
+    for (i=0;i<6;i++)
+      if (GF->recv_buffer[i]) {
+        free(GF->recv_buffer[i]);
+        GF->recv_buffer[i] = NULL;
+      }
+    free(GF->recv_buffer);
+    GF->recv_buffer = NULL;
+  }
+
+  GF->padddata = (Double *)malloc(sizeof(Double));
+  GF->data = GF->padddata;
+  GF->data[0] = 0.0;   /* Very important! */
+  GF->storage = 0;
+}
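Together, EnableGFDataStorage and DisableGFDataStorage give thorns a cheap on/off switch for 3D memory; while storage is off, data still points at one Double, so callers (including Fortran, which sees a (1,1,1) array) always get valid memory. A hedged usage sketch; the grid function name is made up:

    /* Sketch: a thorn bracketing a memory-hungry phase.
       "trK" is an illustrative pGF pointer, not a real pugh name. */
    void work_phase_sketch(pGH *GH, pGF *trK)
    {
      EnableGFDataStorage(GH, trK);    /* allocate the 3D array   */
      /* ... compute on trK->data[DATINDEX(GH,i,j,k)] ... */
      DisableGFDataStorage(GH, trK);   /* back to a single Double */
    }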
+ /*@@
+   @routine SetGFComm
+   @date    Thu Apr 3 12:46:23 1997
+   @author  Paul Walker
+   @desc
+   This sets the docomm[6] array of the GF based
+   on the setting of the comm flag. Note the
+   comm flag constants are defined in
+   @seeheader pughDriver.h. Besides SetupPGF,
+   this is called by thorns (e.g. elliptic) which
+   need to toggle this state.
+   @enddesc
+   @calledby SetupPGF
+@@*/
+
+
+void SetGFComm(pGF *res, int docomm) {
+  int i;
+
+  /* Copy docomm */
+  res->commflag = docomm;
+
+  for (i=0;i<6;i++)
+    res->docomm[i] = 0;
+
+  if (docomm == PUGH_NOCOMM) {
+    /* All faces already off */
+  } else if (docomm == PUGH_ALLCOMM) {
+    for (i=0;i<6;i++)
+      res->docomm[i] = 1;
+  } else if (docomm == PUGH_XCOMM) {
+    res->docomm[0] = 1;
+    res->docomm[1] = 1;
+  } else if (docomm == PUGH_YCOMM) {
+    res->docomm[2] = 1;
+    res->docomm[3] = 1;
+  } else if (docomm == PUGH_ZCOMM) {
+    res->docomm[4] = 1;
+    res->docomm[5] = 1;
+  } else if (docomm == PUGH_PLUSFACESCOMM) {
+    res->docomm[1] = 1;
+    res->docomm[3] = 1;
+    res->docomm[5] = 1;
+  } else if (docomm == PUGH_MINUSFACESCOMM) {
+    res->docomm[0] = 1;
+    res->docomm[2] = 1;
+    res->docomm[4] = 1;
+  } else {
+    printf("WARNING: Unsupported comm in grid function %s.\n",res->name);
+    printf("         Perhaps misspelled in GridFuncList? Defaulting to allcomm.\n");
+    for (i=0;i<6;i++)
+      res->docomm[i] = 1;
+  }
+
+  /* Handle nx = 1 type cases. This is only important for one
+     processor MPI periodic boundaries. */
+  /* This is a hack to get a GH: this routine is not passed a GH,
+     and changing its signature would touch a lot of code, so for
+     now we go through the global GH list. */
+  if (GHList->GH->nx == 1) {
+    res->docomm[0] = 0;
+    res->docomm[1] = 0;
+  }
+
+  if (GHList->GH->ny == 1) {
+    res->docomm[2] = 0;
+    res->docomm[3] = 0;
+  }
+
+  if (GHList->GH->nz == 1) {
+    res->docomm[4] = 0;
+    res->docomm[5] = 0;
+  }
+}
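Throughout pugh a face index in 0..5 encodes direction face/2 (x, y, z) and side face%2 (minus, plus), which is why PUGH_XCOMM switches on docomm[0] and docomm[1]. A tiny decoding sketch (the helper is illustrative, not part of pugh):

    /* Sketch: pugh's face numbering convention. */
    static const char *face_name(int face)
    {
      static const char *names[6] = { "x-", "x+", "y-", "y+", "z-", "z+" };
      return names[face];          /* dir = face/2, side = face%2 */
    }
    /* e.g. PUGH_YCOMM enables faces 2 and 3: "y-" and "y+". */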
diff --git a/src/pugh/SetupSliceCenter.c b/src/pugh/SetupSliceCenter.c
new file mode 100644
index 0000000..58e6a07
--- /dev/null
+++ b/src/pugh/SetupSliceCenter.c
@@ -0,0 +1,137 @@
+ /*@@
+   @file    SetupSliceCenter.c
+   @date
+   @author
+   @desc
+   Sets the grid indices used as the center point for slice output.
+   @enddesc
+   @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+void SetupSliceCenter(pGH *GH) {
+  int slice_center;
+
+  /* slice_center used to be part of the GF,
+     either 1 (default) or 0 (octant, edge).
+     Now spxyz[0], spxyz[1], spxyz[2] are part of the GH. */
+
+  if ((GH->spxyz[0]!=-1)||(GH->spxyz[1]!=-1)||(GH->spxyz[2]!=-1)) {
+    printf("WARNING in SetupSliceCenter: you already have these slice center settings (OK for BoxInBox):\n");
+    printf("spxyz[0],spxyz[1],spxyz[2]: %d %d %d\n",
+           GH->spxyz[0], GH->spxyz[1], GH->spxyz[2]);
+    return;
+  }
+
+  slice_center = 1;
+  if (Contains("grid","octant") || Contains("grid","quadrant") ||
+      Contains("grid","bitant"))
+    slice_center = 0;
+  if (Contains("slice_loc","edge")) slice_center = 0;
+
+  if (slice_center) {
+    /* For non-octant modes the center for slicings
+       is moved to the center index (unless nx,ny,nz = 1) */
+    if (GH->nx == 1) {
+      GH->spxyz[0]   = 0;
+      GH->sp2dxyz[0] = 0;
+    } else {
+      GH->spxyz[0]   = GH->nx/2 - GH->lb[GH->myproc][0];
+      GH->sp2dxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0];
+    }
+
+    if (GH->ny == 1) {
+      GH->spxyz[1]   = 0;
+      GH->sp2dxyz[1] = 0;
+    } else {
+      GH->spxyz[1]   = GH->ny/2 - GH->lb[GH->myproc][1];
+      GH->sp2dxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1];
+    }
+
+    if (GH->nz == 1) {
+      GH->spxyz[2]   = 0;
+      GH->sp2dxyz[2] = 0;
+    } else {
+      GH->spxyz[2]   = GH->nz/2 - GH->lb[GH->myproc][2];
+      GH->sp2dxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2];
+    }
+  }
+  /* For octant mode, the slicing center is index 1 or zero */
+  else {
+    if (GH->nx == 1) {
+      GH->spxyz[0]   = 0;
+      GH->sp2dxyz[0] = 0;
+    } else {
+      GH->spxyz[0]   = 1 - GH->lb[GH->myproc][0];
+      GH->sp2dxyz[0] = 1 - GH->lb[GH->myproc][0];
+    }
+
+    if (GH->ny == 1) {
+      GH->spxyz[1]   = 0;
+      GH->sp2dxyz[1] = 0;
+    } else {
+      GH->spxyz[1]   = 1 - GH->lb[GH->myproc][1];
+      GH->sp2dxyz[1] = 1 - GH->lb[GH->myproc][1];
+    }
+
+    if (GH->nz == 1) {
+      GH->spxyz[2]   = 0;
+      GH->sp2dxyz[2] = 0;
+    } else {
+      GH->spxyz[2]   = 1 - GH->lb[GH->myproc][2];
+      GH->sp2dxyz[2] = 1 - GH->lb[GH->myproc][2];
+    }
+  }
+
+  /* Special case: in quadrant mode x and y are like full, but
+     z is like octant. Thinking about this gives you a headache,
+     but it works. */
+  if (Contains("grid","quadrant")) {
+    if (GH->nz == 1) {
+      GH->spxyz[2]   = 0;
+      GH->sp2dxyz[2] = 0;
+    } else {
+      GH->spxyz[2]   = GH->nz/2 - GH->lb[GH->myproc][2];
+      GH->sp2dxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2];
+    }
+  }
+
+  /* Another special case: bitant! With full and octant already
+     implemented, bitant is a breeze on the neurons. */
+  if (Contains("grid","bitant")) {
+    if (GH->nx == 1) {
+      GH->spxyz[0]   = 0;
+      GH->sp2dxyz[0] = 0;
+    } else {
+      GH->spxyz[0]   = GH->nx/2 - GH->lb[GH->myproc][0];
+      GH->sp2dxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0];
+    }
+    if (GH->ny == 1) {
+      GH->spxyz[1]   = 0;
+      GH->sp2dxyz[1] = 0;
+    } else {
+      GH->spxyz[1]   = GH->ny/2 - GH->lb[GH->myproc][1];
+      GH->sp2dxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1];
+    }
+  }
+
+#ifdef THORN_CARTOON_2D
+  /* For cartoon_2d, treat x as in octant, y and z as in full. BB */
+  if (Contains("cartoon_active","yes")) {
+    if (GH->nx == 1) {
+      GH->spxyz[0]   = 0;
+      GH->sp2dxyz[0] = 0;
+    } else {
+      GH->spxyz[0]   = 1 - GH->lb[GH->myproc][0];
+      GH->sp2dxyz[0] = 1 - GH->lb[GH->myproc][0];
+    }
+  }
+#endif
+}
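The spxyz arithmetic converts a global center index into a processor-local one by subtracting the processor's lower bound, and the result can legitimately fall outside the local patch. A worked sketch with made-up numbers:

    /* Sketch: where the slice center lands for nx = 16 over two
       processors in full-grid mode (illustrative decomposition:
       proc 0 owns global x-indices 0..7, proc 1 owns 8..15). */
    void slice_center_sketch(void)
    {
      int nx  = 16;
      int lb0 = 0, lb1 = 8;       /* lower bounds of procs 0 and 1   */
      int sp0 = nx/2 - lb0;       /* = 8 on proc 0: outside its patch */
      int sp1 = nx/2 - lb1;       /* = 0 on proc 1: the point lives here */
      (void)sp0; (void)sp1;       /* output code must range-check these */
    }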
diff --git a/src/pugh/SyncGroupGF.c b/src/pugh/SyncGroupGF.c
new file mode 100644
index 0000000..2d21ae2
--- /dev/null
+++ b/src/pugh/SyncGroupGF.c
@@ -0,0 +1,271 @@
+/*@@
+  @file    SyncGroupGF.c
+  @author  Paul Walker
+  @date    March 1997
+  @desc
+  Synchronizes all the Grid Functions (which have their communications
+  on) in a given group which has previously been registered to
+  the GF database with StoreGFGroupData. The MPI section was
+  formerly called SyncGH.c.
+  @enddesc
+  @history
+  @hdate Sep 27 1998 @hauthor Gabrielle Allen, Tom Goodale
+  @hdesc Added the functionality to synchronise indices from a group
+  rather than the whole GH. Changed the routine/file names.
+  @endhistory
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+/*#define VERBOSE*/
+#undef COMM_TIMING
+
+int GetGFGroupData(const char *name, int *nGFs, int **GFIndices);
+
+/*@@
+  @routine SyncGroupGF
+  @author  Paul Walker
+  @date    March 1997
+  @desc
+  Synchronizes all the Grid Functions (which have their communications
+  on) in a given group which has previously been registered to
+  the GF database with StoreGFGroupData. The MPI section was
+  formerly called SyncGH.c.
+  @enddesc
+  @history
+  @hdate Sep 27 1998 @hauthor Gabrielle Allen, Tom Goodale
+  @hdesc Added the functionality to synchronise indices from a group
+  rather than the whole GH. Changed the routine/file names.
+  @endhistory
+ @@*/
+
+/* This routine works in one of two ways:
+ *
+ * 1. If using derived data types, it posts receives, posts sends,
+ *    waits for all sends and receives, and finishes the receives.
+ *
+ * 2. If using buffered communication, it posts receives, posts
+ *    sends, waits for receives only, and finishes them. We don't
+ *    need to wait for sends until the next time through. This
+ *    is why the wait for the send buffer is immediately before
+ *    the post send. (Since the send requests are initially set
+ *    to MPI_REQUEST_NULL, the first Wait and any waits in
+ *    unused directions are a wash.)
+ *
+ * This routine is intertwined with the contents of routines
+ * pGF_PostRecv, pGF_PostSend, and pGF_FinishRecv.
+ *
+ * Without MPI, this does nothing, of course.
+ *
+ * Note this routine is also responsible for updating the
+ * commt global so we can get a measure of how much time we
+ * spend in communications overhead.
+ *
+ * Also note that it is more efficient to sync a number of
+ * GFs together (as a group or a hierarchy) rather than
+ * singly. This is because syncing single functions waits for the
+ * receives on a single function before posting the sends on
+ * all the other functions.
+ */
+
+void SyncGroupGF(pGH *GH, const char *groupname) {
+#ifdef MPI
+  int Dir;
+  double t1,t2;
+  MPI_Request *sr = NULL;
+  MPI_Status mss;
+#endif
+  int i;
+  double st[PUGH_NTIMERS],et[PUGH_NTIMERS];
+  int l;
+  int *GFindices;
+  int nGFs;
+
+  pughTimers(st);
+
+  /* Find which grid functions make up the group */
+  GetGFGroupData(groupname,&nGFs,&GFindices);
+
+  /* Say which grid functions will actually be synchronised */
+  if (Contains("cactus_sync_verbose","yes")) {
+    printf("Syncing %d grid functions in group %s\n",nGFs,groupname);
+    printf("   Syncing GFs ...");
+    for (i=0;i<nGFs;i++) {
+      if (GH->gridFuncs[GFindices[i]]->storage
+          && (GH->gridFuncs[GFindices[i]]->docomm[1] ||
+              GH->gridFuncs[GFindices[i]]->docomm[2] ||
+              GH->gridFuncs[GFindices[i]]->docomm[3] ||
+              GH->forceSync))
+        printf(" %s",GH->gridFuncs[GFindices[i]]->name);
+    }
+    printf("\n");
+  }
+
+#ifdef MPI
+  if (GH->commmodel == PUGH_DERIVEDTYPES) {
+    /* 2 faces per direction, send and receive: hence the 2 * 2 */
+    sr = (MPI_Request *)malloc(nGFs * 2 * 2 * sizeof(MPI_Request));
+    for (i=0;i<nGFs * 2 * 2; i++)
+      sr[i] = MPI_REQUEST_NULL;
+  }
+
+  for (Dir = 0; Dir < 3; Dir ++) {
+#ifdef COMM_TIMING
+    t1 = MPI_Wtime();
+#endif
+    for (i=0;i<nGFs;i++) {
+      pGF_PostRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+      pGF_PostRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+    }
+
+#ifdef COMM_TIMING
+    t2 = MPI_Wtime();
+    printf ("PR : %lf\n",t2-t1);
+#endif
+    if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) {
+      for (i=0;i<nGFs;i++) {
+        /* Wait for the last send. Since these will be null if they
+           are not used, this is always safe. */
+        MPI_Wait(&(GH->gridFuncs[GFindices[i]]->sreq[2*Dir]),&mss);
+        pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+        MPI_Wait(&(GH->gridFuncs[GFindices[i]]->sreq[2*Dir+1]),&mss);
+        pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+      }
+    } else {
+      for (i=0;i<nGFs;i++) {
+        pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+        pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+      }
+    }
+
+#ifdef COMM_TIMING
+    t1 = MPI_Wtime();
+    printf ("PS : %lf\n",t1-t2);
+#endif
+    /* Now comes the big difference between derived types and
+       allocated buffers. With derived types, we now have to
+       wait on all our receive AND SEND requests so we can
+       keep on using the send buffers (as communications are
+       in-place). With allocated buffers we have to wait on each
+       receive, but not on the send, since we don't need the
+       send buffer until we pack a send again (above). */
+
+    if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) {
+      /* Wait on the receives one by one */
+      for (i=0;i<nGFs;i++) {
+        MPI_Wait(&(GH->gridFuncs[GFindices[i]]->rreq[2*Dir]),&mss);
+        pGF_FinishRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+        MPI_Wait(&(GH->gridFuncs[GFindices[i]]->rreq[2*Dir+1]),&mss);
+        pGF_FinishRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+      }
+    }
+
+    if (GH->commmodel == PUGH_DERIVEDTYPES) {
+      /* Load up the requests for the waitall */
+      for (i=0;i<nGFs;i++) {
+        int id = i*4;
+        if (GH->gridFuncs[GFindices[i]]->docomm[2*Dir] &&
+            GH->gridFuncs[GFindices[i]]->storage) {
+          sr[id]   = GH->gridFuncs[GFindices[i]]->sreq[2*Dir];
+          sr[id+1] = GH->gridFuncs[GFindices[i]]->rreq[2*Dir];
+        } else {
+          sr[id]   = MPI_REQUEST_NULL;
+          sr[id+1] = MPI_REQUEST_NULL;
+        }
+
+        if (GH->gridFuncs[GFindices[i]]->docomm[2*Dir+1] &&
+            GH->gridFuncs[GFindices[i]]->storage) {
+          sr[id+2] = GH->gridFuncs[GFindices[i]]->sreq[2*Dir+1];
+          sr[id+3] = GH->gridFuncs[GFindices[i]]->rreq[2*Dir+1];
+        } else {
+          sr[id+2] = MPI_REQUEST_NULL;
+          sr[id+3] = MPI_REQUEST_NULL;
+        }
+      }
+      /* Now do a waitall; the statuses are not needed */
+      MPI_Waitall(4*nGFs,sr,MPI_STATUSES_IGNORE);
+    }
+#ifdef COMM_TIMING
+    t2 = MPI_Wtime();
+    printf ("FR : %lf\n",t2-t1);
+#endif
+  }
+
+  /* Free the derived-types request array once all directions are done */
+  if (sr) free(sr);
+#endif
+  pughTimers(et);
+  for (l=0;l<PUGH_NTIMERS;l++)
+    commt[l] += et[l]-st[l];
+}
+
+
+/*@@
+  @routine SyncGH
+  @author  Gabrielle Allen
+  @date    Sep 27 1998
+  @desc
+  Wrapper to support the SyncGH command, which is now implemented through
+  a group.
+  @enddesc
+ @@*/
+
+void SyncGH(pGH *GH) {
+
+  SyncGroupGF(GH,"$GH");
+
+}
+
+
+/*@@
+  @routine SyncGF
+  @author  Gabrielle Allen
+  @date    Sep 27 1998
+  @desc
+  Wrapper to support the SyncGF command, which is now implemented through
+  a group. Note that it is more efficient to sync all required GFs at
+  the same time, using @seeroutine SyncGroupGF with a given group.
+  @enddesc
+ @@*/
+
+void SyncGF(pGH *GH, pGF *GF) {
+
+  SyncGroupGF(GH,GF->name);
+
+}
+
+
+/* The old code for SyncGF is kept here (commented out), since it
+ * illustrates simply the use of pGF_PostRecv, pGF_PostSend and
+ * pGF_FinishRecv.
+ */
+
+/* void SyncGF(pGH *GH, pGF *GF) { */
+
+/* #ifdef MPI */
+/*   int Dir; */
+/*   MPI_Status mss; */
+/*   for (Dir=0;Dir<3;Dir++) { */
+/*     pGF_PostRecv(GH, GF, 2*Dir); */
+/*     pGF_PostRecv(GH, GF, 2*Dir+1); */
+
+/*     CACTUS_MPI_ERROR(MPI_Wait(&(GF->sreq[2*Dir]),&mss)); */
+/*     pGF_PostSend(GH, GF, 2*Dir); */
+/*     CACTUS_MPI_ERROR(MPI_Wait(&(GF->sreq[2*Dir+1]),&mss)); */
+/*     pGF_PostSend(GH, GF, 2*Dir+1); */
+
+/*     CACTUS_MPI_ERROR(MPI_Wait(&(GF->rreq[2*Dir]),&mss)); */
+/*     pGF_FinishRecv(GH, GF, 2*Dir); */
+/*     CACTUS_MPI_ERROR(MPI_Wait(&(GF->rreq[2*Dir+1]),&mss)); */
+/*     pGF_FinishRecv(GH, GF, 2*Dir+1); */
+/*   } */
+/* #endif */
+/* } */
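Stripped of timing and verbosity, the buffered path of SyncGroupGF is a classic post-receive / pack-and-send / wait-and-unpack exchange, one direction pair at a time. A condensed sketch using this file's own routines; gfs and ngfs stand in for the group's function list:

    #ifdef MPI
    /* Sketch: SyncGroupGF's buffered-communication skeleton. */
    static void sync_sketch(pGH *GH, pGF **gfs, int ngfs)
    {
      int Dir, i;
      MPI_Status mss;

      for (Dir = 0; Dir < 3; Dir++) {
        for (i = 0; i < ngfs; i++) {              /* 1. post receives   */
          pGF_PostRecv(GH, gfs[i], 2*Dir);
          pGF_PostRecv(GH, gfs[i], 2*Dir+1);
        }
        for (i = 0; i < ngfs; i++) {              /* 2. pack and send   */
          MPI_Wait(&gfs[i]->sreq[2*Dir], &mss);   /* previous send done */
          pGF_PostSend(GH, gfs[i], 2*Dir);
          MPI_Wait(&gfs[i]->sreq[2*Dir+1], &mss);
          pGF_PostSend(GH, gfs[i], 2*Dir+1);
        }
        for (i = 0; i < ngfs; i++) {              /* 3. wait and unpack */
          MPI_Wait(&gfs[i]->rreq[2*Dir], &mss);
          pGF_FinishRecv(GH, gfs[i], 2*Dir);
          MPI_Wait(&gfs[i]->rreq[2*Dir+1], &mss);
          pGF_FinishRecv(GH, gfs[i], 2*Dir+1);
        }
      }
    }
    #endif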
diff --git a/src/pugh/pGF_FinishRecv.c b/src/pugh/pGF_FinishRecv.c
new file mode 100644
index 0000000..316dfc1
--- /dev/null
+++ b/src/pugh/pGF_FinishRecv.c
@@ -0,0 +1,81 @@
+ /*@@
+   @file    pGF_FinishRecv.c
+   @date    Thu Apr 3 11:37:28 1997
+   @author  Paul Walker
+   @desc
+   The routine which finalizes the MPI receives for a grid
+   function. Critically linked with @seefile pGF_PostRecv.c
+   and @seefile pGF_PostSend.c.
+   @enddesc
+   @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+   @routine pGF_FinishRecv
+   @date    Thu Apr 3 11:38:07 1997
+   @author  Paul Walker
+   @desc
+   This routine finalizes the MPI communication through a face.
+   It is crucially linked with @seeroutine pGF_PostRecv and
+   @seeroutine pGF_PostSend.
+   <p>
+   <b>Important!</b>
+   This routine does <b>not</b> wait on the receives.
+   Before it is called, you must do the MPI_Wait or
+   else you will not get the right answer. For an
+   example of this, see @seeroutine SyncGroupGF.
+   @enddesc
+   @calledby SyncGroupGF
+   @history
+   @hdate Nov 4 1998 @hauthor Gabrielle Allen
+   @hdesc Allow for forced synchronization of all GFs with storage
+   @endhistory
+@@*/
+
+void pGF_FinishRecv(pGH *GH, pGF *GF, int dir) {
+#ifdef MPI
+  int istart, iend, jstart, jend, kstart, kend;
+  int ii,jj,kk,xx;
+
+  /* Return if GF has no storage */
+  if (!(GF->storage)) return;
+
+  /* Return if communication not required and no forced synchronisation */
+  if (!(GH->forceSync || GF->docomm[dir])) return;
+
+  if (GH->neighbors[GH->myproc][dir] >= 0) {
+    /* Here we wait one at a time, so the others can arrive while
+       we copy the data back in... */
+
+#ifdef DEBUG_PRINT
+    printf ("FINISHRECV: GF %s Side %d Proc %d request %d\n",
+            GF->name,dir,GH->myproc,GF->rreq[dir]);
+#endif
+
+    /* Copy the buffer into the GF storage */
+    istart = GH->ghosts[GF->stagger][dir][0][0];
+    iend   = GH->ghosts[GF->stagger][dir][0][1];
+    jstart = GH->ghosts[GF->stagger][dir][1][0];
+    jend   = GH->ghosts[GF->stagger][dir][1][1];
+    kstart = GH->ghosts[GF->stagger][dir][2][0];
+    kend   = GH->ghosts[GF->stagger][dir][2][1];
+
+    /* Now copy. Note the ordering is just like in PostSend. */
+    xx = 0;
+    for (kk=kstart; kk<kend; kk++)
+      for (jj=jstart; jj<jend; jj++)
+        for (ii=istart; ii<iend; ii++)
+          GF->data[DATINDEX(GH,ii,jj,kk)] = GF->recv_buffer[dir][xx++];
+  }
+
+#endif
+}
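The unpack loop relies on DATINDEX to linearize (i,j,k). Assuming it expands to i + lnx*(j + lny*k) (Fortran ordering; that is an assumption, check pugh.h), pack and unpack traverse a face in the same nest order, so buffer element xx always returns to the matching point. A sketch under that assumption:

    /* Sketch: the unpack order above, written against the assumed
       DATINDEX expansion i + lnx*(j + lny*k). */
    static void unpack_sketch(Double *data, const Double *buf,
                              int lnx, int lny,
                              int istart, int iend, int jstart, int jend,
                              int kstart, int kend)
    {
      int ii, jj, kk, xx = 0;
      for (kk = kstart; kk < kend; kk++)
        for (jj = jstart; jj < jend; jj++)
          for (ii = istart; ii < iend; ii++)
            data[ii + lnx*(jj + lny*kk)] = buf[xx++];
      /* pGF_PostSend packs with the identical loop nest, so element
         xx always lands back at the matching (ii,jj,kk). */
    }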
diff --git a/src/pugh/pGF_PostRecv.c b/src/pugh/pGF_PostRecv.c
new file mode 100644
index 0000000..81c4a30
--- /dev/null
+++ b/src/pugh/pGF_PostRecv.c
@@ -0,0 +1,69 @@
+ /*@@
+   @file    pGF_PostRecv.c
+   @date    Fri Feb 21 11:28:06 1997
+   @author
+   @desc
+   Routines which post all the IRecv commands for a GF. These
+   allow the asynchronous model proposed in, for example,
+   @seeroutine SyncGF to go!
+   @enddesc
+   @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+   @routine pGF_PostRecv
+   @date    Thu Apr 3 12:10:46 1997
+   @author  Paul Walker
+   @desc
+   This routine posts a receive (MPI_Irecv) of the buffer
+   we want to get the send from another processor, which
+   will be sent by @seeroutine pGF_PostSend and then
+   finalized by @seeroutine pGF_FinishRecv.
+   <p>
+   Aside from a silly calculation to get a unique tag
+   based on neighbors and processors, this is a very
+   straightforward routine.
+   @enddesc
+   @history
+   @hdate Nov 4 1998 @hauthor Gabrielle Allen
+   @hdesc Allow for forced synchronization of all GFs with storage
+   @endhistory
+ @@*/
+
+void pGF_PostRecv(pGH *GH, pGF *GF, int dir) {
+#ifdef MPI
+  int rtag;
+
+  /* Return if GF has no storage */
+  if (!(GF->storage)) return;
+
+  /* Return if communication not required and no forced synchronisation */
+  if (!(GH->forceSync || GF->docomm[dir])) return;
+
+  if (GH->neighbors[GH->myproc][dir] >= 0) {
+    rtag = 1000 + dir + 2 * (GH->myproc + GH->nprocs * GF->gfno);
+    /* mod the tag to force MPI compliance */
+    rtag = rtag % 32768;
+#ifdef DEBUG_PRINT
+    printf ("RECV GF %s Side %d Proc %d rtag %d size %d from proc %d\n",
+            GF->name,dir,GH->myproc,rtag,
+            GF->buffer_sz[dir],GH->neighbors[GH->myproc][dir]);
+#endif
+    if (GH->commmodel == PUGH_ALLOCATEDBUFFERS)
+      CACTUS_MPI_ERROR(MPI_Irecv((void*)(GF->recv_buffer[dir]),
+                                 GF->buffer_sz[dir],PUGH_MPI_TYPE,
+                                 GH->neighbors[GH->myproc][dir],rtag,
+                                 GH->PUGH_COMM_WORLD,&(GF->rreq[dir])));
+    if (GH->commmodel == PUGH_DERIVEDTYPES)
+      CACTUS_MPI_ERROR(MPI_Irecv((void *)(GF->data),
+                                 1,GH->recv_dt[GF->stagger][dir],
+                                 GH->neighbors[GH->myproc][dir],rtag,
+                                 GH->PUGH_COMM_WORLD,&(GF->rreq[dir])));
+  }
+#endif
+}
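The matching send tag is built in pGF_PostSend (the next file) from the receiver's processor number and the receiver's face, i.e. the complement of the sender's own face, so it lands on exactly this rtag. A worked sketch with made-up numbers; the assert is only illustrative:

    #include <assert.h>

    /* Sketch: tag matching between pGF_PostRecv and pGF_PostSend.
       Illustrative numbers: nprocs = 4, gfno = 7; proc 1 sends through
       its x+ face (dir 1) to proc 2, which receives on x- (dir 0). */
    void tag_sketch(void)
    {
      int nprocs = 4, gfno = 7;
      int rtag = (1000 + 0 + 2 * (2 + nprocs * gfno)) % 32768; /* on proc 2 */
      int dircomp = 0;            /* complement of the sender's dir 1 */
      int stag = (1000 + dircomp + 2 * (2 + nprocs * gfno)) % 32768;
      assert(rtag == stag && rtag == 1060);
    }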
diff --git a/src/pugh/pGF_PostSend.c b/src/pugh/pGF_PostSend.c
new file mode 100644
index 0000000..d8ec630
--- /dev/null
+++ b/src/pugh/pGF_PostSend.c
@@ -0,0 +1,116 @@
+ /*@@
+   @file    pGF_PostSend.c
+   @date    Thu Apr 3 11:58:14 1997
+   @author  Paul Walker
+   @desc
+   The send arm of the three-way pugh communications branch,
+   which is @seefile pGF_PostRecv.c, @seefile pGF_PostSend.c
+   and @seefile pGF_FinishRecv.c. See the documentation for
+   @seeroutine pGF_PostSend for details on this routine.
+   @enddesc
+   @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+   @routine pGF_PostSend
+   @date    Thu Apr 3 11:58:59 1997
+   @author  Paul Walker
+   @desc
+   This routine posts an asynchronous MPI send of a buffer
+   for a face (MPI_Isend). It does this by first packing up
+   a send buffer (allocated in @seeroutine SetupPGF), then
+   sending it out on the pipe.
+   <p>
+   Since this is an asynchronous communications model we
+   assume that a receive has been posted for the send;
+   thus the correct calling order for this is as in
+   @seeroutine SyncGF, e.g., after a Recv.
+   <p>
+   Note this does <b>not</b> wait on the send buffer from previous
+   communications. It is the user's responsibility to wait on
+   that buffer.
+   @enddesc
+   @calledby SyncGroupGF
+   @history
+   @hdate Nov 4 1998 @hauthor Gabrielle Allen
+   @hdesc Allow for forced synchronization of all GFs with storage
+   @endhistory
+@@*/
+
+void pGF_PostSend(pGH *GH, pGF *GF, int dir) {
+#ifdef MPI
+  int stag, dircomp;
+  int ii,jj,kk,xx;
+  int istart, iend;
+  int jstart, jend;
+  int kstart, kend;
+  Double ts, tw, tp;
+
+  /* Return if GF has no storage */
+  if (!(GF->storage)) return;
+
+  /* Return if communication not required and no forced synchronisation */
+  if (!(GH->forceSync || GF->docomm[dir])) return;
+
+  if (GH->neighbors[GH->myproc][dir] >= 0) {
+    ts = MPI_Wtime();
+    dircomp = dir+1;                  /* Complementary direction */
+    if (dircomp % 2 == 0) dircomp = dir-1;
+    /* Note this is the complement of the rtag set in PostRecv */
+    stag = 1000 + dircomp + 2 * (GH->neighbors[GH->myproc][dir]
+                                 + GH->nprocs * GF->gfno);
+    /* mod the tag to force MPI compliance */
+    stag = stag % 32768;
+
+#ifdef DEBUG_PRINT
+    printf ("SEND: GF %s Side %d Proc %d stag %d Size %d to %d\n",
+            GF->name,dir,GH->myproc,stag,
+            GF->buffer_sz[dir],GH->neighbors[GH->myproc][dir]);
+#endif
+
+    if (GH->commmodel == PUGH_DERIVEDTYPES)
+      CACTUS_MPI_ERROR(MPI_Isend((void *)(GF->data),
+                                 1,GH->send_dt[GF->stagger][dir],
+                                 GH->neighbors[GH->myproc][dir],stag,
+                                 GH->PUGH_COMM_WORLD,&(GF->sreq[dir])));
+
+    if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) {
+      /* Copy my information into the send buffer */
+      istart = GH->overlap[GF->stagger][dir][0][0];
+      iend   = GH->overlap[GF->stagger][dir][0][1];
+      jstart = GH->overlap[GF->stagger][dir][1][0];
+      jend   = GH->overlap[GF->stagger][dir][1][1];
+      kstart = GH->overlap[GF->stagger][dir][2][0];
+      kend   = GH->overlap[GF->stagger][dir][2][1];
+
+      /* Now copy */
+      xx = 0;
+      for (kk=kstart; kk<kend; kk++)
+        for (jj=jstart; jj<jend; jj++)
+          for (ii=istart; ii<iend; ii++)
+            GF->send_buffer[dir][xx++] = GF->data[DATINDEX(GH,ii,jj,kk)];
+
+      tp = MPI_Wtime();
+
+      /* Post the send */
+      CACTUS_MPI_ERROR(MPI_Isend((void*)(GF->send_buffer[dir]),
+                                 GF->buffer_sz[dir],PUGH_MPI_TYPE,
+                                 GH->neighbors[GH->myproc][dir],stag,
+                                 GH->PUGH_COMM_WORLD,&(GF->sreq[dir])));
+      tw = MPI_Wtime();
+#ifdef COMM_TIMING
+      printf ("GF %s Dir %d Time %lf\n",
+              GF->name, dir, tw-ts);
+#endif
+    }
+  }
+#endif
+}
diff --git a/src/pugh/pGF_Reduction.c b/src/pugh/pGF_Reduction.c
new file mode 100644
index 0000000..bc8e211
--- /dev/null
+++ b/src/pugh/pGF_Reduction.c
@@ -0,0 +1,198 @@
+ /*@@
+   @file    pGF_Reduction.c
+   @date    Thu Apr 3 11:54:53 1997
+   @author  Paul Walker
+   @desc
+   Various MPI reduction operators for maxima, minima, norms
+   and the like on grid functions. These are all pretty
+   straightforward thanks to the miracle of MPI_Allreduce.
+   @enddesc
+   @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+   @routine pGF_MaxVal
+   @date    Thu Apr 3 11:55:25 1997
+   @author  Paul Walker
+   @desc
+   Returns the maximum value of a distributed grid function.
+   @enddesc
+@@*/
+
+
+Double pGF_MaxVal(pGH *GH, pGF *GF) {
+  Double res, lres;
+  int i;
+
+  if (!GF->storage) return GF->maxval;
+
+  res = GF->data[0];
+  for (i=1;i<GH->npoints;i++)
+    res = (GF->data[i] > res ? GF->data[i] : res);
+
+  /* So now res is the local max */
+#ifdef MPI
+  lres = res;
+  CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_MAX,GH->PUGH_COMM_WORLD));
+#endif
+
+  return res;
+}
+
+
+ /*@@
+   @routine pGF_MinVal
+   @date    Thu Apr 3 11:55:48 1997
+   @author  Paul Walker
+   @desc
+   Returns the minimum value of a distributed grid function.
+   @enddesc
+@@*/
+
+Double pGF_MinVal(pGH *GH, pGF *GF) {
+  Double res, lres;
+  int i;
+
+  if (!GF->storage) return GF->minval;
+
+  res = GF->data[0];
+  for (i=1;i<GH->npoints;i++)
+    res = (GF->data[i] < res ? GF->data[i] : res);
+
+  /* So now res is the local min */
+#ifdef MPI
+  lres = res;
+  CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_MIN,GH->PUGH_COMM_WORLD));
+#endif
+
+  return res;
+}
+
+
+ /*@@
+   @routine pGF_Norm1
+   @date    Thu Apr 3 11:56:05 1997
+   @author  Paul Walker
+   @desc
+   Returns the average of the absolute value of a distributed
+   grid function.
+   @enddesc
+@@*/
+
+Double pGF_Norm1(pGH *GH, pGF *GF) {
+  Double res, lres;
+  int i, ii, jj, kk;
+  int np, tnp;
+
+  if (!GF->storage) return GF->norm1;
+
+#define ABS(x) ((x)<0?-(x):(x))
+
+#ifndef MPI
+  res = 0.0;
+  for (i=0;i<GH->npoints;i++) res += ABS(GF->data[i]);
+  tnp = GH->npoints;
+
+#else
+  /* With MPI, sum only over the points this processor owns */
+  res = 0.0;
+  np = 0;
+  for (kk=GH->ownership[GF->stagger][2][0];
+       kk < GH->ownership[GF->stagger][2][1];
+       kk ++)
+    for (jj = GH->ownership[GF->stagger][1][0];
+         jj < GH->ownership[GF->stagger][1][1];
+         jj ++)
+      for (ii = GH->ownership[GF->stagger][0][0];
+           ii < GH->ownership[GF->stagger][0][1];
+           ii ++) {
+        res += ABS(GF->data[DATINDEX(GH,ii,jj,kk)]);
+        np++;
+      }
+  lres = res;
+  CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_SUM,GH->PUGH_COMM_WORLD));
+  CACTUS_MPI_ERROR(MPI_Allreduce(&np,&tnp,1,MPI_INT,MPI_SUM,GH->PUGH_COMM_WORLD));
+#endif
+
+  res = res/tnp;
+  return res;
+}
+
+
+ /*@@
+   @routine pGF_Norm2
+   @date    Thu Apr 3 11:56:36 1997
+   @author  Paul Walker
+   @desc
+   Returns the "norm2" of a distributed grid function. That
+   is, returns $\sqrt{\Sigma (a_i * a_i) / np}$.
+   @enddesc
+@@*/
+
+
+Double pGF_Norm2(pGH *GH, pGF *GF) {
+  Double res, lres;
+  int i, ii, jj, kk;
+  int np, tnp;
+
+  if (!GF->storage) return GF->norm2;
+
+#ifndef MPI
+  res = 0.0;
+  for (i=0;i<GH->npoints;i++) res += GF->data[i]*GF->data[i];
+  tnp = GH->npoints;
+
+#else
+  /* The MPI version depends heavily on the ownership array */
+  res = 0.0;
+  np = 0;
+
+  for (kk=GH->ownership[GF->stagger][2][0];
+       kk < GH->ownership[GF->stagger][2][1];
+       kk ++)
+    for (jj = GH->ownership[GF->stagger][1][0];
+         jj < GH->ownership[GF->stagger][1][1];
+         jj ++)
+      for (ii = GH->ownership[GF->stagger][0][0];
+           ii < GH->ownership[GF->stagger][0][1];
+           ii ++) {
+        res += GF->data[DATINDEX(GH,ii,jj,kk)] *
+               GF->data[DATINDEX(GH,ii,jj,kk)];
+        np ++;
+      }
+  lres = res;
+  CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_SUM,GH->PUGH_COMM_WORLD));
+  CACTUS_MPI_ERROR(MPI_Allreduce(&np,&tnp,1,MPI_INT,MPI_SUM,GH->PUGH_COMM_WORLD));
+#endif
+
+  res = sqrt(res/tnp);
+  return res;
+}
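All the reductions above share one shape: a local reduction over this processor's points followed by MPI_Allreduce, after which every processor holds the global result. A sketch of an infinity norm built the same way (not a pugh routine; ABS as defined above; including ghost points is harmless in a max):

    /* Sketch: the local-reduce-then-Allreduce pattern applied to an
       infinity norm (illustrative only). */
    Double pGF_NormInf_sketch(pGH *GH, pGF *GF)
    {
      Double res, lres = 0.0;
      int i;

      for (i = 0; i < GH->npoints; i++)        /* local reduction */
        if (ABS(GF->data[i]) > lres) lres = ABS(GF->data[i]);
      res = lres;
    #ifdef MPI
      CACTUS_MPI_ERROR(MPI_Allreduce(&lres, &res, 1, PUGH_MPI_TYPE,
                                     MPI_MAX, GH->PUGH_COMM_WORLD));
    #endif
      return res;                              /* same on every processor */
    }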
+ /*@@
+   @routine pGF_AllNorms
+   @date    Mon Jun 30 10:44:57 1997
+   @author  Paul Walker
+   @desc
+   Returns all 4 norms into a Double pointer in the order
+   max, min, norm1, norm2.
+   <p>
+   This is purely a convenience function.
+   @enddesc
+   @calls pGF_MaxVal, pGF_MinVal, pGF_Norm1, pGF_Norm2
+@@*/
+
+void pGF_AllNorms(pGH *GH, pGF *GF, Double *res) {
+  res[0] = pGF_MaxVal(GH,GF);
+  res[1] = pGF_MinVal(GH,GF);
+  res[2] = pGF_Norm1(GH,GF);
+  res[3] = pGF_Norm2(GH,GF);
+}
diff --git a/src/pugh_Comm.h b/src/pugh_Comm.h
new file mode 100644
index 0000000..0108c7b
--- /dev/null
+++ b/src/pugh_Comm.h
@@ -0,0 +1,52 @@
+ /*@@
+   @header  pugh_Comm.h
+   @date    Thu Feb 4 11:42:50 1999
+   @author  Tom Goodale
+   @desc
+   Prototypes for the overloadable pugh communication functions.
+   @enddesc
+   @version $Header$
+ @@*/
+
+#ifndef _PUGH_COMM_H_
+#define _PUGH_COMM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Overloaded functions. */
+int pugh_SyncGroup(cGH *GH, const char *group);
+int pugh_EnableGroupStorage(cGH *GH, const char *group);
+int pugh_DisableGroupStorage(cGH *GH, const char *group);
+
+int pugh_EnableGroupComm(cGH *GH, const char *group);
+int pugh_DisableGroupComm(cGH *GH, const char *group);
+
+int pugh_Barrier(cGH *GH);
+int pugh_Reduce(cGH *GH,
+                const char *operation,
+                int n_infields,
+                int n_outfields,
+                int out_type,
+                void **outarray,
+                ...);
+
+int pugh_Interp(cGH *GH,
+                const char *operation,
+                int n_coords,
+                int n_infields,
+                int n_outfields,
+                int n_points,
+                int type,
+                ...);
+
+int pugh_ParallelInit(cGH *GH);
+int pugh_Exit(cGH *GH);
+int pugh_Abort(cGH *GH);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/pugh_extension.h b/src/pugh_extension.h
new file mode 100644
index 0000000..4307ccd
--- /dev/null
+++ b/src/pugh_extension.h
@@ -0,0 +1,30 @@
+ /*@@
+   @header  pugh_extension.h
+   @date    Thu Feb 4 10:51:27 1999
+   @author  Tom Goodale
+   @desc
+   The header file for the PUGH GH extension stuff.
+   @enddesc
+   @version $Header$
+ @@*/
+
+#ifndef _PUGH_EXTENSION_H_
+#define _PUGH_EXTENSION_H_
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+void *pugh_SetupGH(tFleshConfig *config,
+                   int convergence_level,
+                   cGH *GH);
+
+int pugh_InitGH(cGH *GH);
+
+int pugh_rfrTraverseGH(cGH *GH, int rfrpoint);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif