Diffstat (limited to 'src/pugh')
-rw-r--r-- | src/pugh/SetupPGArray.c     |  84
-rw-r--r-- | src/pugh/SetupPGF.c         | 438
-rw-r--r-- | src/pugh/SetupSliceCenter.c | 137
-rw-r--r-- | src/pugh/SyncGroupGF.c      | 271
-rw-r--r-- | src/pugh/pGF_FinishRecv.c   |  81
-rw-r--r-- | src/pugh/pGF_PostRecv.c     |  69
-rw-r--r-- | src/pugh/pGF_PostSend.c     | 116
-rw-r--r-- | src/pugh/pGF_Reduction.c    | 198
8 files changed, 1394 insertions, 0 deletions
diff --git a/src/pugh/SetupPGArray.c b/src/pugh/SetupPGArray.c new file mode 100644 index 0000000..9a5cf71 --- /dev/null +++ b/src/pugh/SetupPGArray.c @@ -0,0 +1,84 @@ + /*@@ + @file SetupArray.c + @date Fri Feb 21 11:04:20 1997 + @author + @desc + Sets up (eg allocates) the pugh Array. This is trivial. + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine SetupPGArray + @date Fri Feb 21 11:05:04 1997 + @author + @desc + Note that npoints is a GLOBAL measure. + @enddesc + @calls DisableGFDataStorage, EnableGFDataStorage +@@*/ + + +pGArray *SetupPGArray(pGH *GH, char *name, int npoints) { + pGArray *res; /* The result */ + pGArray **rlist; /* A temporary to add to the GH */ + int lnpoints, i; + + /* Space for the struct */ + res = (pGArray *)malloc(sizeof(pGArray)); + + /* Set my parent GH */ + res->parentGH = GH; + + /* Set up the name */ + res->name = (char *)malloc(strlen(name)*sizeof(char)); + strcpy(res->name,name); + + /* Allocate my memory */ + if (npoints > 0) { + lnpoints = npoints/GH->nprocs; + if (GH->myproc == GH->nprocs - 1) { + lnpoints = npoints - lnpoints * (GH->nprocs-1); + } + res->npoints = lnpoints; + res->data = (Double *)malloc(lnpoints*sizeof(Double)); + } else { + res->npoints = 1; + res->data = (Double *)malloc(sizeof(Double)); + } + + /* Setup IO stuff to default no IO with space for xgraph files.*/ + res->do_arrayio = 0; + + res->IEEEfile = NULL; + + /* Finally add this to the list of grid funcs in the gh */ + GH->nArrays ++; + rlist = (pGArray **)malloc(GH->ngridFuncs * sizeof(pGArray *)); + if (GH->nArrays > 1) { + for (i=0;i<GH->nArrays-1;i++) + rlist[i] = GH->Arrays[i]; + free(GH->Arrays); + } + res->pgano = GH->nArrays - 1; + rlist[GH->nArrays-1] = res; + GH->Arrays = rlist; + + /* And return myself... */ + return res; +} + +void DestroyPGArray(pGH *GH, pGArray **pGAin) { + pGArray *pGA; + pGA = *pGAin; + + free(pGA->name); + free(pGA->data); + + free(pGA); + *pGAin=NULL; +} diff --git a/src/pugh/SetupPGF.c b/src/pugh/SetupPGF.c new file mode 100644 index 0000000..acec587 --- /dev/null +++ b/src/pugh/SetupPGF.c @@ -0,0 +1,438 @@ + /*@@ + @file SetupPGF.c + @date Fri Feb 21 11:04:20 1997 + @author + @desc + Sets up (eg allocates) the pugh Grid Function. This is + crucial to understaiding the grid function struct. + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine SetupPGF + @date Fri Feb 21 11:05:04 1997 + @author + @desc + Allocates the Grid Func structure (I wanted to say object ...) + There are several steps to this + <ul> + <li> Set up the name + <li> Set up the docomm flag and IO flags + <li> Allocate output file pointer arrays + <li> Allocate the data buffer + <li> Allocate the send buffer + <li> Add myself to the Grid Hierarchy + </ul> + But this is pretty straightforward code nonetheless. + <p> + Note that the input variable "storage" determines whether to + allocate the 3-D data or not. 
You can toggle this later + with @seeroutine DisableGFDataStorage and @seeroutine + EnableGFDataStorage + @enddesc + @calls DisableGFDataStorage, EnableGFDataStorage +@@*/ + + +pGF *SetupPGF(pGH *GH, char *name, int docomm, int storage, int stagger) { + pGF *res; /* The result */ + pGF **rlist; /* A temporary to add to the GH */ + int i, dir, dirp1, dirp2, sz; /* Counter thingies */ + + /* Space for the struct */ + res = (pGF *)malloc(sizeof(pGF)); + + /* Set my parent GH */ + res->parentGH = GH; + + /* Set up the name */ + res->name = (char *)malloc((1+strlen(name))*sizeof(char)); + strcpy(res->name,name); + + res->stagger = stagger; + if( (GH->stagger == PUGH_NO_STAGGER) && + (res->stagger != PUGH_VERTEXCTR) ) + { printf ("FATAL ERROR! Cannot have staggered grids inside a GH with \n"); + printf (" designation PUGH_NO_STAGGER. \n"); + STOP; + } + + + /* Setup IO stuff to default no IO with space for xgraph files.*/ + res->do_3dio = 0; + res->do_2dio = 0; + res->do_1dio = 0; + res->do_0dio = 0; + res->do_conv = 0; + + res->convfile = NULL; + + res->xgfile = (FILE **)malloc(5*sizeof(FILE *)); + for (i=0;i<5;i++) + res->xgfile[i] = NULL; + + res->zerodfile = (FILE **)malloc(4*sizeof(FILE *)); + for (i=0;i<4;i++) + res->zerodfile[i] = NULL; + + res->IEEEfile = NULL; + + for (i=0;i<3;i++) + res->IEEEfile_2d[i] = NULL; + + res->show_maxval = 0; + + res->lastio_it[0] = -1; res->lastio_it[1] = -1; + res->lastio_it[2] = -1; res->lastio_it[3] = -1; + + SetGFComm(res, docomm); + + + /* Finally add this to the list of grid funcs in the gh */ + GH->ngridFuncs ++; + rlist = (pGF **)malloc(GH->ngridFuncs * sizeof(pGF *)); + if (GH->ngridFuncs > 1) { + for (i=0;i<GH->ngridFuncs-1;i++) + rlist[i] = GH->gridFuncs[i]; + free(GH->gridFuncs); + } + res->gfno = GH->ngridFuncs - 1; + rlist[GH->ngridFuncs-1] = res; + GH->gridFuncs = rlist; + /* Set up the comm layer */ + res->padddata = NULL; + res->data = NULL; + res->send_buffer = NULL; + res->recv_buffer = NULL; + + if (storage == PUGH_NOSTORAGE) { + res->storage = PUGH_STORAGE; /* Fake it out... */ + DisableGFDataStorage(GH, res); + } else if (storage == PUGH_STORAGE) { + res->storage = PUGH_NOSTORAGE; /* Fake it out... */ + EnableGFDataStorage(GH, res); + } else { + printf ("WARNING: Storage keyword set incorrectly for %s [%d]\n",res->name,storage); + printf (" Perhaps misspelled in GridFuncList? Defaulting to storage active.\n"); + EnableGFDataStorage(GH, res); + } + +#ifdef MPI + /* Null my send and recieve requests */ + for (i=0;i<6;i++) { + res->sreq[i] = MPI_REQUEST_NULL; + res->rreq[i] = MPI_REQUEST_NULL; + } +#endif + + /* And return myself... */ + return res; +} + + /*@@ + @routine DestroyPGF + @date Thu Aug 21 11:44:22 1997 + @author Paul Walker + @desc + Destroys a GF object. + @enddesc +@@*/ + +void DestroyPGF(pGH *GH, pGF **GFin) { + pGF *GF; + GF = *GFin; + + if (GF->storage) + DisableGFDataStorage(GH, GF); + + free(GF->name); + free(GF->padddata); + + free(GF->xgfile); + free(GF->zerodfile); + + GF->data = NULL; + + free(GF); + *GFin = NULL; + +} + + + /*@@ + @routine EnableGFDataStorage + @date Thu Apr 3 12:44:38 1997 + @author Paul Walker + @desc + This routine toggles the data storage to the "on" + position. That means that data points to a 3D + array, rather than a single Double, and the + storage flag is set to one. This is used quite + a lot by thorns which need to toggle memory. for + instance, see the trK Driver in the util thron. 
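+      <p>
+      A minimal sketch of that toggling idiom (hypothetical caller code,
+      not part of this commit):
+      <pre>
+         EnableGFDataStorage(GH, GF);
+            ... fill and use GF->data as a full 3D array ...
+         DisableGFDataStorage(GH, GF);
+      </pre>
+      so that GF->data points at real storage only while it is needed.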
+ @enddesc +@@*/ + +/* Statics for padding */ +static int padding_active = -1; +static int padding_cacheline_bits, padding_size, padding_address_spacing; + +int zero_memory = -1; + +void EnableGFDataStorage(pGH *GH, pGF *GF) { + int i, dir, dirp1, dirp2, sz; + int special_pad, cache_size, start; + + if (zero_memory < 0) { + zero_memory = Contains("zero_memory","yes"); + if (zero_memory) { + printf ("Zeroing memory for allocated GFs at alloc time\n"); + } + } + + + if (padding_active == -1) { + padding_active = Contains("padding_active","yes"); + padding_cacheline_bits = Geti("padding_cacheline_bits"); + padding_size = Geti("padding_size"); + padding_address_spacing = Geti("padding_address_spacing"); + if (padding_active) { + printf ("PUGH Memory padding active. Stats:\n"); + printf (" cacheline_bits : %d\n", + padding_cacheline_bits); + printf (" size : %d\n", + padding_size); + printf (" spacing : %d\n", + padding_address_spacing); + } + } + + if (GF->storage) { + printf ("WARNING: Tried to enable %s when already enabled\n", + GF->name); + return; + } + + /* Set up the send buffers. Note we only do this if + we are not using the derived types comm style. + */ + assert(!GF->send_buffer); + GF->send_buffer = (Double **)malloc(6*sizeof(Double *)); + + assert(!GF->recv_buffer); + GF->recv_buffer = (Double **)malloc(6*sizeof(Double *)); + + for (i=0;i<6;i++) { + dir = i/2; + dirp1 = (i/2+1)%3; + dirp2 = (i/2+2)%3; + + if (dir == 0) { + sz = GH->stencil_width * GH->lny * GH->lnz; + } else if (dir == 1) { + sz = GH->stencil_width * GH->lnx * GH->lnz; + } else { + sz = GH->stencil_width * GH->lnx * GH->lny; + } + + if (GH->neighbors[GH->myproc][i] >= 0) { + GF->buffer_sz[i] = sz; + GF->send_buffer[i] = (Double *)malloc(sz*sizeof(Double)); + GF->recv_buffer[i] = (Double *)malloc(sz*sizeof(Double)); + assert(GF->recv_buffer[i]); + assert(GF->send_buffer[i]); + } else { + GF->buffer_sz[i] = 0; + GF->send_buffer[i] = NULL; + GF->recv_buffer[i] = NULL; + } + } + + /* Set up the storage */ + if (GF->padddata) free (GF->padddata); + if (!padding_active) { + GF->padddata = (Double *)malloc((GH->npoints)*sizeof(Double)); + GF->data = GF->padddata; /* No padding case */ + if (zero_memory) { + for (i=0;i<GH->npoints;i++) + GF->padddata[i] = 0.0; + } + } else { + GF->padddata = (Double *)malloc((GH->npoints + padding_size) + *sizeof(Double)); + + /* Align the actual starting address on a cache line. + More specifically align on a unique cache line for + each field. + */ + cache_size = 2 << (padding_cacheline_bits - 1); + start = ((long) (GF->padddata) / sizeof(Double))%cache_size; + + special_pad = ((GF->gfno) * padding_address_spacing + cache_size - start)%cache_size; + GF->data = (GF->padddata) + special_pad; + + if (special_pad > padding_size){ + printf("FATAL ERROR: padding size not large enough for cache type specified %d %d %d \n",special_pad,padding_size,cache_size); + STOP; + } + if (zero_memory) { + for (i=0;i<GH->npoints+padding_size;i++) + GF->padddata[i] = 0.0; + } + } + + if (!GF->padddata) { + printf ("FATAL ERROR: Cannot allocate data for %s [%d]\n", + GF->name, GF->gfno); + STOP; + } + GF->storage = 1; + + +} + + /*@@ + @routine DisableGFDataStorage + @date Thu Apr 3 12:45:34 1997 + @author Paul Walker + @desc + This routine disables the grid function storage. + That is, it un-allocates the 3D array and in its + place allocates a single Double with the value + 0.0. 
This allows us to still ahve something to + pass around (an array of size (1,1,1) in fortran + speak) but also to not need all our 3D arrays + "on" all the time. + @enddesc +@@*/ + + +void DisableGFDataStorage(pGH *GH, pGF *GF) { + int i; + + if (!GF->storage) { + printf ("Warning: Tried to disable %s when already disabled\n", + GF->name); + return; + } + + if (GF->padddata) { + free(GF->padddata); + GF->padddata = NULL; + GF->data = NULL; + } + + if (GF->send_buffer) { + for (i=0;i<6;i++) + if (GF->send_buffer[i]) { +#ifdef MPI + if(GF->sreq[i] != MPI_REQUEST_NULL){ + CACTUS_MPI_ERROR(MPI_Request_free(&(GF->sreq[i]))); + } +#endif + free(GF->send_buffer[i]); + GF->send_buffer[i] = NULL; + } + free(GF->send_buffer); + GF->send_buffer = NULL; + } + + + if (GF->recv_buffer) { + for (i=0;i<6;i++) + if (GF->recv_buffer[i]) { + free(GF->recv_buffer[i]); + GF->recv_buffer[i] = NULL; + } + free(GF->recv_buffer); + GF->recv_buffer = NULL; + } + + GF->padddata = (Double *)malloc(sizeof(Double)); + GF->data = GF->padddata; + GF->data[0] = 0.0; /* Very important! */ + GF->storage = 0; +} + + /*@@ + @routine SetGFComm + @date Thu Apr 3 12:46:23 1997 + @author Paul Walker + @desc + This sets the docomm[3] array of the GF based + on the setting of the comm flag. Note the + comm flag constants are defined in + @seeheader pughDriver.h. As well as SetupPGF + this is called by thorns (eg, elliptic) which + need to toggle this state. + @enddesc + @calledby SetupPGF +@@*/ + + +void SetGFComm(pGF *res, int docomm) { + /* Copy docomm */ + int i; + res->commflag = docomm; + + for (i=0;i<6;i++) + res->docomm[i] = 0; + + if (docomm == PUGH_NOCOMM) { + for (i=0;i<6;i++) + res->docomm[i] = 0; + } else if (docomm == PUGH_ALLCOMM) { + for (i=0;i<6;i++) + res->docomm[i] = 1; + } else if (docomm == PUGH_XCOMM) { + res->docomm[0] = 1; + res->docomm[1] = 1; + } else if (docomm == PUGH_YCOMM) { + res->docomm[2] = 1; + res->docomm[3] = 1; + } else if (docomm == PUGH_ZCOMM) { + res->docomm[4] = 1; + res->docomm[5] = 1; + } else if (docomm == PUGH_PLUSFACESCOMM) { + res->docomm[1] = 1; + res->docomm[3] = 1; + res->docomm[5] = 1; + } else if (docomm == PUGH_MINUSFACESCOMM) { + res->docomm[0] = 1; + res->docomm[2] = 1; + res->docomm[4] = 1; + } else { + printf("WARNING: Unsupported comm. in grid function %s .\n",res->name); + printf(" Perhaps misspelled in GridFuncList? Defaulting to allcomm.\n"); + for (i=0;i<6;i++) + res->docomm[i] = 1; + } + + /* Handle nx = 1 type cases. This is only important for one + processor MPI periodic boundaries. + */ + /* This is a hack to get a GH. Unfortunately I decided not + to pass a GH to this routine, and I should go back and + change it, but that is a lot of changes so for now I'll + do the following. 
+ */ + if (GHList->GH->nx == 1) { + res->docomm[0] = 0; + res->docomm[1] = 0; + } + + if (GHList->GH->ny == 1) { + res->docomm[2] = 0; + res->docomm[3] = 0; + } + + if (GHList->GH->nz == 1) { + res->docomm[4] = 0; + res->docomm[5] = 0; + } +} diff --git a/src/pugh/SetupSliceCenter.c b/src/pugh/SetupSliceCenter.c new file mode 100644 index 0000000..58e6a07 --- /dev/null +++ b/src/pugh/SetupSliceCenter.c @@ -0,0 +1,137 @@ + /*@@ + @file SetupSliceCenter.c + @date + @author + @desc + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + +void SetupSliceCenter(pGH *GH) { + int slice_center; + + + /* slice_center used to be part of the GF, */ + /* either 1 (default) or 0 (octant,edge)*/ + /* now spxyz[0],spxyz[1],spxyz[2] are part of the GH */ + + if ((GH->spxyz[0]!=-1)||(GH->spxyz[1]!=-1)||(GH->spxyz[2]!=-1)) { + printf("WARNING in SetupSliceCenter: you already have these Slice Center settings:\n(OK for BoxInBox)"); + printf("spxyz[0],spxyz[1],spxyz[2]: %d %d %d \n", + GH->spxyz[0], GH->spxyz[1], GH->spxyz[2]); + return; + } + + slice_center=1; + if (Contains("grid","octant") || Contains("grid","quadrant") || + Contains("grid","bitant")) + slice_center=0; + if (Contains("slice_loc","edge")) slice_center=0; + + if (slice_center) { + /* For NONE octant mode: the center for slicings */ + /* is moved to the center index */ + /* (unless nx,ny,nz=1) */ + if (GH->nx == 1) { + GH->spxyz[0] = 0; + GH->sp2dxyz[0] = 0; + } else { + GH->spxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0]; + GH->sp2dxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0]; + } + + if (GH->ny == 1) { + GH->spxyz[1] = 0; + GH->sp2dxyz[1] = 0; + } else { + GH->spxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1]; + GH->sp2dxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1]; + } + + if (GH->nz == 1) { + GH->spxyz[2] = 0; + GH->sp2dxyz[2] = 0; + } else { + GH->spxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2]; + GH->sp2dxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2]; + } + } + /* for octant mode, the slicing center is the index 1 or zero*/ + else { + if (GH->nx == 1) { + GH->spxyz[0] = 0; + GH->sp2dxyz[0] = 0; + } else { + GH->spxyz[0] = 1 - GH->lb[GH->myproc][0]; + GH->sp2dxyz[0] = 1 - GH->lb[GH->myproc][0]; + } + + if (GH->ny == 1) { + GH->spxyz[1] = 0; + GH->sp2dxyz[1] = 0; + } else { + GH->spxyz[1] = 1 - GH->lb[GH->myproc][1]; + GH->sp2dxyz[1] = 1 - GH->lb[GH->myproc][1]; + } + + if (GH->nz == 1) { + GH->spxyz[2] = 0; + GH->sp2dxyz[2] = 0; + } else { + GH->spxyz[2] = 1 - GH->lb[GH->myproc][2]; + GH->sp2dxyz[2] = 1 - GH->lb[GH->myproc][2]; + } + } + + /* Special case: in quadrant mode x and y are like full, but + z is like octant. Thinking about this gives you a headache, + but works. + */ + if (Contains("grid","quadrant")) { + if (GH->nz == 1) { + GH->spxyz[2] = 0; + GH->sp2dxyz[2] = 0; + } else { + GH->spxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2]; + GH->sp2dxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2]; + } + } + + /* Another special case: bitant! 
Hey, with full and octant + already implemented, bitant is a breeze on the neurons */ + if (Contains("grid","bitant")) + { if (GH->nx == 1) + { GH->spxyz[0] = 0; + GH->sp2dxyz[0] = 0; + } + else + { GH->spxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0]; + GH->sp2dxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0]; + } + if (GH->ny == 1) + { GH->spxyz[1] = 0; + GH->sp2dxyz[1] = 0; + } + else + { GH->spxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1]; + GH->sp2dxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1]; + } + } + +#ifdef THORN_CARTOON_2D + /* for cartoon_2d, treat x as in octant, y and z as in full, BB */ + if (Contains("cartoon_active","yes")) { + if (GH->nx == 1) { + GH->spxyz[0] = 0; + GH->sp2dxyz[0] = 0; + } else { + GH->spxyz[0] = 1 - GH->lb[GH->myproc][0]; + GH->sp2dxyz[0] = 1 - GH->lb[GH->myproc][0]; + } + } +#endif +} diff --git a/src/pugh/SyncGroupGF.c b/src/pugh/SyncGroupGF.c new file mode 100644 index 0000000..2d21ae2 --- /dev/null +++ b/src/pugh/SyncGroupGF.c @@ -0,0 +1,271 @@ +/*@@ + @file SyncGroupGH.c + @author Paul Walker + @date March 1997 + @desc + Synchronizes all the Grid Functions (which have their communications + on) in a given group which has previously been registered to + the GF data base with StoreGFGroupData. The MPI section was + formally called SyncGH.c + @enddesc + @history + @hdate Sep 27 1998 @hauthor Gabrielle Allen, Tom Goodale + @hdesc Added the functionality to synchronise indices from a group + rather than the whole GH. Changed the routine/file names. + @endhistory + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + +/*#define VERBOSE*/ +#undef COMM_TIMING + +int GetGFGroupData(const char *name, int *nGFs, int **GFIndices); + +/*@@ + @routine SyncGroupGF + @author Paul Walker + @date March 1997 + @desc + Synchronizes all the Grid Functions (which have their communications + on) in a given group which has previously been registered to + the GF data base with StoreGFGroupData. The MPI section was + formally called SyncGH.c + @enddesc + @history + @hdate Sep 27 1998 @hauthor Gabrielle Allen, Tom Goodale + @hdesc Added the functionality to synchronise indices from a group + rather than the whole GH. Changed the routine/file names. + @endhistory + @@*/ + + +/* This routine works in one of two ways: + * + * 1. If using derived data types, it posts receives, posts sends + * waits for all sends and receives, and finishes the receives. + * + * 2. If using buffered communication, it posts recieves, posts + * sends, waits for receives only, and finishes them. We don't + * need to wait for sends until the next time through. This + * is why the wait for the send buffer is immediately before + * the post send. (Since the send buffers are initially set + * to MPI_REQUEST_NULL the first Wait and any waits in + * unused directions are a wash). + * + * This routine is intertwined with the contents of routines + * pGF_PostRecv, pGF_PostSend, and pGF_FinishSend. + * + * Without MPI, this does nothing, of course. + * + * Note this routine is also responsible for updating the + * commt global so we can get a measure of how much time we + * spend in communications overhead. + * + * Also note that it is more efficient to sync a number of + * GF together (as a group or a hierachy) rather than + * singly. This is because syncing single functions waits for the + * receives on a single function before posting the sends on + * all the other functions. 
+ */ + +void SyncGroupGF(pGH *GH,const char *groupname) { +#ifdef MPI + int j, Dir, index; + double t1,t2,t3; + MPI_Request *sr = NULL; + MPI_Status ms, mss; +#endif + int i; + double st[PUGH_NTIMERS],et[PUGH_NTIMERS]; + int l; + int *GFindices; + int nGFs; + + pughTimers(st); + + /* Find which grid functions make up the group */ + GetGFGroupData(groupname,&nGFs,&GFindices); + + /* Say which grid functions will actually be synchronised */ + if (Contains("cactus_sync_verbose","yes")) { + printf("Syncing %d grid functions in group %s\n",nGFs,groupname); + printf(" Syncing GFs ..."); + for (i=0;i<nGFs;i++) { + if (GH->gridFuncs[GFindices[i]]->storage + && (GH->gridFuncs[GFindices[i]]->docomm[1] || + GH->gridFuncs[GFindices[i]]->docomm[2] || + GH->gridFuncs[GFindices[i]]->docomm[3] || + GH->forceSync)) + printf(" %s",GH->gridFuncs[GFindices[i]]->name); + } + printf("\n"); + } + +#ifdef MPI + if (GH->commmodel == PUGH_DERIVEDTYPES) { + /* 2 faces, send and receive is the 2 * 2 */ + sr = (MPI_Request *)malloc(nGFs * 2 * 2 * sizeof(MPI_Request)); + for (i=0;i<nGFs * 2 * 2; i++) + sr[i] = MPI_REQUEST_NULL; + } + + for (Dir = 0; Dir < 3; Dir ++) { +#ifdef COMM_TIMING + t1 = MPI_Wtime(); +#endif + for(i=0;i<nGFs;i++) { + pGF_PostRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir); + pGF_PostRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir+1); + } + +#ifdef COMM_TIMING + t2 = MPI_Wtime(); + printf ("PR : %lf\n",t2-t1); +#endif + if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) { + for(i=0;i<nGFs;i++) { + /* Wait for the last send. Since these will be null if the + are not used, this is always safe. + */ + MPI_Wait(&(GH->gridFuncs[GFindices[i]]->sreq[2*Dir]),&mss); + pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir); + MPI_Wait(&(GH->gridFuncs[GFindices[i]]->sreq[2*Dir+1]),&mss); + pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir+1); + } + } else { + for(i=0;i<nGFs;i++) { + /* Wait for the last send. Since these will be null if the + are not used, this is always safe. + */ + pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir); + pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir+1); + } + } + +#ifdef COMM_TIMING + t1 = MPI_Wtime(); + printf ("PS : %lf\n",t1-t2); +#endif + + /* Now comes the big difference between derived types and + allocated buffers. With derived types, we now have to + wait on all our recieve AND SEND buffers so we can + keep on using the send buffers ( as communications are + in-place). 
With the allocated we have to wait on each + recieve, but not on the send, since we don't need the + send buffer until we pack a send again (above) + */ + + if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) { + /* Do a wait any on the recieves */ + for(i=0;i<nGFs;i++) { + MPI_Wait(&(GH->gridFuncs[GFindices[i]]->rreq[2*Dir]),&mss); + pGF_FinishRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir); + MPI_Wait(&(GH->gridFuncs[GFindices[i]]->rreq[2*Dir+1]),&mss); + pGF_FinishRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir+1); + } + } + + if (GH->commmodel == PUGH_DERIVEDTYPES) { + /* Load up the thing for the waitall */ + for (i=0;i<nGFs;i++) { + int id = i*4; + if (GH->gridFuncs[GFindices[i]]->docomm[2*Dir] && + GH->gridFuncs[GFindices[i]]->storage) { + sr[id] = GH->gridFuncs[GFindices[i]]->sreq[2*Dir]; + sr[id+1] = GH->gridFuncs[GFindices[i]]->rreq[2*Dir]; + } else { + sr[id] = MPI_REQUEST_NULL; + sr[id+1] = MPI_REQUEST_NULL; + } + + if (GH->gridFuncs[GFindices[i]]->docomm[2*Dir+1] && + GH->gridFuncs[GFindices[i]]->storage) { + sr[id+2] = GH->gridFuncs[GFindices[i]]->sreq[2*Dir+1]; + sr[id+3] = GH->gridFuncs[GFindices[i]]->rreq[2*Dir+1]; + } else { + sr[id+2] = MPI_REQUEST_NULL; + sr[id+3] = MPI_REQUEST_NULL; + } + } + /* Now do a waitall */ + MPI_Waitall(4*nGFs,sr,&ms); + + } + if (sr) free(sr); +#ifdef COMM_TIMING + t2 = MPI_Wtime(); + printf ("FR : %lf\n",t2-t1); +#endif + } +#endif + pughTimers(et); + for (l=0;l<PUGH_NTIMERS;l++) + commt[l] += et[l]-st[l]; +} + + +/*@@ + @routine SyncGH + @author Gabrielle Allen + @date Sep 27 1998 + @desc + Wrapper to support SyncGH command, which is now implemented through + a group. + @enddesc + @@*/ + +void SyncGH(pGH *GH) { + + SyncGroupGF(GH,"$GH"); + +} + + +/*@@ + @routine SyncGF + @author Gabrielle Allen + @date Sep 27 1998 + @desc + Wrapper to support SyncGF command, which is now implemented through + a group. Note that it is more efficient to sync all required GFs at + the same time, using the @seeroutine SyncGroupGF routine with a given group. + @enddesc + @@*/ + +void SyncGF(pGH *GH,pGF *GF) { + + SyncGroupGF(GH,GF->name); + +} + + +/* The old code for SyncGF is listed here, since it illustrates simply + * the use of pGF_PostRecv, pGF_PostSend, pGF_FinishRecv + * */ + +/* void SyncGF(pGH *GH, pGF *GF) { */ + +/* #ifdef MPI */ +/* int Dir; */ +/* MPI_Status mss; */ +/* for (Dir=0;Dir<3;Dir++) { */ +/* pGF_PostRecv(GH, GF, 2*Dir); */ +/* pGF_PostRecv(GH, GF, 2*Dir+1); */ + +/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->sreq[2*Dir]),&mss)); */ +/* pGF_PostSend(GH, GF, 2*Dir); */ +/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->sreq[2*Dir+1]),&mss)); */ +/* pGF_PostSend(GH, GF, 2*Dir+1); */ + +/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->rreq[2*Dir]),&mss)); */ +/* pGF_FinishRecv(GH, GF, 2*Dir); */ +/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->rreq[2*Dir+1]),&mss)); */ +/* pGF_FinishRecv(GH, GF, 2*Dir+1); */ +/* } */ +/* #endif */ +/* } */ + diff --git a/src/pugh/pGF_FinishRecv.c b/src/pugh/pGF_FinishRecv.c new file mode 100644 index 0000000..316dfc1 --- /dev/null +++ b/src/pugh/pGF_FinishRecv.c @@ -0,0 +1,81 @@ + /*@@ + @file pGF_FinishRecv.c + @date Thu Apr 3 11:37:28 1997 + @author Paul Walker + @desc + The routine which finalize the MPI recieves for a grid + function. Critically linked with @seefile pGF_PostRecv.c + and @seefile pGF_PostSend.c + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine pGF_FinishRecv + @date Thu Apr 3 11:38:07 1997 + @author Paul Walker + @desc + This routine finalizes the MPI communication through a face. 
+ It is crucially linked with @seeroutine pGF_PostRecv and + @seeroutine pGF_PostSend. + <p> + <b>Important!</b> + This routine does <b>not</b> wait on the recieves. + Before it is called, you must do the MPI_Wait or + else you will not get the right answer. for an + example of this, see @seeroutine SyncGroupGF or + @seeroutine SyncGroupGF + @enddesc + @calledby SyncGroupGF + @history + @hdate Nov 4 1998 @hauthor Gabrielle Allen + @hdesc Allow for forced synchronization of all GFs with storage + @endhistory +@@*/ + +void pGF_FinishRecv(pGH *GH, pGF *GF, int dir) { +#ifdef MPI + int R; + MPI_Status ms; + int istart, iend, jstart, jend, kstart, kend; + int ii,jj,kk,xx; + int tmp; + + /* Return if GF has no storage */ + if (!(GF->storage)) return; + + /* Return if communication not required and no forced synchronisation */ + if (!(GH->forceSync || GF->docomm[dir])) return; + + if (GH->neighbors[GH->myproc][dir] >= 0) { + /* Here wait one at a time, so the others can arrive while + we copy the data back in... */ + +#ifdef DEBUG_PRINT + printf ("FINISHRECV: GF %s Side %d Proc %d request %d\n", + GF->name,dir,GH->myproc,GF->rreq[dir]); +#endif + + /* Copy buffer onto GF Storage */ + istart = GH->ghosts[GF->stagger][dir][0][0]; + iend = GH->ghosts[GF->stagger][dir][0][1]; + jstart = GH->ghosts[GF->stagger][dir][1][0]; + jend = GH->ghosts[GF->stagger][dir][1][1]; + kstart = GH->ghosts[GF->stagger][dir][2][0]; + kend = GH->ghosts[GF->stagger][dir][2][1]; + + /* Great now copy. Note ordering is just link in PostSend */ + xx=0; + for (kk=kstart; kk<kend; kk++) + for (jj=jstart; jj<jend; jj++) + for (ii=istart; ii<iend; ii++) + GF->data[DATINDEX(GH,ii,jj,kk)] = GF->recv_buffer[dir][xx++]; + + } + +#endif +} diff --git a/src/pugh/pGF_PostRecv.c b/src/pugh/pGF_PostRecv.c new file mode 100644 index 0000000..81c4a30 --- /dev/null +++ b/src/pugh/pGF_PostRecv.c @@ -0,0 +1,69 @@ + /*@@ + @file pGF_PostRecv.c + @date Fri Feb 21 11:28:06 1997 + @author + @desc + Routines which post all the IRecv commands for a GF. These + allow the asyncronous model proposed in, for example, + @seeroutine SyncGF to go! + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine pGF_PostRecv + @date Thu Apr 3 12:10:46 1997 + @author Paul Walker + @desc + This routine posts a recieve (MPI_Irecv) of the buffer + we want to get the send from another processor, which + will be sent by @seeroutine pGF_PostSend and then + finalized by @seeroutoine pGF_FinishRecv. + <p> + Aside from a silly calculation to get a unique tag + based on neigbors and processors, this is a very + straightforward routine. 
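+      <p>
+      A worked example of the tag (hypothetical numbers): with nprocs = 4,
+      myproc = 2, gfno = 5 and dir = 3,
+      <pre>
+         rtag = 1000 + 3 + 2 * (2 + 4 * 5) = 1047
+      </pre>
+      which is the same value the sending neighbor computes for stag in
+      @seeroutine pGF_PostSend, since that routine uses the complementary
+      direction together with this processor's rank.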
+ @enddesc + @history + @hdate Nov 4 1998 @hauthor Gabrielle Allen + @hdesc Allow for forced synchronization of all GFs with storage + @endhistory + @@*/ + +void pGF_PostRecv(pGH *GH, pGF *GF, int dir) { +#ifdef MPI + int R; + int rtag; + + /* Return if GF has no storage */ + if (!(GF->storage)) return; + + /* Return if communication not required and no forced synchronisation */ + if (!(GH->forceSync || GF->docomm[dir])) return; + + if (GH->neighbors[GH->myproc][dir] >= 0) { + rtag = 1000 + dir + 2 * (GH->myproc + GH->nprocs * GF->gfno); + /* mod the tag to force MPI compliance */ + rtag = rtag % 32768; +#ifdef DEBUG_PRINT + printf ("RECV GF %s Side %d Proc %d rtag %d size %d from proc %d\n", + GF->name,dir,GH->myproc,rtag, + GF->buffer_sz[dir],GH->neighbors[GH->myproc][dir]); +#endif + if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) + CACTUS_MPI_ERROR(MPI_Irecv((void*)(GF->recv_buffer[dir]), + GF->buffer_sz[dir],PUGH_MPI_TYPE, + GH->neighbors[GH->myproc][dir],rtag, + GH->PUGH_COMM_WORLD,&(GF->rreq[dir]))); + if (GH->commmodel == PUGH_DERIVEDTYPES) + CACTUS_MPI_ERROR(MPI_Irecv((void *)(GF->data), + 1,GH->recv_dt[GF->stagger][dir], + GH->neighbors[GH->myproc][dir],rtag, + GH->PUGH_COMM_WORLD,&(GF->rreq[dir]))); + } +#endif +} diff --git a/src/pugh/pGF_PostSend.c b/src/pugh/pGF_PostSend.c new file mode 100644 index 0000000..d8ec630 --- /dev/null +++ b/src/pugh/pGF_PostSend.c @@ -0,0 +1,116 @@ + /*@@ + @file pGF_PostSend.c + @date Thu Apr 3 11:58:14 1997 + @author Paul Walker + @desc + The send-arm of the three-way pugh communcations branch + which is @seefile pGF_PostRecv.v, @seefile pGF_PostSend.c + and @seefile pGF_FinishRecv.c. See the documentation for + @seeroutine pGF_PostSend for details on this routine. + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine pGF_PostSend + @date Thu Apr 3 11:58:59 1997 + @author Paul Walker + @desc + This routine posts an asyncronous MPI send of a buffer + for a face (MPI_Isend). It does this by first packing up + a send buffer (allocated in @seeroutine SetupPGF) then + sending it out on the pipe. + <p> + Since this is an asynchronous communications model we + assume that a recieve has been posted for the send. + thus the correct calling order for this is as in + @seeroutine SyncGF, eg, after a Recv. + <p> + Note this does <b>not</b> wait on the send buffer from previous + communications. It is the users responsibility to wait on + that buffer. 
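+      <p>
+      In the buffered model the caller therefore retires the previous send
+      on a face before its buffer is reused, roughly (a sketch; see
+      @seeroutine SyncGroupGF for the real loop):
+      <pre>
+         MPI_Wait(&(GF->sreq[dir]), &status);
+         pGF_PostSend(GH, GF, dir);
+      </pre>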
+ @enddesc + @calledby SyncGroupGF + @history + @hdate Nov 4 1998 @hauthor Gabrielle Allen + @hdesc Allow for forced synchronization of all GFs with storage + @endhistory +@@*/ + +void pGF_PostSend(pGH *GH, pGF *GF, int dir) { + int R, stag, dircomp; + int tmp; + int ii,jj,kk,xx; + int istart, iend; + int jstart, jend; + int kstart, kend; + +#ifdef MPI + Double ts, tw, tp; + + MPI_Status ms; + + /* Return if GF has no storage */ + if (!(GF->storage)) return; + + /* Return if communication not required and no forced synchronisation */ + if (!(GH->forceSync || GF->docomm[dir])) return; + + if (GH->neighbors[GH->myproc][dir] >= 0) { + ts = MPI_Wtime(); + dircomp = dir+1; /* Complementary direction */ + if (dircomp %2 == 0) dircomp = dir-1; + /* Note this is the complement of the rtag set in PostRecv */ + stag = 1000 + dircomp + 2 * (GH->neighbors[GH->myproc][dir] + + GH->nprocs * GF->gfno); + /* mod the tag to force MPI compliance */ + stag = stag % 32768; + +#ifdef DEBUG_PRINT + printf ("SEND: GF %s Side %d Proc %d stag %d Size %d to %d\n", + GF->name,dir,GH->myproc,stag, + GF->buffer_sz[dir],GH->neighbors[GH->myproc][dir]); +#endif + + if (GH->commmodel == PUGH_DERIVEDTYPES) + CACTUS_MPI_ERROR(MPI_Isend((void *)(GF->data), + 1,GH->send_dt[GF->stagger][dir], + GH->neighbors[GH->myproc][dir],stag, + GH->PUGH_COMM_WORLD,&(GF->sreq[dir]))); + + if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) { + /* Copy my information into the send buffer */ + istart = GH->overlap[GF->stagger][dir][0][0]; + iend = GH->overlap[GF->stagger][dir][0][1]; + jstart = GH->overlap[GF->stagger][dir][1][0]; + jend = GH->overlap[GF->stagger][dir][1][1]; + kstart = GH->overlap[GF->stagger][dir][2][0]; + kend = GH->overlap[GF->stagger][dir][2][1]; + + /* Great now copy */ + xx = 0; + for (kk=kstart; kk<kend; kk++) + for (jj=jstart; jj<jend; jj++) + for (ii=istart; ii<iend; ii++) + GF->send_buffer[dir][xx++] = GF->data[DATINDEX(GH,ii,jj,kk)]; + + tp = MPI_Wtime(); + + /* post send */ + CACTUS_MPI_ERROR(MPI_Isend((void*)(GF->send_buffer[dir]), + GF->buffer_sz[dir],PUGH_MPI_TYPE, + GH->neighbors[GH->myproc][dir],stag, + GH->PUGH_COMM_WORLD,&(GF->sreq[dir]))); + tw = MPI_Wtime(); +#ifdef COMM_TIMING + printf ("GF %s Dir %d Time %lf\n", + GF->name, dir, tw-ts); +#endif + } + } +#endif +} diff --git a/src/pugh/pGF_Reduction.c b/src/pugh/pGF_Reduction.c new file mode 100644 index 0000000..bc8e211 --- /dev/null +++ b/src/pugh/pGF_Reduction.c @@ -0,0 +1,198 @@ + /*@@ + @file pGF_Reduction.c + @date Thu Apr 3 11:54:53 1997 + @author Paul Walker + @desc + Various MPI reduction operators for maxima, minima, norms + and the like on grid functions. These are all pretty + straighforward thanks to the miracle of MPI_Allreduce. + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine pGF_MaxVal + @date Thu Apr 3 11:55:25 1997 + @author Paul Walker + @desc + Returns the maximum value of a distributed grid function. + @enddesc +@@*/ + + +Double pGF_MaxVal(pGH *GH, pGF *GF) { + Double res, lres; + int i; + + if (!GF->storage) return GF->maxval; + + res = GF->data[0]; + for (i=1;i<GH->npoints;i++) + res = (GF->data[i] > res? GF->data[i] : res); + + + /* So now res is the local max */ +#ifdef MPI + lres = res; + CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_MAX,GH->PUGH_COMM_WORLD)); +#endif + + return res; +} + + + /*@@ + @routine pGF_MinVal + @date Thu Apr 3 11:55:48 1997 + @author Paul Walker + @desc + Returns the minimum value of a distributed grid function. 
+ @enddesc +@@*/ + +Double pGF_MinVal(pGH *GH, pGF *GF) { + Double res, lres; + int i; + + if (!GF->storage) return GF->minval; + + res = GF->data[0]; + for (i=1;i<GH->npoints;i++) + res = (GF->data[i] < res ? GF->data[i] : res); + + /* So now res is the local max */ +#ifdef MPI + lres = res; + CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_MIN,GH->PUGH_COMM_WORLD)); +#endif + + return res; +} + + + + /*@@ + @routine pGF_Norm1 + @date Thu Apr 3 11:56:05 1997 + @author Paul Walker + @desc + Returns the average of a distributed grid function. + @enddesc +@@*/ + +Double pGF_Norm1(pGH *GH, pGF *GF) { + Double res, lres; + int i, ii, jj, kk; + int is,ie,js,je,ks,ke; + int np,tnp; + + if (!GF->storage) return GF->norm1; + + +#define ABS(x) ((x)<0?-(x):(x)) + +#ifndef MPI + res = 0.0; + for (i=0;i<GH->npoints;i++) res += ABS(GF->data[i]); + tnp = GH->npoints; + +#else + res = 0.0; + np = 0; + for (kk=GH->ownership[GF->stagger][2][0]; + kk < GH->ownership[GF->stagger][2][1]; + kk ++) + for (jj = GH->ownership[GF->stagger][1][0]; + jj < GH->ownership[GF->stagger][1][1]; + jj ++) + for (ii = GH->ownership[GF->stagger][0][0]; + ii < GH->ownership[GF->stagger][0][1]; + ii ++) { + res += ABS(GF->data[DATINDEX(GH,ii,jj,kk)]); + np++; + } + lres = res; + CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_SUM,GH->PUGH_COMM_WORLD)); + CACTUS_MPI_ERROR(MPI_Allreduce(&np,&tnp,1,MPI_INT,MPI_SUM,GH->PUGH_COMM_WORLD)); +#endif + + res = res/tnp; + return res; +} + + + /*@@ + @routine pGF_Norm2 + @date Thu Apr 3 11:56:36 1997 + @author Paul Walker + @desc + Returns the "norm2" of a distributed grid function. That + is, returns $\sqrt{\Sigma (a_i * a_i) / np}$. + @enddesc +@@*/ + + +Double pGF_Norm2(pGH *GH, pGF *GF) { + Double res, lres; + int i, ii, jj, kk; + int is,ie,js,je,ks,ke; + int np, tnp; + + if (!GF->storage) return GF->norm2; + +#ifndef MPI + res = 0.0; + for (i=0;i<GH->npoints;i++) res += GF->data[i]*GF->data[i]; + tnp = GH->npoints; + +#else + /* MPI version depends heavily on the ownership array */ + res = 0.0; + np = 0; + + for (kk=GH->ownership[GF->stagger][2][0]; + kk < GH->ownership[GF->stagger][2][1]; + kk ++) + for (jj = GH->ownership[GF->stagger][1][0]; + jj < GH->ownership[GF->stagger][1][1]; + jj ++) + for (ii = GH->ownership[GF->stagger][0][0]; + ii < GH->ownership[GF->stagger][0][1]; + ii ++) { + res += GF->data[DATINDEX(GH,ii,jj,kk)] * + GF->data[DATINDEX(GH,ii,jj,kk)]; + np ++; + } + lres = res; + CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_SUM,GH->PUGH_COMM_WORLD)); + CACTUS_MPI_ERROR(MPI_Allreduce(&np,&tnp,1,MPI_INT,MPI_SUM,GH->PUGH_COMM_WORLD)); +#endif + + res = sqrt(res/tnp); + return res; +} + + /*@@ + @routine pGF_AllNorms + @date Mon Jun 30 10:44:57 1997 + @author Paul Walker + @desc + Returns all 4 norms into a Double pointer in the order + max min norm1 norm2 + <p> + This is purely a convenience function. + @enddesc + @calls pGF_MaxVal, pGF_MinVal, pGF_Norm1, pGF_Norm2 +@@*/ + +void pGF_AllNorms(pGH *GH, pGF *GF, Double *res) { + res[0] = pGF_MaxVal(GH,GF); + res[1] = pGF_MinVal(GH,GF); + res[2] = pGF_Norm1(GH,GF); + res[3] = pGF_Norm2(GH,GF); +} + |
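Taken together these files give the typical life cycle of a pugh grid function. A minimal sketch of a caller (hypothetical driver code, not part of this commit; it assumes a pGH *GH already built by the rest of pugh and the constants defined in pugh.h):

    pGF    *phi;
    Double  norms[4];

    /* vertex-centred GF with storage and communication on all faces */
    phi = SetupPGF(GH, "phi", PUGH_ALLCOMM, PUGH_STORAGE, PUGH_VERTEXCTR);

    /* ... update phi->data on the local patch ... */

    SyncGF(GH, phi);               /* exchange ghost zones (no-op without MPI) */
    pGF_AllNorms(GH, phi, norms);  /* norms = { max, min, norm1, norm2 }       */

    DestroyPGF(GH, &phi);

Grouped synchronisation via SyncGroupGF(GH, "groupname") follows the same pattern and is cheaper when several grid functions need their ghost zones at once, as the comments in SyncGroupGF.c explain.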