Diffstat:
 README                          |   19
 interface.ccl                   |    6
 param.ccl                       |   27
 schedule.ccl                    |    7
 src/Comm.c                      |   88
 src/GHExtension.c               |   44
 src/SetupPGH.c                  | 1072
 src/Startup.c                   |   67
 src/include/pGArray.h           |   20
 src/include/pGF.h               |  100
 src/include/pGH.h               |  132
 src/include/pugh.h              |  150
 src/include/pughDriver.h        |   68
 src/include/pughProblem.h       |   53
 src/include/pugh_constants.h    |   64
 src/make.code.defn              |    8
 src/pugh/SetupPGArray.c         |   84
 src/pugh/SetupPGF.c             |  438
 src/pugh/SetupSliceCenter.c     |  137
 src/pugh/SyncGroupGF.c          |  271
 src/pugh/pGF_FinishRecv.c       |   81
 src/pugh/pGF_PostRecv.c         |   69
 src/pugh/pGF_PostSend.c         |  116
 src/pugh/pGF_Reduction.c        |  198
 src/pugh_Comm.h                 |   52
 src/pugh_extension.h            |   30
 26 files changed, 3401 insertions, 0 deletions
diff --git a/README b/README
new file mode 100644
index 0000000..b72ed2c
--- /dev/null
+++ b/README
@@ -0,0 +1,19 @@
+Cactus Code Thorn pugh
+Authors : ...
+Managed by : ... <...@...........>
+Version : ...
+CVS info   : $Header: /usr/users/cactus/CCTK/lib/make/new_thorn.pl,v 1.1 1999/02/03 17:00:50 goodale Exp $
+
+1. Purpose of the thorn
+
+This thorn does ...
+
+2. Dependencies of the thorn
+
+This thorn additionally requires thorns ...
+
+3. Thorn distribution
+
+This thorn is available to ...
+
+4. Additional information
diff --git a/interface.ccl b/interface.ccl
new file mode 100644
index 0000000..cd3d94f
--- /dev/null
+++ b/interface.ccl
@@ -0,0 +1,6 @@
+# Interface definition for thorn pugh
+# $Header$
+
+implements:driver
+inherits: Cactus
+
diff --git a/param.ccl b/param.ccl
new file mode 100644
index 0000000..6dcb5d7
--- /dev/null
+++ b/param.ccl
@@ -0,0 +1,27 @@
+# Parameter definitions for thorn pugh
+# $Header$
+
+public:
+
+INTEGER nx "The size of the grid in the x direction"
+{
+ *:-1 :: "Each processor gets a fixed local grid of minus this many points"
+ 1:* :: "Grid of this size distributed across all processors"
+} 10
+
+INTEGER ny "The size of the grid in the y direction"
+{
+ *:-1 :: "Each processor gets a fixed local grid of minus this many points"
+ 1:* :: "Grid of this size distributed across all processors"
+} 10
+
+INTEGER nz "The size of the grid in the z direction"
+{
+ *:-1 :: "Each processor gets a fixed local grid of minus this many points"
+ 1:* :: "Grid of this size distributed across all processors"
+} 10
+
+INTEGER ghost_size "The width of the ghost zone"
+{
+ 1:* :: "Must be a positive integer"
+} 1
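The sign convention above is resolved in pGH_SetupBasics (src/SetupPGH.c below): a negative value fixes the local per-processor size (ghost zones included) and the global size follows from the processor layout. A small worked sketch of that arithmetic, with assumed example values:

    #include <stdio.h>

    /* Illustrative sketch only, not part of this patch: the global x size
       implied by nx = -20 on a 4-processor x decomposition with ghost_size 1
       (mirrors the formula in pGH_SetupBasics). */
    int main(void)
    {
      int nx = -20, nprocx = 4, sw = 1;            /* assumed example values  */
      int global_nx = (nprocx - 2) * (-nx - 2*sw)  /* interior processors     */
                    + 2 * (-nx - sw);              /* the two boundary procs  */
      printf("global nx = %d\n", global_nx);       /* prints 74               */
      return 0;
    }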
diff --git a/schedule.ccl b/schedule.ccl
new file mode 100644
index 0000000..7731f24
--- /dev/null
+++ b/schedule.ccl
@@ -0,0 +1,7 @@
+# Schedule definitions for thorn pugh
+# $Header$
+
+schedule pugh_Startup at STARTUP
+{
+ LANG:C
+} "Pugh startup routine."
diff --git a/src/Comm.c b/src/Comm.c
new file mode 100644
index 0000000..69f1e9f
--- /dev/null
+++ b/src/Comm.c
@@ -0,0 +1,88 @@
+ /*@@
+ @file Comm.c
+ @date Thu Feb 4 11:34:29 1999
+ @author Tom Goodale
+ @desc
+ Pugh communication functions
+ @enddesc
+ @@*/
+
+#include <stdarg.h>
+#include "flesh.h"
+
+int pugh_SyncGroup(cGH *GH, const char *group)
+{
+ return 0;
+}
+
+int pugh_EnableGroupStorage(cGH *GH, const char *group)
+{
+ return 0;
+}
+
+int pugh_DisableGroupStorage(cGH *GH, const char *group)
+{
+ return 0;
+}
+
+
+int pugh_EnableGroupComm(cGH *GH, const char *group)
+{
+ return 0;
+}
+
+int pugh_DisableGroupComm(cGH *GH, const char *group)
+{
+ return 0;
+}
+
+
+int pugh_Barrier(cGH *GH)
+{
+ return 0;
+}
+
+int pugh_Reduce(cGH *GH,
+ const char *operation,
+ int n_infields,
+ int n_outfields,
+ int out_type,
+ void **outarray,
+ ...)
+{
+ return 0;
+}
+
+
+
+int pugh_Interp(cGH *GH,
+ const char *operation,
+ int n_coords,
+ int n_infields,
+ int n_outfields,
+ int n_points,
+ int type,
+ ...)
+{
+ return 0;
+}
+
+
+int pugh_ParallelInit(cGH *GH)
+{
+ return 0;
+}
+
+int pugh_Exit(cGH *GH)
+{
+ return 0;
+}
+
+int pugh_Abort(cGH *GH)
+{
+ return 0;
+}
+
+
+
+
diff --git a/src/GHExtension.c b/src/GHExtension.c
new file mode 100644
index 0000000..f36f6ee
--- /dev/null
+++ b/src/GHExtension.c
@@ -0,0 +1,44 @@
+ /*@@
+ @file GHExtension.c
+ @date Thu Feb 4 10:47:14 1999
+ @author Tom Goodale
+ @desc
+ Pugh GH extension stuff.
+ @enddesc
+ @@*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "flesh.h"
+#include "pugh.h"
+#include "declare_parameters.h"
+
+static char *rcsid = "$Header$";
+
+void *pugh_SetupGH(tFleshConfig *config,
+ int convergence_level,
+ cGH *GH)
+{
+ DECLARE_PARAMETERS
+ pGH *newGH;
+
+ newGH = SetupPGH(nx, ny, nz, ghost_size, PUGH_NO_STAGGER);
+
+ if(!newGH)
+ {
+ fprintf(stderr, "PUGH: Failed to allocate memory for a pGH !\n");
+ exit(2);
+ }
+
+ return newGH;
+}
+
+int pugh_InitGH(cGH *GH)
+{
+ return 0;
+}
+
+int pugh_rfrTraverseGH(cGH *GH, int rfrpoint)
+{
+ return 0;
+}
diff --git a/src/SetupPGH.c b/src/SetupPGH.c
new file mode 100644
index 0000000..76d1fd5
--- /dev/null
+++ b/src/SetupPGH.c
@@ -0,0 +1,1072 @@
+ /*@@
+ @file SetupPGH.c
+ @date Fri Feb 21 10:18:17 1997
+ @author Paul Walker
+ @desc
+ Initializes the pgh. You should really consult the
+ pGH description (@seeheader pGH.h) in order to clearly
+ understand what is going on.
+ @enddesc
+ @@*/
+
+#include "pugh.h"
+
+/* Old Id: SetupPGH.c,v 1.29 1999/01/12 08:23:35 bruegman Exp */
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine SetupPGH
+ @date Fri Feb 21 10:21:36 1997
+ @author Paul Walker
+ @desc
+ Sets up the PGH distribution based on processor number and
+ problem size. This amounts to:
+ <ul>
+ <li> Allocating and copying some stuff
+ <li> Figuring out an optimal domain decomposition
+ <li> Setting up ghosts and overlaps.
+ </ul>
+ Of course the latter two are hard, so some more comments
+ are in order.
+ <p>
+ The second is in @seeroutine ProcTop now.
+ <p>
+ The third should be obvious ;-)
+ @enddesc
+ @calls ProcTop
+ @history
+ @hauthor Gabrielle Allen @hdate 26 Sep 1998
+ @hdesc Changed IO parameter names
+ @hauthor Gabrielle Allen @hdate 17 Oct 1998
+ @hdesc Added initialisation of IO structure
+ @hauthor Gabrielle Allen @hdate 16 Nov 1998
+ @hdesc Added initialisation GH->forceSync
+ @hdate Thu Feb 4 11:13:37 1999
+ @hauthor Tom Goodale
+ @hdesc Modified for Cactus 4.0
+@@*/
+
+pGH *SetupPGH(int nx, int ny, int nz, int stencil_width,int staggertype) {
+ pGH *GH;
+
+
+ void pGH_SetupBasics(pGH *, int, int, int, int, int);
+ void pGH_SetupBounds(pGH *, int, int, int);
+ void pGH_SetupOwnership(pGH *);
+
+ /* Allocate for myself */
+ GH = (pGH*) malloc(sizeof(pGH));
+
+ pGH_SetupBasics(GH,nx,ny,nz,stencil_width,staggertype);
+ pGH_SetupBounds(GH,nx,ny,nz);
+ pGH_SetupOwnership(GH);
+
+ /* slice center not yet set: -1 */
+ GH->spxyz[0]=-1;
+ GH->spxyz[1]=-1;
+ GH->spxyz[2]=-1;
+
+
+ /* And we're done... */
+ return GH;
+}
+
+
+void pGH_SetupBasics(pGH *GH, int nx, int ny, int nz,
+ int stencil_width,int staggertype) {
+ int i;
+ int nprocx,nprocy,nprocz;
+
+ pGHList *ltemp;
+
+ void ProcTop(int, int *, int *, int *);
+
+
+ /* MPI Goobits */
+#ifdef MPI
+ /* Set up my communicator. This would allow us to run pugh on
+ a subset of processors at a later date if, for instance, we
+ were using panda or what not.
+ */
+ CACTUS_MPI_ERROR(MPI_Comm_dup(MPI_COMM_WORLD, &(GH->PUGH_COMM_WORLD)));
+
+ CACTUS_MPI_ERROR(MPI_Comm_size(GH->PUGH_COMM_WORLD, &(GH->nprocs)));
+ CACTUS_MPI_ERROR(MPI_Comm_rank(GH->PUGH_COMM_WORLD, &(GH->myproc)));
+#else
+ GH->nprocs = 1;
+ GH->myproc = 0;
+#endif
+
+  /* Activate the GH (convergence grids can be deactivated by the NaN checker so the
+     evolution will carry on) */
+ GH->active=1;
+
+ /* Figure out the processor topology first so we can have constant
+ work per processor.
+ */
+ ProcTop(GH->nprocs,&nprocx,&nprocy,&nprocz);
+ GH->nprocx = nprocx;
+ GH->nprocy = nprocy;
+ GH->nprocz = nprocz;
+
+
+ /* Set up the global size */
+ /* Try this exceptionally cheap method */
+ if (nx < 0 && GH->nprocx > 1) {
+ GH->nx = (GH->nprocx-2) * (-nx - 2*stencil_width) +
+ 2 * (-nx - stencil_width);
+ if (staggertype == PUGH_STAGGER) GH->nx -= nprocx-1;
+ } else {
+ GH->nx = abs(nx);
+ }
+
+ if (ny < 0 && GH->nprocy > 1) {
+ GH->ny = (GH->nprocy-2) * (-ny - 2*stencil_width) +
+ 2 * (-ny - stencil_width);
+ if (staggertype == PUGH_STAGGER) GH->ny -= nprocy-1;
+ } else {
+ GH->ny = abs(ny);
+ }
+
+ if (nz < 0 && GH->nprocz > 1) {
+ GH->nz = (GH->nprocz-2) * (-nz - 2*stencil_width) +
+ 2 * (-nz - stencil_width);
+ if (staggertype == PUGH_STAGGER) GH->nz -= nprocz-1;
+ } else {
+ GH->nz = abs(nz);
+ }
+
+ GH->stencil_width = stencil_width;
+
+
+ /*
+ if (Contains("commmodel","loc") != 0) {
+ GH->commmodel = PUGH_ALLOCATEDBUFFERS;
+ } else {
+ GH->commmodel = PUGH_DERIVEDTYPES;
+ }
+ */
+ GH->commmodel = PUGH_ALLOCATEDBUFFERS;
+
+#if 0
+ /* Set up the IO Processor. */
+ GH->IO.outpfx[0] = '\0';
+
+ if (Contains("iomode","proc")) {
+ GH->IO.ioproc = GH->myproc;
+ GH->IO.nioprocs = GH->nprocs;
+ GH->IO.ioproc_every = 1;
+ } else if (Contains("iomode","np")) {
+ if (Geti("ioproc_every") > GH->nprocs) {
+ GH->IO.ioproc_every = GH->nprocs;
+ printf("Reducing ioproc_every to %d\n",GH->IO.ioproc_every);
+ } else {
+ GH->IO.ioproc_every = Geti("ioproc_every");
+ }
+ GH->IO.nioprocs = (GH->nprocs) / GH->IO.ioproc_every +
+ (GH->nprocs % GH->IO.ioproc_every ? 1 : 0);
+ GH->IO.ioproc = GH->myproc - GH->myproc % GH->IO.ioproc_every;
+ } else if (Contains_bounded("iomode","onefile")) {
+ GH->IO.ioproc = 0;
+ GH->IO.nioprocs = 1;
+ GH->IO.ioproc_every = GH->nprocs;
+ } else {
+ printf ("I don't understand iomode setting. Using one file\n");
+ GH->IO.ioproc = 0;
+ GH->IO.nioprocs = 1;
+ GH->IO.ioproc_every = GH->nprocs;
+ }
+
+  GH->IO.downsample = Geti("out_downsample");
+ GH->IO.onefileperslice = Contains("onefileperslice","yes");
+
+ /* At the moment only have unchunked for a single output file */
+ if (Contains("iomode","unchunked")) {
+ if (GH->IO.ioproc_every >= GH->nprocs)
+ GH->IO.unchunkedresult = 1;
+ else {
+	printf("Unchunked output is not currently supported for multiple\n");
+	printf("output files ... output will be chunked\n");
+ GH->IO.unchunkedresult = 0;
+ }
+ } else {
+ GH->IO.unchunkedresult = 0;
+ }
+
+ /* See if we should be verbose or not */
+ if (Contains("IO_verbose","yes")) {
+ GH->IO.verbose = 1;
+ } else {
+ GH->IO.verbose = 0;
+ }
+
+ /* See what we should output */
+ if (Contains("IO_datestamp","no")) {
+ GH->IO.datestamp = 0;
+ } else {
+ GH->IO.datestamp = 1;
+ }
+ if (Contains("IO_parameters","no")) {
+ GH->IO.parameters = 0;
+ } else {
+ GH->IO.parameters = 1;
+ }
+ if (Contains("IO_scalars","no")) {
+ GH->IO.scalars = 0;
+ } else {
+ GH->IO.scalars = 1;
+ }
+ if (Contains("IO_structures","no")) {
+ GH->IO.structures = 0;
+ } else {
+ GH->IO.structures = 1;
+ }
+
+ /* Check if Panda is on */
+#ifdef THORN_PANDA
+ GH->IO.panda_on = Contains("panda_active","yes");
+#else
+ GH->IO.panda_on = 0;
+#endif
+
+#endif /* 0 */
+
+ /* set staggering flag */
+ GH->stagger = staggertype;
+
+ if (Contains("bound","periodic")) {
+ GH->periodic = 1;
+ } else {
+ GH->periodic = 0;
+ }
+
+ /* Grid function list */
+ GH->ngridFuncs = 0;
+ GH->gridFuncs = NULL;
+
+  /* Array list */
+ GH->nArrays = 0;
+ GH->Arrays = NULL;
+
+ /* Allocate the bounds memory */
+ GH->lb = (int **)malloc(GH->nprocs*sizeof(int *));
+ GH->ub = (int **)malloc(GH->nprocs*sizeof(int *));
+ GH->neighbors = (int **)malloc(GH->nprocs*sizeof(int *));
+
+ GH->lb[0] = (int *)malloc(GH->nprocs*3*sizeof(int));
+ for (i=1;i<GH->nprocs;i++)
+ GH->lb[i] = GH->lb[0] + i*3;
+
+ GH->ub[0] = (int *)malloc(GH->nprocs*3*sizeof(int));
+ for (i=1;i<GH->nprocs;i++)
+ GH->ub[i] = GH->ub[0] + i*3;
+
+ GH->neighbors[0] = (int *)malloc(GH->nprocs*6*sizeof(int));
+ for (i=1;i<GH->nprocs;i++)
+ GH->neighbors[i] = GH->neighbors[0] + i*6;
+
+ /* Remote neighbor points */
+ GH->rnpoints = (int *)malloc(GH->nprocs*sizeof(int));
+ GH->rnx = (int *)malloc(GH->nprocs*sizeof(int));
+ GH->rny = (int *)malloc(GH->nprocs*sizeof(int));
+ GH->rnz = (int *)malloc(GH->nprocs*sizeof(int));
+
+ /* In general - we will noodle this later.*/
+ GH->level = 0;
+ GH->mglevel = 0;
+ GH->convlevel = 0;
+ GH->forceSync = 0;
+
+
+ /* Finally add myself to the pGH List */
+ /*
+ ltemp = (pGHList *)malloc(sizeof(pGHList));
+ ltemp->next = GHList;
+ ltemp->GH = GH;
+ GHList = ltemp;
+ */
+}
+
+
+void pGH_SetupBounds(pGH *GH,int nx,int ny,int nz) {
+ int i,j,k,l,dir,staglcv;
+ int xstep, ystep, zstep;
+ int lnx, lny, lnz;
+ int *dtindexes, *one, maxs, dirs[3];
+ int maxpoints, minpoints;
+ Double avgpoints;
+ int istart,jstart,kstart,iend,jend,kend;
+ int nprocx,nprocy,nprocz;
+ int staggertype;
+
+ staggertype = GH->stagger;
+
+ nprocx = GH->nprocx;
+ nprocy = GH->nprocy;
+ nprocz = GH->nprocz;
+
+ /* Now distribute the points among the processors. */
+
+ /* fixed work per proc? */
+ if (nx < 0) {
+ for (i=0;i<nprocx;i++)
+ for (j=0;j<nprocy;j++)
+ for (k=0;k<nprocz;k++) {
+ int pnum = i + nprocx*(j+nprocy*k);
+
+ /* Bounding box information */
+
+ if (i == 0) GH->lb[pnum][0] = 0;
+ else {GH->lb[pnum][0] = GH->ub[pnum-1][0] + 1 -
+ 2*(GH->stencil_width);
+ if(staggertype == PUGH_STAGGER) GH->lb[pnum][0] --;
+ }
+ GH->ub[pnum][0] = GH->lb[pnum][0] + abs(nx) - 1;
+
+ if (j == 0) GH->lb[pnum][1] = 0;
+ else {GH->lb[pnum][1] = GH->ub[pnum-nprocx][1] + 1 -
+ 2*(GH->stencil_width);
+ if(staggertype == PUGH_STAGGER) GH->lb[pnum][1] --;
+ }
+ GH->ub[pnum][1] = GH->lb[pnum][1] + abs(ny) - 1;
+
+ if (k == 0) GH->lb[pnum][2] = 0;
+ else {GH->lb[pnum][2] = GH->ub[pnum-nprocx*nprocy][2] + 1 -
+ 2*(GH->stencil_width);
+ if(staggertype == PUGH_STAGGER) GH->lb[pnum][2] --;
+ }
+ GH->ub[pnum][2] = GH->lb[pnum][2] + abs(nz) - 1;
+
+ }
+ }
+ else
+ {
+
+#ifdef THORN_BAM_ELLIPTIC
+ /* BB: for multigrid, boundary points of octants are treated as ghosts
+ put them on single processor!
+
+ 1. we use the standard algorithm on a grid reduced by stencil_width
+ in every direction to guarantee the proper grid alignment
+ */
+ if (Contains("grid", "BAM"))
+ { if( Contains("grid", "octant"))
+ { GH->nx -= GH->stencil_width;
+ GH->ny -= GH->stencil_width;
+ GH->nz -= GH->stencil_width;
+ }
+ if( Contains("grid", "bitant"))
+ GH->nz -= GH->stencil_width;
+ }
+#endif
+
+ xstep = (GH->nx-1) / nprocx;
+ ystep = (GH->ny-1) / nprocy;
+ zstep = (GH->nz-1) / nprocz;
+
+ for (i=0;i<nprocx;i++)
+ for (j=0;j<nprocy;j++)
+ for (k=0;k<nprocz;k++) {
+ int pnum = i + nprocx*(j+nprocy*k);
+
+ /* Bounding box information */
+ if (i == 0) GH->lb[pnum][0] = 0;
+ else {GH->lb[pnum][0] = i*xstep +1 - GH->stencil_width;
+ if(staggertype == PUGH_STAGGER)
+ GH->lb[pnum][0] --;
+ }
+ if (i == nprocx-1) GH->ub[pnum][0] = GH->nx-1;
+ else GH->ub[pnum][0] = (i+1)*xstep + GH->stencil_width;
+
+ if (j == 0) GH->lb[pnum][1] = 0;
+ else {GH->lb[pnum][1] = j*ystep +1 - GH->stencil_width;
+ if(staggertype == PUGH_STAGGER)
+ GH->lb[pnum][1] --;
+ }
+ if (j == nprocy-1) GH->ub[pnum][1] = GH->ny-1;
+ else GH->ub[pnum][1] = (j+1)*ystep + GH->stencil_width;
+
+ if (k == 0) GH->lb[pnum][2] = 0;
+ else {GH->lb[pnum][2] = k*zstep +1 - GH->stencil_width;
+ if(staggertype == PUGH_STAGGER)
+ GH->lb[pnum][2] --;
+ }
+ if (k == nprocz-1) GH->ub[pnum][2] = GH->nz-1;
+ else GH->ub[pnum][2] = (k+1)*zstep + GH->stencil_width;
+
+ }
+
+#ifdef THORN_BAM_ELLIPTIC
+ /* 2. add ghosts to grids at octant boundary using uniform shift */
+
+ if (Contains("grid", "BAM"))
+ { if (Contains("grid", "octant"))
+ { GH->nx += GH->stencil_width;
+ GH->ny += GH->stencil_width;
+ GH->nz += GH->stencil_width;
+ for (i = 0; i < GH->nprocs; i++) {
+ for (j = 0; j < 3; j++) {
+ if (GH->lb[i][j]) GH->lb[i][j] += GH->stencil_width;
+ GH->ub[i][j] += GH->stencil_width;
+ }
+ }
+ }
+ if (Contains("grid", "bitant"))
+ { GH->nz += GH->stencil_width;
+ for (i = 0; i < GH->nprocs; i++) {
+ for (j = 2; j < 3; j++) {
+ if (GH->lb[i][j]) GH->lb[i][j] += GH->stencil_width;
+ GH->ub[i][j] += GH->stencil_width;
+ }
+ }
+ }
+ }
+#endif
+
+ }
+}
+
+
+
+
+void pGH_SetupOwnership(pGH *GH) {
+ int i,j,k,l,dir,staglcv;
+ int xstep, ystep, zstep;
+ int lnx, lny, lnz;
+ int *dtindexes, *one, maxs, dirs[3];
+ int maxpoints, minpoints;
+ Double avgpoints;
+ int istart,jstart,kstart,iend,jend,kend;
+ int nprocx,nprocy,nprocz;
+
+ maxpoints = 0; minpoints = GH->nx*GH->ny*GH->nz; avgpoints = 0;
+
+ nprocx = GH->nprocx;
+ nprocy = GH->nprocy;
+ nprocz = GH->nprocz;
+
+ for (i=0;i<nprocx;i++)
+ for (j=0;j<nprocy;j++)
+ for (k=0;k<nprocz;k++) {
+ int pnum = i + nprocx*(j+nprocy*k);
+
+ /* Neighbors are easy! Just recall that i dir is x dir etc ...*/
+ if (i == nprocx-1)
+ if (GH->periodic)
+ /* Use the i = 0 processor */
+ GH->neighbors[pnum][XDP] = 0 + nprocx*(j+nprocy*k);
+ else
+ GH->neighbors[pnum][XDP] = -1;
+ else
+ GH->neighbors[pnum][XDP] = (i+1) + nprocx*(j+nprocy*k);
+
+ if (i == 0)
+ if (GH->periodic)
+	  /* Use the nprocx-1 processor */
+ GH->neighbors[pnum][XDM] = nprocx-1 + nprocx*(j+nprocy*k);
+ else
+ GH->neighbors[pnum][XDM] = -1;
+ else
+ GH->neighbors[pnum][XDM] = (i-1) + nprocx*(j+nprocy*k);
+
+ if (j == nprocy-1)
+ if (GH->periodic)
+ /* Use the 0 processor */
+ GH->neighbors[pnum][YDP] = (i) + nprocx*(0+nprocy*k);
+ else
+ GH->neighbors[pnum][YDP] = -1;
+ else
+ GH->neighbors[pnum][YDP] = (i) + nprocx*(j+1+nprocy*k);
+
+ if (j == 0)
+ if (GH->periodic)
+ /* Use the nprocy-1 processor */
+ GH->neighbors[pnum][YDM] = (i) + nprocx*(nprocy-1+nprocy*k);
+ else
+ GH->neighbors[pnum][YDM] = -1;
+ else
+ GH->neighbors[pnum][YDM] = (i) + nprocx*(j-1+nprocy*k);
+
+ if (k == nprocz-1)
+ if (GH->periodic)
+ /* Use the 0 processor */
+ GH->neighbors[pnum][ZDP] = (i) + nprocx*(j+nprocy*0);
+ else
+ GH->neighbors[pnum][ZDP] = -1;
+ else
+ GH->neighbors[pnum][ZDP] = (i) + nprocx*(j+nprocy*(k+1));
+
+ if (k == 0)
+ if (GH->periodic)
+ /* Use the nprocz-1 processor */
+ GH->neighbors[pnum][ZDM] = (i) + nprocx*(j+nprocy*(nprocz-1));
+ else
+ GH->neighbors[pnum][ZDM] = -1;
+ else
+ GH->neighbors[pnum][ZDM] = (i) + nprocx*(j+nprocy*(k-1));
+
+ /* And save the number of points on each processor
+ * so we can get at it later ...
+ */
+ GH->rnx[pnum] = (GH->ub[pnum][0]-GH->lb[pnum][0]+1);
+ GH->rny[pnum] = (GH->ub[pnum][1]-GH->lb[pnum][1]+1);
+ GH->rnz[pnum] = (GH->ub[pnum][2]-GH->lb[pnum][2]+1);
+ GH->rnpoints[pnum] = GH->rnx[pnum] * GH->rny[pnum] * GH->rnz[pnum];
+ if (GH->rnpoints[pnum] > maxpoints) maxpoints = GH->rnpoints[pnum];
+ if (GH->rnpoints[pnum] < minpoints) minpoints = GH->rnpoints[pnum];
+ avgpoints += GH->rnpoints[pnum];
+
+ }
+ avgpoints = (1.0 * avgpoints) / (GH->nprocs);
+
+ /* Local information */
+ GH->npoints = GH->rnpoints[GH->myproc];
+ GH->lnx = GH->rnx[GH->myproc];
+ GH->lny = GH->rny[GH->myproc];
+ GH->lnz = GH->rnz[GH->myproc];
+ GH->maxskew = (maxpoints-minpoints)/avgpoints * 100.0;
+
+
+ /* So now set up the ownership, ghosts, and overlap */
+
+ /* Ownership is pretty easy. Remember ownership is indexed as
+ [stagger][ijk][min/max]. See pGF_Reduction for a use of this,
+ among others.
+ Note: Ownership is same for staggered and non-staggered grids
+ */
+ GH->ownership[PUGH_VERTEXCTR][0][0] = (GH->lb[GH->myproc][0] == 0 ?
+ 0 : GH->stencil_width);
+ GH->ownership[PUGH_VERTEXCTR][1][0] = (GH->lb[GH->myproc][1] == 0 ?
+ 0 : GH->stencil_width);
+ GH->ownership[PUGH_VERTEXCTR][2][0] = (GH->lb[GH->myproc][2] == 0 ?
+ 0 : GH->stencil_width);
+
+ GH->ownership[PUGH_VERTEXCTR][0][1]=(GH->ub[GH->myproc][0] == GH->nx-1 ?
+ GH->lnx : GH->lnx - GH->stencil_width);
+ GH->ownership[PUGH_VERTEXCTR][1][1]=(GH->ub[GH->myproc][1] == GH->ny-1 ?
+ GH->lny : GH->lny - GH->stencil_width);
+ GH->ownership[PUGH_VERTEXCTR][2][1]=(GH->ub[GH->myproc][2] == GH->nz-1 ?
+ GH->lnz : GH->lnz - GH->stencil_width);
+ /* correct for periodic identification : Tue Jun 17 08:40:15 CDT 1997 */
+ if(GH->periodic)
+ {
+ GH->ownership[PUGH_VERTEXCTR][0][0] = GH->stencil_width;
+ GH->ownership[PUGH_VERTEXCTR][1][0] = GH->stencil_width;
+ GH->ownership[PUGH_VERTEXCTR][2][0] = GH->stencil_width;
+ GH->ownership[PUGH_VERTEXCTR][0][1] = GH->lnx - GH->stencil_width;
+ GH->ownership[PUGH_VERTEXCTR][1][1] = GH->lny - GH->stencil_width;
+ GH->ownership[PUGH_VERTEXCTR][2][1] = GH->lnz - GH->stencil_width;
+ }
+
+
+ if (GH->nx == 1) {
+ GH->ownership[PUGH_VERTEXCTR][0][0] = 0;
+ GH->ownership[PUGH_VERTEXCTR][0][1] = 1;
+ }
+ if (GH->ny == 1) {
+ GH->ownership[PUGH_VERTEXCTR][1][0] = 0;
+ GH->ownership[PUGH_VERTEXCTR][1][1] = 1;
+ }
+ if (GH->nz == 1) {
+ GH->ownership[PUGH_VERTEXCTR][2][0] = 0;
+ GH->ownership[PUGH_VERTEXCTR][2][1] = 1;
+ }
+
+ /* Great now set up the overlap region. This is the region we own,
+ but which is covered by our neighbors ghost. So this is what we
+ want to SEND
+ */
+ for (dir=0;dir<6;dir++) {
+ int tmp;
+ /* We want to send at the dir - 1 */
+ istart = 0; iend = GH->lnx;
+ jstart = 0; jend = GH->lny;
+ kstart = 0; kend = GH->lnz;
+
+    /* Restrict to direction-specific sweeps ... */
+ if (dir == 0) {
+ /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */
+ istart = GH->stencil_width;
+ iend = istart + GH->stencil_width;
+ }
+ if (dir == 1) {
+ /* Want to go from nx - 2 sw to nx - sw
+ (eg nx-2 to < nx-1, nx-4 to < nx-2)
+	 Luckily iend is already set to nx ... */
+ tmp = iend;
+ istart = tmp - 2 * GH->stencil_width;
+ iend = tmp - GH->stencil_width;
+ }
+
+ if (dir == 2) {
+ /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */
+ jstart = GH->stencil_width;
+ jend = jstart + GH->stencil_width;
+ }
+ if (dir == 3) {
+      /* Want to go from ny - 2 sw to ny - sw
+	 (eg ny-2 to < ny-1, ny-4 to < ny-2)
+	 Luckily jend is already set to ny ... */
+ tmp = jend;
+ jstart = tmp - 2 * GH->stencil_width;
+ jend = tmp - GH->stencil_width;
+ }
+
+ if (dir == 4) {
+ /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */
+ kstart = GH->stencil_width;
+ kend = kstart + GH->stencil_width;
+ }
+ if (dir == 5) {
+      /* Want to go from nz - 2 sw to nz - sw
+	 (eg nz-2 to < nz-1, nz-4 to < nz-2)
+	 Luckily kend is already set to nz ... */
+ tmp = kend;
+ kstart = tmp - 2 * GH->stencil_width;
+ kend = tmp - GH->stencil_width;
+ }
+ GH->overlap[PUGH_VERTEXCTR][dir][0][0] = istart;
+ GH->overlap[PUGH_VERTEXCTR][dir][0][1] = iend;
+ GH->overlap[PUGH_VERTEXCTR][dir][1][0] = jstart;
+ GH->overlap[PUGH_VERTEXCTR][dir][1][1] = jend;
+ GH->overlap[PUGH_VERTEXCTR][dir][2][0] = kstart;
+ GH->overlap[PUGH_VERTEXCTR][dir][2][1] = kend;
+ }
+
+ /* And finally the ghost indices. This used to be in
+ pGF_FinishRecv.c, but now it is here.
+ */
+ for (dir=0;dir<6;dir++) {
+ int tmp;
+ /* We want to send at the dir - 1 */
+ istart = 0; iend = GH->lnx;
+ jstart = 0; jend = GH->lny;
+ kstart = 0; kend = GH->lnz;
+
+    /* Restrict to direction-specific sweeps ... */
+ /* Remember this is SW less than the other way... */
+ if (dir == 0) {
+      /* Ghost zone is the first stencil_width planes: 0 to < sw */
+ istart = 0;
+ iend = GH->stencil_width;
+ }
+ if (dir == 1) {
+      /* Want to go from nx - sw to nx
+	 (eg nx-1 to < nx, nx-2 to < nx)
+	 Luckily iend is already set to nx ... */
+ tmp = iend;
+ istart = tmp - GH->stencil_width;
+ iend = tmp;
+ }
+
+ if (dir == 2) {
+      /* Ghost zone is the first stencil_width planes: 0 to < sw */
+ jstart = 0;
+ jend = GH->stencil_width;
+ }
+ if (dir == 3) {
+      /* Want to go from ny - sw to ny
+	 (eg ny-1 to < ny, ny-2 to < ny)
+	 Luckily jend is already set to ny ... */
+ tmp = jend;
+ jstart = tmp - GH->stencil_width;
+ jend = tmp;
+ }
+
+ if (dir == 4) {
+      /* Ghost zone is the first stencil_width planes: 0 to < sw */
+ kstart = 0;
+ kend = GH->stencil_width;
+ }
+ if (dir == 5) {
+      /* Want to go from nz - sw to nz
+	 (eg nz-1 to < nz, nz-2 to < nz)
+	 Luckily kend is already set to nz ... */
+ tmp = kend;
+ kstart = tmp - GH->stencil_width;
+ kend = tmp;
+ }
+
+ GH->ghosts[PUGH_VERTEXCTR][dir][0][0] = istart;
+ GH->ghosts[PUGH_VERTEXCTR][dir][0][1] = iend;
+ GH->ghosts[PUGH_VERTEXCTR][dir][1][0] = jstart;
+ GH->ghosts[PUGH_VERTEXCTR][dir][1][1] = jend;
+ GH->ghosts[PUGH_VERTEXCTR][dir][2][0] = kstart;
+ GH->ghosts[PUGH_VERTEXCTR][dir][2][1] = kend;
+ }
+
+ /* if staggering, set up ownership, overlaps, ghosts */
+ if(GH->stagger == PUGH_STAGGER) {
+ /* copy ownership from PUGH_VERTEXCTR */
+ for(i=1;i<PUGH_NSTAGGER;i++)
+ for(j=0;j<3;j++)
+ for(k=0;k<2;k++)
+ GH->ownership[i][j][k] = GH->ownership[PUGH_VERTEXCTR][j][k];
+
+ /* correct ownership for staggered grids */
+ for(i=0;i<PUGH_NSTAGGER;i++)
+ {
+ /* x-dir */
+ if(GH->ub[GH->myproc][0] != GH->nx-1)
+ GH->ownership[i][0][1] --;
+ else
+ if(i == PUGH_FACECTR_X)
+ GH->ownership[i][0][1] --;
+ /* y-dir */
+ if(GH->ub[GH->myproc][1] != GH->ny-1)
+ GH->ownership[i][1][1] --;
+ else
+ if(i == PUGH_FACECTR_Y)
+ GH->ownership[i][1][1] --;
+ /* z-dir */
+ if(GH->ub[GH->myproc][2] != GH->nz-1)
+ GH->ownership[i][2][1] --;
+ else
+ if(i == PUGH_FACECTR_Z)
+ GH->ownership[i][2][1] --;
+ }
+
+ /* set overlaps for staggered grids */
+ for(staglcv=0;staglcv<PUGH_NSTAGGER;staglcv++)
+ for(dir=0;dir<6;dir++)
+ {
+ istart = 0; iend = GH->lnx;
+ jstart = 0; jend = GH->lny;
+ kstart = 0; kend = GH->lnz;
+ if(staglcv == PUGH_FACECTR_X) iend --;
+ if(staglcv == PUGH_FACECTR_Y) jend --;
+ if(staglcv == PUGH_FACECTR_Z) kend --;
+
+ if(dir == 0)
+ {
+ istart = GH->stencil_width;
+ iend = 2 * GH->stencil_width;
+ if(staglcv != PUGH_FACECTR_X)
+ {
+ istart ++;
+ iend ++;
+ }
+ }
+ if(dir == 1)
+ {
+ istart = GH->lnx - 2 * GH->stencil_width -1;
+ iend = istart + GH->stencil_width;
+ }
+ if(dir == 2)
+ {
+ jstart = GH->stencil_width;
+ jend = 2 * GH->stencil_width;
+ if(staglcv != PUGH_FACECTR_Y)
+ {
+ jstart ++;
+ jend ++;
+ }
+ }
+ if(dir == 3)
+ {
+ jstart = GH->lny - 2 * GH->stencil_width -1;
+ jend = jstart + GH->stencil_width;
+ }
+ if(dir == 4)
+ {
+ kstart = GH->stencil_width;
+ kend = 2 * GH->stencil_width;
+ if(staglcv != PUGH_FACECTR_Z)
+ {
+ kstart ++;
+ kend ++;
+ }
+ }
+ if(dir == 5)
+ {
+ kstart = GH->lnz - 2 * GH->stencil_width -1;
+ kend = kstart + GH->stencil_width;
+ }
+ GH->overlap[staglcv][dir][0][0] = istart;
+ GH->overlap[staglcv][dir][0][1] = iend;
+ GH->overlap[staglcv][dir][1][0] = jstart;
+ GH->overlap[staglcv][dir][1][1] = jend;
+ GH->overlap[staglcv][dir][2][0] = kstart;
+ GH->overlap[staglcv][dir][2][1] = kend;
+ }
+
+
+ /* set ghosts for staggered grids */
+ for(staglcv=0;staglcv<PUGH_NSTAGGER;staglcv++)
+ for(dir=0;dir<6;dir++)
+ {
+ istart = 0; iend = GH->lnx;
+ jstart = 0; jend = GH->lny;
+ kstart = 0; kend = GH->lnz;
+ if(staglcv == PUGH_FACECTR_X) iend --;
+ if(staglcv == PUGH_FACECTR_Y) jend --;
+ if(staglcv == PUGH_FACECTR_Z) kend --;
+
+ if(dir == 0)
+ {
+ istart = 0;
+ iend = GH->stencil_width;
+ }
+ if(dir == 1)
+ {
+ istart = GH->lnx - GH->stencil_width;
+ iend = GH->lnx;
+ if(staglcv == PUGH_FACECTR_X)
+ {
+ istart --;
+ iend --;
+ }
+ }
+ if(dir == 2)
+ {
+ jstart = 0;
+ jend = GH->stencil_width;
+ }
+ if(dir == 3)
+ {
+ jstart = GH->lny - GH->stencil_width;
+ jend = GH->lny;
+ if(staglcv == PUGH_FACECTR_Y)
+ {
+ jstart --;
+ jend --;
+ }
+ }
+ if(dir == 4)
+ {
+ kstart = 0;
+ kend = GH->stencil_width;
+ }
+ if(dir == 5)
+ {
+ kstart = GH->lnz - GH->stencil_width;
+ kend = GH->lnz;
+ if(staglcv == PUGH_FACECTR_Z)
+ {
+ kstart --;
+ kend --;
+ }
+ }
+ GH->ghosts[staglcv][dir][0][0] = istart;
+ GH->ghosts[staglcv][dir][0][1] = iend;
+ GH->ghosts[staglcv][dir][1][0] = jstart;
+ GH->ghosts[staglcv][dir][1][1] = jend;
+ GH->ghosts[staglcv][dir][2][0] = kstart;
+ GH->ghosts[staglcv][dir][2][1] = kend;
+ }
+ }
+
+
+#ifdef MPI
+ /* Great, now we have made these, lets set up the derived data types */
+ for (i=0;GH->commmodel==PUGH_DERIVEDTYPES && i<PUGH_NSTAGGER;i++) {
+ int *dtindices, *one, dir;
+ int ii,jj,kk,ll;
+
+ /* OK so now lets set up the communications datatypes */
+ dirs[0] = GH->lny * GH->lnz;
+ dirs[1] = GH->lnx * GH->lnz;
+ dirs[2] = GH->lnx * GH->lny;
+
+ maxs = dirs[0];
+ maxs = (dirs[1] > maxs ? dirs[1] : maxs);
+ maxs = (dirs[2] > maxs ? dirs[2] : maxs);
+
+ maxs *= GH->stencil_width;
+
+
+ dtindices = (int *)malloc(maxs*sizeof(int));
+ one = (int *)malloc(maxs*sizeof(int));
+
+ for (dir=0;dir<6;dir++) {
+ /* Great so now calculate. Sends first */
+ ll = 0;
+ for (kk = GH->overlap[i][dir][2][0];
+ kk < GH->overlap[i][dir][2][1];
+ kk ++)
+ for (jj = GH->overlap[i][dir][1][0];
+ jj < GH->overlap[i][dir][1][1];
+ jj ++)
+ for (ii = GH->overlap[i][dir][0][0];
+ ii < GH->overlap[i][dir][0][1];
+ ii ++) {
+ dtindices[ll] = DATINDEX(GH,ii,jj,kk);
+ one[ll] = 1;
+ ll++;
+ }
+ MPI_Type_indexed(ll,one,dtindices,PUGH_MPI_TYPE,
+ &(GH->send_dt[i][dir]));
+ CACTUS_MPI_ERROR(MPI_Type_commit(&(GH->send_dt[i][dir])));
+      /* And receives */
+ ll = 0;
+ for (kk = GH->ghosts[i][dir][2][0];
+ kk < GH->ghosts[i][dir][2][1];
+ kk ++)
+ for (jj = GH->ghosts[i][dir][1][0];
+ jj < GH->ghosts[i][dir][1][1];
+ jj ++)
+ for (ii = GH->ghosts[i][dir][0][0];
+ ii < GH->ghosts[i][dir][0][1];
+ ii ++) {
+ dtindices[ll] = DATINDEX(GH,ii,jj,kk);
+ one[ll] = 1;
+ ll++;
+ }
+ MPI_Type_indexed(ll,one,dtindices,PUGH_MPI_TYPE,
+ &(GH->recv_dt[i][dir]));
+ CACTUS_MPI_ERROR(MPI_Type_commit(&(GH->recv_dt[i][dir])));
+ }
+
+ free(dtindices);
+ free(one);
+ }
+#endif
+
+}
+
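As a usage sketch (not part of this patch), the ownership table built above is what local operations iterate over so that ghost points are not counted twice across processors; a minimal reduction-style loop, assuming the pugh.h declarations (compare the use noted for pGF_Reduction):

    /* Sketch: sum over the points this processor owns, skipping ghost zones. */
    static Double owned_sum(pGH *GH, Double *data)
    {
      int i, j, k;
      Double sum = 0.0;

      for (k = GH->ownership[PUGH_VERTEXCTR][2][0];
           k < GH->ownership[PUGH_VERTEXCTR][2][1]; k++)
        for (j = GH->ownership[PUGH_VERTEXCTR][1][0];
             j < GH->ownership[PUGH_VERTEXCTR][1][1]; j++)
          for (i = GH->ownership[PUGH_VERTEXCTR][0][0];
               i < GH->ownership[PUGH_VERTEXCTR][0][1]; i++)
            sum += data[DATINDEX(GH, i, j, k)];
      return sum;
    }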
+#if 0
+
+ /*@@
+ @routine DestroyPGH
+ @date Thu Aug 21 11:38:10 1997
+ @author Paul Walker
+ @desc
+ Destroys a GH object.
+ @enddesc
+@@*/
+void DestroyPGH(pGH **GHin) {
+ /* First remove me from the list. */
+ pGHList *tmp, *x;
+ pGH *GH;
+ int i;
+ int didit;
+
+ GH = *GHin;
+
+#ifdef MPI
+ CACTUS_MPI_ERROR(MPI_Comm_free(&(GH->PUGH_COMM_WORLD)));
+#endif
+
+ didit = 0;
+ if (GHList->GH == GH) {
+ x = GHList;
+ GHList = GHList->next;
+ didit = 1;
+ } else {
+ for (tmp = GHList; tmp->next && tmp->next->GH != GH; tmp = tmp->next) {
+ /* Do nothing */
+ }
+ if (tmp->next) {
+ x = tmp->next;
+ tmp->next = tmp->next->next;
+ didit = 1;
+ }
+ }
+ if (!didit) {
+ printf ("Can't find GH in list\n");
+ } else {
+ free(x);
+ }
+
+ /* Great. Now go about the work of destroying me. */
+ for (i=0;i<GH->ngridFuncs;i++)
+ DestroyPGF(GH, &(GH->gridFuncs[i]));
+
+ for (i=0;i<GH->nArrays;i++){
+ fflush(stdout);
+ DestroyPGArray(GH,&(GH->Arrays[i]));
+ }
+
+ free(GH->gridFuncs);
+ free(GH->Arrays);
+
+ free(GH->lb[0]);
+ free(GH->lb);
+
+ free(GH->ub[0]);
+ free(GH->ub);
+
+ free(GH->rnpoints);
+ free(GH->rnx);
+ free(GH->rny);
+ free(GH->rnz);
+
+ free(GH->neighbors[0]);
+ free(GH->neighbors);
+
+ free(GH);
+ *GHin = NULL;
+
+ fflush(stdout);
+}
+
+#endif
+
+ /*@@
+ @routine ProcTop
+ @date Thu May 1 10:13:40 1997
+ @author Paul Walker
+ @desc
+ Figures out a reasonable processor topology for a square
+ grid on n processors. That is, it figures out a,b,c such
+ that a*b*c = n and a,b, and c are in some senses minimal.
+ <p>
+ This algorithm will not guarantee the minimal condition,
+ or at least I can't prove it does, but it seems to on
+ a wide variety of processor numbers. So stop being a
+ weenie - not everything needs a proof!
+ <p>
+     This came to me in the shower, and is much better than
+ the old way I was doing it. I'm considering having my
+ working environment be the shower for the rest of time,
+ but there are some problems waterproofing my workstation.
+ @enddesc
+ @calledby SetupPGH
+@@*/
+
+void ProcTop(int np, int *npx, int *npy, int *npz) {
+ int x,y,z,r;
+
+ if (Contains("proc_topology","manual")) {
+ x = Geti("proc_top_nx");
+ y = Geti("proc_top_ny");
+ z = Geti("proc_top_nz");
+ if (x * y * z != np) {
+ printf ("Manual processor topology error. %d x %d x %d != %d\n",
+ x,y,z,np);
+#ifdef MPI
+ CACTUS_MPI_ERROR(MPI_Finalize());
+#endif
+ exit(0);
+ }
+ } else {
+ z = (int)(pow(np,1.0/3.0)+0.0001);
+ while (np % z != 0) z--;
+ r = np / z;
+ y = (int)(sqrt(r)+0.0001);
+ while (r % y != 0) y--;
+ x = r / y;
+
+#ifdef THORN_CARTOON_2D
+ if (Contains("cartoon_active","yes")) {
+ z = (int)(sqrt(np)+0.0001);
+ while (np % z != 0) z--;
+ y = 1;
+ x = np / z;
+ }
+#endif
+
+ if (x * y * z != np) {
+ printf ("FATAL ERROR in Processor Topology\n");
+ printf ("This should never happen. Please report ASAP!\n");
+ exit(0);
+ }
+ }
+
+ /* NB: PW reverses these for better alignment of comm groups */
+ *npz = x;
+ *npy = y;
+ *npx = z;
+
+}
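A worked sketch of what the automatic branch of ProcTop above computes, using np = 12 as an assumed example (manual topology and the cartoon special case are skipped):

    #include <math.h>
    #include <stdio.h>

    /* Illustrative sketch, not part of this patch. */
    int main(void)
    {
      int np = 12, x, y, z, r;
      z = (int)(pow(np, 1.0/3.0) + 0.0001);   /* 2                          */
      while (np % z != 0) z--;                /* 12 % 2 == 0, so z stays 2  */
      r = np / z;                             /* 6                          */
      y = (int)(sqrt(r) + 0.0001);            /* 2                          */
      while (r % y != 0) y--;                 /* stays 2                    */
      x = r / y;                              /* 3                          */
      /* SetupPGH then reverses these: nprocx = 2, nprocy = 2, nprocz = 3   */
      printf("%d x %d x %d = %d\n", x, y, z, x*y*z);
      return 0;
    }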
diff --git a/src/Startup.c b/src/Startup.c
new file mode 100644
index 0000000..1f2f25a
--- /dev/null
+++ b/src/Startup.c
@@ -0,0 +1,67 @@
+ /*@@
+ @file Startup.c
+ @date Wed Feb 3 23:10:19 1999
+ @author Tom Goodale
+ @desc
+ Startup routines for pugh.
+ @enddesc
+ @@*/
+
+#include <stdio.h>
+
+#include "flesh.h"
+#include "GHExtensions.h"
+
+#include "pugh_extension.h"
+#include "pugh_Comm.h"
+
+static char *rcsid="$Header$";
+
+/* Store the handle in a global variable for the moment. */
+int PUGH_GHExtension;
+
+
+ /*@@
+ @routine pugh_Startup
+ @date Wed Feb 3 23:14:38 1999
+ @author Tom Goodale
+ @desc
+ The startup registration routine for PUGH.
+ @enddesc
+ @calls
+ @calledby
+ @history
+
+ @endhistory
+
+@@*/
+void pugh_Startup(void)
+{
+ printf("Registering PUGH GH extension\n");
+
+ /* Register the stuff in the GH extension. */
+ PUGH_GHExtension = CCTK_RegisterGHExtension("PUGH");
+
+ CCTK_RegisterGHExtensionSetupGH(PUGH_GHExtension, pugh_SetupGH);
+
+ CCTK_RegisterGHExtensionInitGH(PUGH_GHExtension, pugh_InitGH);
+
+ CCTK_RegisterGHExtensionrfrTraverseGH(PUGH_GHExtension, pugh_rfrTraverseGH);
+
+ /* Overload some functions */
+ CCTK_OverloadSyncGroup(pugh_SyncGroup);
+ CCTK_OverloadEnableGroupStorage(pugh_EnableGroupStorage);
+  CCTK_OverloadDisableGroupStorage(pugh_DisableGroupStorage);
+
+ CCTK_OverloadEnableGroupComm(pugh_EnableGroupComm);
+  CCTK_OverloadDisableGroupComm(pugh_DisableGroupComm);
+
+ CCTK_OverloadBarrier(pugh_Barrier);
+ CCTK_OverloadReduce(pugh_Reduce);
+ CCTK_OverloadInterp(pugh_Interp);
+
+ CCTK_OverloadParallelInit(pugh_ParallelInit);
+ CCTK_OverloadExit(pugh_Exit);
+ CCTK_OverloadAbort(pugh_Abort);
+
+}
diff --git a/src/include/pGArray.h b/src/include/pGArray.h
new file mode 100644
index 0000000..a30b04b
--- /dev/null
+++ b/src/include/pGArray.h
@@ -0,0 +1,20 @@
+ /*@@
+ @file pGArray.h
+ @date
+ @author
+ @desc
+ @enddesc
+ @version $Id$
+ @@*/
+
+typedef struct PGARRAY {
+ char *name;
+ int pgano;
+ Double *data;
+
+ int npoints;
+ struct PGH *parentGH;
+
+ int do_arrayio;
+ IOFile IEEEfile;
+} pGArray;
diff --git a/src/include/pGF.h b/src/include/pGF.h
new file mode 100644
index 0000000..d8dd2fa
--- /dev/null
+++ b/src/include/pGF.h
@@ -0,0 +1,100 @@
+ /*@@
+ @header pGF.h
+ @date Fri Feb 21 10:16:32 1997
+ @author Paul Walker
+ @desc
+ The pugh Grid Function structure. This is the heart of
+ the data storage and the like. You can't have a GF
+ without a GH, so you'd better see @seefile pGH.h
+ also.
+ <p>
+ One wonderfully new feature that a pGF has is that it contains
+ both a Double* padddata and a Double* data. What is the difference?
+ Well data is the base address of the fortran data set. It points
+ into padddata. This allows us, if we want to, to explicitly move
+ the starting point of the array to align with cache lines.
+ Important on some architectures. So the deal is if we are
+     in padding mode (see @seeroutine EnableGFDataStorage for more
+     on that) the padddata allocation will be bigger than lnx * lny * lnz,
+     and data will point to an address inside padddata chosen so that
+     lnx * lny * lnz points further down the data pointer is still inside
+     padddata. If we are not in padding mode, data = padddata as pointers.
+     <p>
+     The only place we explicitly reference padddata is when we
+     create or de-allocate data. <b>All other operations should
+     be done on *data</b>
+ @enddesc
+ @version $Id$
+ @@*/
+
+#define DATINDEX(GH,i,j,k) ((i) + GH->lnx*((j)+GH->lny*(k)))
+#define DI(GH,i,j,k) ((i) + GH->lnx*((j)+GH->lny*(k)))
+#define GDATINDEX(GH,i,j,k) ((i) + GH->nx*((j)+GH->ny*(k)))
+
+#ifndef IEEEIO
+typedef long IOFile;
+#endif
+
+typedef struct PGF {
+ char *name; /* The name of the grid function */
+ int gfno; /* My ID number in my GH parent. */
+ Double *padddata; /* Storage for the data. */
+ Double *data; /* See the note above. */
+ int buffer_sz[6]; /* Size of the face ghost zones */
+ Double **send_buffer; /* Storage for buffered Comm if */
+ Double **recv_buffer; /* we don't use derived types */
+ int commflag; /* What is the comm flag set to? */
+ int docomm[6]; /* Do we do comm or not? */
+ int storage; /* Do we have storage or not? */
+ int stagger; /* Only Vertex Centered now... */
+
+ struct PGH *parentGH; /* The GH to which I belong */
+				  /* Note this is struct PGH which is
+				     typedef'd to pGH in pGH.h, but
+ that is included AFTER this so we
+ need the full name for the lookahead
+ structure reference thingy.
+ */
+
+ /* IO Related things */
+ FILE *convfile; /* File for convergence information */
+ FILE **xgfile; /* x,y,z and dline file pointers */
+ FILE **zerodfile; /* min,max,norm1,norm2 file pointers */
+ IOFile IEEEfile; /* File for IEEEIO */
+ char IEEEfname[512]; /* IEEEIO 3D File name (to preserve handles
+ we will calculate this once and then
+ perhaps reopen and close it).
+ */
+ IOFile IEEEfile_2d[3]; /* Two-D IEEEIO File in each dir */
+ int do_3dio; /* Do 3D I/O state flag */
+ int do_1dio; /* Do 1D I/O state flag */
+ int do_2dio; /* Do 2D I/O state flag */
+ int do_0dio; /* Do 0D (eg, min max) I/O */
+ int show_maxval; /* Show runtime maxval state flag */
+  int do_conv;             /* Convergence test this function?   */
+ int lastio_it[4]; /* Last iteration for I/O at 1, 2, and 3D.
+ We need this since we don't want to
+ multiply write data, which we can do
+ in convergence mode. So rather than
+ be clever we just store the iteration
+ when we write, and then don't write at
+ the same iteration twice.
+ */
+
+ Double maxval, minval; /* To store in case we toggle storage */
+ Double norm1, norm2;
+#ifdef MPI
+ MPI_Request sreq[6], rreq[6]; /* Comm requests and statuses. */
+ MPI_Status ms;
+#endif
+
+#if 0
+#include "pGF_Extensions.h"
+
+#ifdef THORN_EXACT /* thorn Comp and thorn Exact merge */
+#include "thorn_exact/src/include/pGF_ComparisonExtensions.h"
+#endif
+
+#endif
+
+} pGF;
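A worked sketch of the cache-line padding described in the header comment above; the numbers are assumptions for illustration (compare EnableGFDataStorage in src/pugh/SetupPGF.c):

    #include <stdio.h>

    /* Illustrative sketch, not part of this patch. */
    int main(void)
    {
      /* Assumed values: padding_cacheline_bits = 4 (16 Doubles per line),
         padding_address_spacing = 3, and a padddata allocation that happens
         to start 5 Doubles past a cache-line boundary. */
      int cache_size  = 2 << (4 - 1);                                  /* 16 */
      int start       = 5;     /* ((long)padddata / sizeof(Double)) % cache_size */
      int gfno        = 2;     /* the third grid function on this GH           */
      int special_pad = (gfno * 3 + cache_size - start) % cache_size; /* 1     */

      /* data = padddata + special_pad, so each grid function starts at its
         own offset (gfno * spacing) within a cache line. */
      printf("special_pad = %d, offset in line = %d\n",
             special_pad, (start + special_pad) % cache_size);        /* 1, 6  */
      return 0;
    }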
diff --git a/src/include/pGH.h b/src/include/pGH.h
new file mode 100644
index 0000000..4318612
--- /dev/null
+++ b/src/include/pGH.h
@@ -0,0 +1,132 @@
+ /*@@
+ @header pGH.h
+ @date Fri Feb 21 10:12:58 1997
+ @author
+ @desc
+ The Pugh Grid Hierarchy structure. There isn't much point in having
+ a GridHierarchy without GridFunctions, so you'd better see
+ @seefile pGF.h also.
+ @enddesc
+ @history
+ @hauthor Gabrielle Allen
+ @hdate Sep 10 @hdesc Added the iteration number on a GH
+ @endhistory
+ @history
+ @hauthor Gabrielle Allen @hdate 26 Sep 1998
+ @desc Changed IO parameter names, removed GH->iomode, added t_iopars
+ structure
+ @hauthor Gabrielle Allen @hdate 17 Oct 1998
+ @desc Added more IO parameters to IO structure
+ structure
+ @hauthor Gabrielle Allen @hdate 3 Nov 1998
+ @desc Added forceSync
+ @endhistory
+ @version $Id$
+ @@*/
+
+
+typedef struct PGH {
+
+#ifdef MPI
+  MPI_Comm PUGH_COMM_WORLD;		/* An MPI communicator */
+#endif
+
+ /* Size of the processor group */
+ int nprocs; /* Number of processors */
+ int myproc; /* My processor */
+ int nprocx,nprocy,nprocz; /* Processor topology */
+ int commmodel; /* Comm model is PUGH_ALLOCATEDBUFFERS */
+ /* or PUGH_DERIVEDTYPES. Currently unused */
+
+ /* Size of the problems */
+ int ngridFuncs; /* # of Grid Functions */
+ pGF **gridFuncs; /* Pointers to them */
+ int nx,ny,nz; /* Physical size of the Problem */
+ int stencil_width; /* Width of ghost zone */
+
+ int nArrays; /* # of pGArrays */
+ pGArray **Arrays; /* Pointers to them */
+
+ /* Processor group layouts */
+ Double maxskew; /* Maximum point skew */
+ int periodic; /* Is the system periodic? */
+ int **lb; /* Lower bound (nprocs X 3) for each proc */
+ int **ub; /* Upper bound (same sizes) */
+ int lnx, lny, lnz; /* Size on this processor */
+ int npoints; /* LOCAL number of points on this proc. */
+ int *rnpoints; /* Number of points on each proc */
+ int *rnx, *rny, *rnz; /* Number of points on each proc */
+ int **neighbors; /* An array of size nprocs X 6 of the
+ * neighbors on each side. The entry in
+ * this array is the processor number of
+ * the neighbor.
+ */
+ int stagger; /* Is staggering turned on? */
+ int spxyz[3]; /* From what index to start slicing: 1 for octant, */
+ /* nx/2 for box, e.g. Set in SetupSlicingCenter, */
+                                /* used in Write3D and modified in BoxInBox.c !     */
+ int forceSync; /* force synchronisation of GFs with storage */
+
+ int sp2dxyz[3]; /* Slice point for two d slices. */
+
+ /* Information about ghosts and overlaps. */
+ int ownership[PUGH_NSTAGGER][3][2]; /* The box owned in each direction. */
+ /* Indexing here is [stagger][ijk][min/max]*/
+
+ int ghosts[PUGH_NSTAGGER][6][3][2]; /* The ghost zones on each face. */
+ /* Indexing here is */
+ /* [stagger][face][ijk][min/max] */
+
+ int overlap[PUGH_NSTAGGER][6][3][2]; /* The overlap region owned on each face. */
+ /* Indexing here is */
+ /* [stagger][face][ijk][min/max] */
+
+
+ /* Coordinate information */
+ Double cx0, cy0, cz0; /* Origin of our system */
+ Double dx0, dy0, dz0, dt0; /* Delta of our system */
+ Double lx0, ly0, lz0; /* Processor-Local coordinate origins */
+ Double phys_time; /* physical time */
+ int GHiteration; /* iteration number on processor */
+
+
+
+
+ /* currently, all GHs are listed in a single, global GHList */
+ /* they are distinguished by a unique combination of level numbers */
+ int level; /* level in Berger-Oliger stack */
+ int mglevel; /* level in multigrid stack */
+ int convlevel; /* level in convergence stack */
+
+ int active; /* 1 -> GH active; 0 -> GH is skipped for conv.test */
+ /* is used in NanCheckGH.c */
+#ifdef MPI
+ /* Derived data types for communication */
+ MPI_Datatype send_dt[PUGH_NSTAGGER][6], recv_dt[PUGH_NSTAGGER][6];
+#endif
+
+ /* Extension of this structure can be brought in by thorns */
+ /*
+#include "pGH_Extensions.h"
+ */
+} pGH;
+
+
+/* This defines a single linked list of grid structures */
+/* the first item has level=0, next level=1, etc.. */
+
+typedef struct pGHLIST {
+ pGH *GH;
+ struct pGHLIST *next;
+} pGHList;
+
+
+
+#define XDP 1
+#define XDM 0
+#define YDP 3
+#define YDM 2
+#define ZDP 5
+#define ZDM 4
+
+
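As a usage sketch (not part of this patch), the neighbors table and the XD/YD/ZD constants above are read like this, assuming a fully set-up pGH; -1 marks a physical (non-periodic) boundary:

    /* Sketch: does this processor have someone to exchange +x ghosts with? */
    static int has_xplus_neighbour(pGH *GH)
    {
      return GH->neighbors[GH->myproc][XDP] >= 0;
    }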
diff --git a/src/include/pugh.h b/src/include/pugh.h
new file mode 100644
index 0000000..673eea0
--- /dev/null
+++ b/src/include/pugh.h
@@ -0,0 +1,150 @@
+/*@@
+ @header pugh.h
+ @author Paul Walker
+ @date March 1997
+ @desc
+ This is just a bunch of includes and some simple definitions.
+ You should really see @seefile pGF.h @seefile pGH.h and
+ @seefile pughProtos.h for the pugh bit, and @seefile
+ pughProblem.h for the cactus-specific bits.
+ @enddesc
+ @version $Id$
+@@*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <malloc.h>
+#include <assert.h>
+
+/* 3 ways of shutting down: */
+#define STOP cshutdown(0);
+#define ABORT cabort(0);
+#define CORE ccore();
+
+/* Two ways to shut down with a non-zero exit code. */
+#define ERROR_STOP(xerrcode) cshutdown(xerrcode);
+#define ERROR_ABORT(xerrcode) cabort(xerrcode);
+
+
+/* We have our own Cactus memory routines in pughutils.c */
+
+#if defined(SGI) || defined(T3E)
+#undef malloc
+#define malloc(x) cactus_malloc(x)
+void *cactus_malloc(size_t);
+#undef free
+#define free(x) cactus_free(x)
+void cactus_free(void *);
+#endif
+
+#ifdef MPI
+#include "mpi.h"
+#endif
+
+#ifdef HDF
+#include "hdf.h"
+#endif
+
+#ifdef IEEEIO
+#include "IEEEIO.h"
+#endif
+
+#ifdef Panda_IO
+#include "PandaIO.h"
+#endif
+
+#ifdef TCP
+#include "TCP++/DataConnection.h"
+#include "TCP++/CommandProcessor.h"
+#endif
+
+/* Handle precision */
+#ifdef SINGLE_PRECISION
+#define PUGH_MPI_TYPE MPI_FLOAT
+typedef float Double;
+#else
+#define PUGH_MPI_TYPE MPI_DOUBLE
+typedef double Double;
+#endif
+
+#include "pugh_constants.h"
+
+#if 0
+/* We need to include active thorns here so that the thorns can
+ modify the grid function and grid hierarchy (as box in box does).
+ */
+#include "pughActiveThorns.h"
+
+#ifdef THORN_BOXINBOX
+#include "AMRwriter.h"
+#endif
+
+#endif
+
+#include "pGF.h"
+#include "pGArray.h"
+#include "pGH.h"
+
+
+#if 0
+/* Timing stuff */
+#include "pughTimers.h"
+
+#include "pughProtos.h"
+
+
+
+#ifndef WIN32
+#define BOLDON "\033[1m"
+#define BOLDOFF "\033[0m"
+#else
+#define BOLDON ""
+#define BOLDOFF ""
+#endif
+
+#if (defined(RS6000) || defined(SPX)||defined(HP))
+#define FORTRAN_NAME(x,y,z) z
+#else
+#if (defined(nCUBE2) || defined(CRAYC90) || defined(CRAYT3D) || defined (CRAYT3E))
+#define FORTRAN_NAME(x,y,z) y
+#else
+#if (defined(WIN32))
+#define FORTRAN_NAME(x,y,z) y
+#else
+#define FORTRAN_NAME(x,y,z) x
+#endif
+#endif
+#endif
+
+#endif /*0*/
+
+#ifdef MPI
+
+#define CACTUS_MPI_ERROR(xf) do {int errcode; \
+ if((errcode = xf) != MPI_SUCCESS) \
+ { \
+ char mpi_error_string[MPI_MAX_ERROR_STRING+1]; \
+ int resultlen; \
+ MPI_Error_string(errcode, mpi_error_string, &resultlen);\
+ fprintf(stderr, "MPI Call %s returned error code %d (%s)\n", \
+ #xf, errcode, mpi_error_string); \
+ fprintf(stderr, "At line %d of file %s\n", \
+ __LINE__, __FILE__); \
+ } \
+ } while (0)
+#endif
+
+
+#include "pughDriver.h"
+
+#if 0
+/* Runtime function repository */
+#include "rfr.h"
+
+/* Scalar Manipulation Functions. */
+
+#include "CactusScalarProtos.h"
+
+#endif
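As a usage sketch (not part of this patch), the Double typedef, PUGH_MPI_TYPE and CACTUS_MPI_ERROR above are meant to be used together so single- and double-precision builds stay consistent, e.g. for a global sum:

    #ifdef MPI
    /* Sketch: reduce a processor-local value over the pGH communicator. */
    static Double global_sum(pGH *GH, Double local)
    {
      Double global = local;
      CACTUS_MPI_ERROR(MPI_Allreduce(&local, &global, 1, PUGH_MPI_TYPE,
                                     MPI_SUM, GH->PUGH_COMM_WORLD));
      return global;
    }
    #endif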
diff --git a/src/include/pughDriver.h b/src/include/pughDriver.h
new file mode 100644
index 0000000..73415ea
--- /dev/null
+++ b/src/include/pughDriver.h
@@ -0,0 +1,68 @@
+/*@@
+ @header pughDriver.h
+ @author Paul Walker
+ @date March 1997
+ @desc
+ This is stuff having to do with the driver, namely
+ the problem and globals, as well as definitions of what
+ the fort interfaces are. Also see @seefile pughProblem.h
+ for what the cactus-specific stuff is. The macros FORT_ARGS
+ and FORT_ARGS_PROTO passed from c routines must match
+ FORT_GH_DECOMP and FORT_GHDECOMP_DECL received in fortran
+ routines defined in @seefile pughFortran.h
+ @enddesc
+ @history
+ @hauthor Gabrielle Allen
+ @hdate Sep 10 @hdesc Added the GH iteration number to fortran macros
+ @hauthor Gabrielle Allen @hdate 29 Sep 1998
+ @hdesc Removed levfac
+ @endhistory
+ @version $Id$
+ @@*/
+
+#if 0
+extern pGHList *GHList;
+
+extern int nx0, ny0, nz0;
+extern int lb[3],ub[3],sh[3],bbox[6];
+extern int convergence;
+
+/* Timing variables */
+extern double commt[PUGH_NTIMERS];
+extern double elltime[PUGH_NTIMERS];
+
+/* How long to run */
+extern int itfirst, itlast, itout; /* Iteration runs */
+extern int itout3, itout2, itout1, itout0, itoutarr, itinfo;
+
+extern int _show_storage;
+
+/* termination flags: pe-local and global: */
+extern int cactus_terminate;
+
+/* FORT_ARGS must match FORT_ARGS_PROTO below, and FORT_GH_DECOMP,
+ FORT_GHDECOMP_DECL in pughFortran.h */
+#define FORT_ARGS(xGH) xGH, \
+ &(xGH->lnx),&(xGH->lny), &(xGH->lnz),\
+ lb,ub,sh,bbox, \
+ &(xGH->nx),&(xGH->ny),&(xGH->nz),\
+ &(xGH->cx0),&(xGH->cy0),&(xGH->cz0), \
+ &(xGH->dx0),&(xGH->dy0),&(xGH->dz0),&(xGH->dt0), &(xGH->phys_time), \
+ &(xGH->level), &(xGH->nprocs), &(xGH->myproc), \
+ &(xGH->convlevel), &(xGH->stencil_width), &(xGH->GHiteration)
+
+/* FORT_ARGS_PROTO must match FORT_ARGS above, and FORT_GH_DECOMP,
+ FORT_GHDECOMP_DECL in pughFortran.h */
+#define FORT_ARGS_PROTO pGH *, \
+ int *, int *, int *, \
+ int *, int *, int *, int *, \
+ int *, int *, int *, \
+ Double *, Double *, Double *, \
+ Double *, Double *, Double *, Double *, Double *, \
+ int *, int *, int *, \
+ int *, int *, int *
+
+
+#include "pughProblem.h"
+
+#endif
diff --git a/src/include/pughProblem.h b/src/include/pughProblem.h
new file mode 100644
index 0000000..2024a09
--- /dev/null
+++ b/src/include/pughProblem.h
@@ -0,0 +1,53 @@
+/*@@
+ @header pughProblem.h
+ @author Paul Walker
+ @date March 1997
+ @desc
+     This header declares all the problem-specific globals
+ and teaches the driver how to make the grid functions
+ with the right names, and pass them to fortran. It
+ isn't that exciting, but it's pretty damned important
+ if you need to add new fields.
+ @enddesc
+ @version $Id$
+ @@*/
+
+#if 0
+#include "gfIndices.h"
+#include "pughProblemDefs.h"
+
+/* Variables that ALL codes use. */
+#define USER_VARS \
+ int *_one;
+
+/* Main externals */
+extern int *_one;
+
+/* Historical: Two macros for the same thing. PW. */
+#define PASS_FORT_FIELDS(xGH) \
+ PASS_FORT_FIELDS_SUBSET(xGH)
+
+
+/* If you add items to PASS_FORT_FIELDS make sure */
+/* you add the right type at the right position */
+/* in PROTO_FORT_FIELDS */
+#define PROTO_FORT_FIELDS \
+ PROTO_FORT_FIELDS_SUBSET
+
+
+#ifdef WIN32
+#define FMODIFIER __stdcall
+#else
+#define FMODIFIER
+#endif
+
+#ifdef THORN_CHECKPOINT
+ /* Is it possible to ifdef this? Or perhaps to rfr it? */
+#define f_recover FORTRAN_NAME(recover_,RECOVER,recover)
+void FMODIFIER f_recover(FORT_ARGS_PROTO,PROTO_FORT_FIELDS);
+#endif
+
+#include "cactus_constants.h"
+
+#endif
+
diff --git a/src/include/pugh_constants.h b/src/include/pugh_constants.h
new file mode 100644
index 0000000..f8bc138
--- /dev/null
+++ b/src/include/pugh_constants.h
@@ -0,0 +1,64 @@
+ /*@@
+ @file pugh_constants.h
+ @date
+ @author
+ @desc
+ @enddesc
+ @version $Id$
+ @@*/
+
+/* Pre-processor constants start with PUGH_ */
+
+/* Constants for the comm directions */
+#define PUGH_NOCOMM 0
+#define PUGH_ALLCOMM 1
+#define PUGH_XCOMM 2
+#define PUGH_YCOMM 3
+#define PUGH_ZCOMM 4
+#define PUGH_PLUSFACESCOMM 5
+#define PUGH_MINUSFACESCOMM 6
+
+/* Constants for the storage */
+#define PUGH_NOSTORAGE 0
+#define PUGH_STORAGE 1
+
+/* Constants for the GF Type. Only one now ... */
+/* Number of staggerings available in the code */
+#define PUGH_NSTAGGER 4
+
+/* The index for each staggering type */
+/* This classifies each PGF member */
+#define PUGH_VERTEXCTR 0
+#define PUGH_FACECTR_X 1
+#define PUGH_FACECTR_Y 2
+#define PUGH_FACECTR_Z 3
+
+/* The index for each staggering type */
+/* This determines how the pGH should be set up */
+#define PUGH_NO_STAGGER 0
+#define PUGH_STAGGER 1
+
+/* Comm types available */
+#define PUGH_NOCOMMBUFFERS 0
+#define PUGH_ALLOCATEDBUFFERS 1
+#define PUGH_DERIVEDTYPES 2
+
+/* Termination flags */
+#define TERMINATION_NOT_RAISED     0  /* no termination flag, initial setting       */
+#define TERMINATION_RAISED_LOCAL   1  /* raised on one pe, has to be broadcast      */
+#define TERMINATION_RAISED_BRDCAST 2  /* flag now available on all pes: terminate   */
+
+/* IO precision */
+#ifdef SINGLE_PRECISION
+#define IO_FLOAT FLOAT32
+#else
+#define IO_FLOAT FLOAT64
+#endif
+
+/* IO precision */
+#ifdef SINGLE_PRECISION
+#define CACTUS_FLOAT FLOAT32
+#else
+#define CACTUS_FLOAT FLOAT64
+#endif
+
diff --git a/src/make.code.defn b/src/make.code.defn
new file mode 100644
index 0000000..640faaa
--- /dev/null
+++ b/src/make.code.defn
@@ -0,0 +1,8 @@
+# Main make.code.defn file for thorn pugh
+# $Header: /usr/users/cactus/CCTK/lib/make/new_thorn.pl,v 1.1 1999/02/03 17:00:50 goodale Exp $
+# Source files in this directory
+SRCS = Startup.c GHExtension.c Comm.c SetupPGH.c
+
+# Subdirectories containing source files
+SUBDIRS =
+
diff --git a/src/pugh/SetupPGArray.c b/src/pugh/SetupPGArray.c
new file mode 100644
index 0000000..9a5cf71
--- /dev/null
+++ b/src/pugh/SetupPGArray.c
@@ -0,0 +1,84 @@
+ /*@@
+   @file      SetupPGArray.c
+ @date Fri Feb 21 11:04:20 1997
+ @author
+ @desc
+ Sets up (eg allocates) the pugh Array. This is trivial.
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine SetupPGArray
+ @date Fri Feb 21 11:05:04 1997
+ @author
+ @desc
+ Note that npoints is a GLOBAL measure.
+ @enddesc
+ @calls DisableGFDataStorage, EnableGFDataStorage
+@@*/
+
+
+pGArray *SetupPGArray(pGH *GH, char *name, int npoints) {
+ pGArray *res; /* The result */
+ pGArray **rlist; /* A temporary to add to the GH */
+ int lnpoints, i;
+
+ /* Space for the struct */
+ res = (pGArray *)malloc(sizeof(pGArray));
+
+ /* Set my parent GH */
+ res->parentGH = GH;
+
+ /* Set up the name */
+  res->name = (char *)malloc((strlen(name)+1)*sizeof(char));
+ strcpy(res->name,name);
+
+ /* Allocate my memory */
+ if (npoints > 0) {
+ lnpoints = npoints/GH->nprocs;
+ if (GH->myproc == GH->nprocs - 1) {
+ lnpoints = npoints - lnpoints * (GH->nprocs-1);
+ }
+ res->npoints = lnpoints;
+ res->data = (Double *)malloc(lnpoints*sizeof(Double));
+ } else {
+ res->npoints = 1;
+ res->data = (Double *)malloc(sizeof(Double));
+ }
+
+  /* Set up IO stuff: default to no IO. */
+ res->do_arrayio = 0;
+
+ res->IEEEfile = NULL;
+
+ /* Finally add this to the list of grid funcs in the gh */
+ GH->nArrays ++;
+  rlist = (pGArray **)malloc(GH->nArrays * sizeof(pGArray *));
+ if (GH->nArrays > 1) {
+ for (i=0;i<GH->nArrays-1;i++)
+ rlist[i] = GH->Arrays[i];
+ free(GH->Arrays);
+ }
+ res->pgano = GH->nArrays - 1;
+ rlist[GH->nArrays-1] = res;
+ GH->Arrays = rlist;
+
+ /* And return myself... */
+ return res;
+}
+
+void DestroyPGArray(pGH *GH, pGArray **pGAin) {
+ pGArray *pGA;
+ pGA = *pGAin;
+
+ free(pGA->name);
+ free(pGA->data);
+
+ free(pGA);
+ *pGAin=NULL;
+}
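A worked sketch of the point distribution used in SetupPGArray above, with assumed example values (a global npoints = 10 on 4 processors gives local sizes 2, 2, 2 and 4):

    #include <stdio.h>

    /* Illustrative sketch, not part of this patch. */
    int main(void)
    {
      int npoints = 10, nprocs = 4, myproc;
      for (myproc = 0; myproc < nprocs; myproc++) {
        int lnpoints = npoints / nprocs;                  /* 2 everywhere ...       */
        if (myproc == nprocs - 1)
          lnpoints = npoints - lnpoints * (nprocs - 1);   /* ... except the last: 4 */
        printf("proc %d: %d points\n", myproc, lnpoints);
      }
      return 0;
    }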
diff --git a/src/pugh/SetupPGF.c b/src/pugh/SetupPGF.c
new file mode 100644
index 0000000..acec587
--- /dev/null
+++ b/src/pugh/SetupPGF.c
@@ -0,0 +1,438 @@
+ /*@@
+ @file SetupPGF.c
+ @date Fri Feb 21 11:04:20 1997
+ @author
+ @desc
+ Sets up (eg allocates) the pugh Grid Function. This is
+     crucial to understanding the grid function struct.
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine SetupPGF
+ @date Fri Feb 21 11:05:04 1997
+ @author
+ @desc
+ Allocates the Grid Func structure (I wanted to say object ...)
+ There are several steps to this
+ <ul>
+ <li> Set up the name
+ <li> Set up the docomm flag and IO flags
+ <li> Allocate output file pointer arrays
+ <li> Allocate the data buffer
+ <li> Allocate the send buffer
+ <li> Add myself to the Grid Hierarchy
+ </ul>
+ But this is pretty straightforward code nonetheless.
+ <p>
+ Note that the input variable "storage" determines whether to
+ allocate the 3-D data or not. You can toggle this later
+ with @seeroutine DisableGFDataStorage and @seeroutine
+ EnableGFDataStorage
+ @enddesc
+ @calls DisableGFDataStorage, EnableGFDataStorage
+@@*/
+
+
+pGF *SetupPGF(pGH *GH, char *name, int docomm, int storage, int stagger) {
+ pGF *res; /* The result */
+ pGF **rlist; /* A temporary to add to the GH */
+ int i, dir, dirp1, dirp2, sz; /* Counter thingies */
+
+ /* Space for the struct */
+ res = (pGF *)malloc(sizeof(pGF));
+
+ /* Set my parent GH */
+ res->parentGH = GH;
+
+ /* Set up the name */
+ res->name = (char *)malloc((1+strlen(name))*sizeof(char));
+ strcpy(res->name,name);
+
+ res->stagger = stagger;
+ if( (GH->stagger == PUGH_NO_STAGGER) &&
+ (res->stagger != PUGH_VERTEXCTR) )
+ { printf ("FATAL ERROR! Cannot have staggered grids inside a GH with \n");
+ printf (" designation PUGH_NO_STAGGER. \n");
+ STOP;
+ }
+
+
+ /* Setup IO stuff to default no IO with space for xgraph files.*/
+ res->do_3dio = 0;
+ res->do_2dio = 0;
+ res->do_1dio = 0;
+ res->do_0dio = 0;
+ res->do_conv = 0;
+
+ res->convfile = NULL;
+
+ res->xgfile = (FILE **)malloc(5*sizeof(FILE *));
+ for (i=0;i<5;i++)
+ res->xgfile[i] = NULL;
+
+ res->zerodfile = (FILE **)malloc(4*sizeof(FILE *));
+ for (i=0;i<4;i++)
+ res->zerodfile[i] = NULL;
+
+ res->IEEEfile = NULL;
+
+ for (i=0;i<3;i++)
+ res->IEEEfile_2d[i] = NULL;
+
+ res->show_maxval = 0;
+
+ res->lastio_it[0] = -1; res->lastio_it[1] = -1;
+ res->lastio_it[2] = -1; res->lastio_it[3] = -1;
+
+ SetGFComm(res, docomm);
+
+
+ /* Finally add this to the list of grid funcs in the gh */
+ GH->ngridFuncs ++;
+ rlist = (pGF **)malloc(GH->ngridFuncs * sizeof(pGF *));
+ if (GH->ngridFuncs > 1) {
+ for (i=0;i<GH->ngridFuncs-1;i++)
+ rlist[i] = GH->gridFuncs[i];
+ free(GH->gridFuncs);
+ }
+ res->gfno = GH->ngridFuncs - 1;
+ rlist[GH->ngridFuncs-1] = res;
+ GH->gridFuncs = rlist;
+ /* Set up the comm layer */
+ res->padddata = NULL;
+ res->data = NULL;
+ res->send_buffer = NULL;
+ res->recv_buffer = NULL;
+
+ if (storage == PUGH_NOSTORAGE) {
+ res->storage = PUGH_STORAGE; /* Fake it out... */
+ DisableGFDataStorage(GH, res);
+ } else if (storage == PUGH_STORAGE) {
+ res->storage = PUGH_NOSTORAGE; /* Fake it out... */
+ EnableGFDataStorage(GH, res);
+ } else {
+ printf ("WARNING: Storage keyword set incorrectly for %s [%d]\n",res->name,storage);
+ printf (" Perhaps misspelled in GridFuncList? Defaulting to storage active.\n");
+ EnableGFDataStorage(GH, res);
+ }
+
+#ifdef MPI
+  /* Null my send and receive requests */
+ for (i=0;i<6;i++) {
+ res->sreq[i] = MPI_REQUEST_NULL;
+ res->rreq[i] = MPI_REQUEST_NULL;
+ }
+#endif
+
+ /* And return myself... */
+ return res;
+}
+
+ /*@@
+ @routine DestroyPGF
+ @date Thu Aug 21 11:44:22 1997
+ @author Paul Walker
+ @desc
+ Destroys a GF object.
+ @enddesc
+@@*/
+
+void DestroyPGF(pGH *GH, pGF **GFin) {
+ pGF *GF;
+ GF = *GFin;
+
+ if (GF->storage)
+ DisableGFDataStorage(GH, GF);
+
+ free(GF->name);
+ free(GF->padddata);
+
+ free(GF->xgfile);
+ free(GF->zerodfile);
+
+ GF->data = NULL;
+
+ free(GF);
+ *GFin = NULL;
+
+}
+
+
+ /*@@
+ @routine EnableGFDataStorage
+ @date Thu Apr 3 12:44:38 1997
+ @author Paul Walker
+ @desc
+ This routine toggles the data storage to the "on"
+ position. That means that data points to a 3D
+ array, rather than a single Double, and the
+ storage flag is set to one. This is used quite
+                a lot by thorns which need to toggle memory. For
+                instance, see the trK driver in the util thorn.
+ @enddesc
+@@*/
+
+/* Statics for padding */
+static int padding_active = -1;
+static int padding_cacheline_bits, padding_size, padding_address_spacing;
+
+int zero_memory = -1;
+
+void EnableGFDataStorage(pGH *GH, pGF *GF) {
+ int i, dir, dirp1, dirp2, sz;
+ int special_pad, cache_size, start;
+
+ if (zero_memory < 0) {
+ zero_memory = Contains("zero_memory","yes");
+ if (zero_memory) {
+ printf ("Zeroing memory for allocated GFs at alloc time\n");
+ }
+ }
+
+
+ if (padding_active == -1) {
+ padding_active = Contains("padding_active","yes");
+ padding_cacheline_bits = Geti("padding_cacheline_bits");
+ padding_size = Geti("padding_size");
+ padding_address_spacing = Geti("padding_address_spacing");
+ if (padding_active) {
+ printf ("PUGH Memory padding active. Stats:\n");
+ printf (" cacheline_bits : %d\n",
+ padding_cacheline_bits);
+ printf (" size : %d\n",
+ padding_size);
+ printf (" spacing : %d\n",
+ padding_address_spacing);
+ }
+ }
+
+ if (GF->storage) {
+ printf ("WARNING: Tried to enable %s when already enabled\n",
+ GF->name);
+ return;
+ }
+
+  /* Set up the send and receive buffer pointer arrays. (The per-face
+     buffers are only used by the allocated-buffers comm style, not by
+     derived types, but the pointer arrays are set up either way.)
+  */
+ assert(!GF->send_buffer);
+ GF->send_buffer = (Double **)malloc(6*sizeof(Double *));
+
+ assert(!GF->recv_buffer);
+ GF->recv_buffer = (Double **)malloc(6*sizeof(Double *));
+
+ for (i=0;i<6;i++) {
+ dir = i/2;
+ dirp1 = (i/2+1)%3;
+ dirp2 = (i/2+2)%3;
+
+ if (dir == 0) {
+ sz = GH->stencil_width * GH->lny * GH->lnz;
+ } else if (dir == 1) {
+ sz = GH->stencil_width * GH->lnx * GH->lnz;
+ } else {
+ sz = GH->stencil_width * GH->lnx * GH->lny;
+ }
+
+ if (GH->neighbors[GH->myproc][i] >= 0) {
+ GF->buffer_sz[i] = sz;
+ GF->send_buffer[i] = (Double *)malloc(sz*sizeof(Double));
+ GF->recv_buffer[i] = (Double *)malloc(sz*sizeof(Double));
+ assert(GF->recv_buffer[i]);
+ assert(GF->send_buffer[i]);
+ } else {
+ GF->buffer_sz[i] = 0;
+ GF->send_buffer[i] = NULL;
+ GF->recv_buffer[i] = NULL;
+ }
+ }
+
+ /* Set up the storage */
+ if (GF->padddata) free (GF->padddata);
+ if (!padding_active) {
+ GF->padddata = (Double *)malloc((GH->npoints)*sizeof(Double));
+ GF->data = GF->padddata; /* No padding case */
+ if (zero_memory) {
+ for (i=0;i<GH->npoints;i++)
+ GF->padddata[i] = 0.0;
+ }
+ } else {
+ GF->padddata = (Double *)malloc((GH->npoints + padding_size)
+ *sizeof(Double));
+
+ /* Align the actual starting address on a cache line.
+ More specifically align on a unique cache line for
+ each field.
+ */
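+    /* Worked example (illustrative numbers, not defaults):
+       padding_cacheline_bits = 7 gives cache_size = 2 << 6 = 128
+       Doubles per cache line.  If padddata starts at Double offset
+       start = 40 within a line and this is GF number 3 with
+       padding_address_spacing = 8, then
+         special_pad = (3*8 + 128 - 40) % 128 = 112,
+       so GF->data begins at line offset 3*8 = 24, staggering each
+       field onto its own offset.  padding_size must exceed
+       special_pad for this to fit, hence the check below. */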
+ cache_size = 2 << (padding_cacheline_bits - 1);
+ start = ((long) (GF->padddata) / sizeof(Double))%cache_size;
+
+ special_pad = ((GF->gfno) * padding_address_spacing + cache_size - start)%cache_size;
+ GF->data = (GF->padddata) + special_pad;
+
+ if (special_pad > padding_size){
+ printf("FATAL ERROR: padding size not large enough for cache type specified %d %d %d \n",special_pad,padding_size,cache_size);
+ STOP;
+ }
+ if (zero_memory) {
+ for (i=0;i<GH->npoints+padding_size;i++)
+ GF->padddata[i] = 0.0;
+ }
+ }
+
+ if (!GF->padddata) {
+ printf ("FATAL ERROR: Cannot allocate data for %s [%d]\n",
+ GF->name, GF->gfno);
+ STOP;
+ }
+ GF->storage = 1;
+
+
+}
+
+ /*@@
+ @routine DisableGFDataStorage
+ @date Thu Apr 3 12:45:34 1997
+ @author Paul Walker
+ @desc
+ This routine disables the grid function storage.
+ That is, it un-allocates the 3D array and in its
+ place allocates a single Double with the value
+     0.0. This allows us to still have something to
+     pass around (an array of size (1,1,1) in Fortran
+ speak) but also to not need all our 3D arrays
+ "on" all the time.
+ @enddesc
+@@*/
+
+
+void DisableGFDataStorage(pGH *GH, pGF *GF) {
+ int i;
+
+ if (!GF->storage) {
+ printf ("Warning: Tried to disable %s when already disabled\n",
+ GF->name);
+ return;
+ }
+
+ if (GF->padddata) {
+ free(GF->padddata);
+ GF->padddata = NULL;
+ GF->data = NULL;
+ }
+
+ if (GF->send_buffer) {
+ for (i=0;i<6;i++)
+ if (GF->send_buffer[i]) {
+#ifdef MPI
+ if(GF->sreq[i] != MPI_REQUEST_NULL){
+ CACTUS_MPI_ERROR(MPI_Request_free(&(GF->sreq[i])));
+ }
+#endif
+ free(GF->send_buffer[i]);
+ GF->send_buffer[i] = NULL;
+ }
+ free(GF->send_buffer);
+ GF->send_buffer = NULL;
+ }
+
+
+ if (GF->recv_buffer) {
+ for (i=0;i<6;i++)
+ if (GF->recv_buffer[i]) {
+ free(GF->recv_buffer[i]);
+ GF->recv_buffer[i] = NULL;
+ }
+ free(GF->recv_buffer);
+ GF->recv_buffer = NULL;
+ }
+
+ GF->padddata = (Double *)malloc(sizeof(Double));
+ GF->data = GF->padddata;
+ GF->data[0] = 0.0; /* Very important! */
+ GF->storage = 0;
+}
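+
+/* Usage sketch (illustrative; "work" names a GF created earlier with
+   SetupPGF): a thorn needing a temporary 3D array only for one phase
+   can bracket that phase with
+
+     EnableGFDataStorage(GH, work);
+     ... fill and use work->data ...
+     DisableGFDataStorage(GH, work);
+
+   afterwards work->data again points at a single Double set to 0.0. */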
+
+ /*@@
+ @routine SetGFComm
+ @date Thu Apr 3 12:46:23 1997
+ @author Paul Walker
+ @desc
+ This sets the docomm[3] array of the GF based
+ on the setting of the comm flag. Note the
+ comm flag constants are defined in
+     @seeheader pughDriver.h. As well as by SetupPGF,
+     this is called by thorns (e.g., elliptic) which
+     need to toggle this state.
+ @enddesc
+ @calledby SetupPGF
+@@*/
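+
+/* Face numbering used by docomm[], as can be read off the code below:
+   0,1 = x minus/plus; 2,3 = y minus/plus; 4,5 = z minus/plus.  For
+   example, PUGH_XCOMM yields docomm = {1,1,0,0,0,0} and
+   PUGH_PLUSFACESCOMM yields {0,1,0,1,0,1}. */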
+
+
+void SetGFComm(pGF *res, int docomm) {
+ /* Copy docomm */
+ int i;
+ res->commflag = docomm;
+
+ for (i=0;i<6;i++)
+ res->docomm[i] = 0;
+
+ if (docomm == PUGH_NOCOMM) {
+ for (i=0;i<6;i++)
+ res->docomm[i] = 0;
+ } else if (docomm == PUGH_ALLCOMM) {
+ for (i=0;i<6;i++)
+ res->docomm[i] = 1;
+ } else if (docomm == PUGH_XCOMM) {
+ res->docomm[0] = 1;
+ res->docomm[1] = 1;
+ } else if (docomm == PUGH_YCOMM) {
+ res->docomm[2] = 1;
+ res->docomm[3] = 1;
+ } else if (docomm == PUGH_ZCOMM) {
+ res->docomm[4] = 1;
+ res->docomm[5] = 1;
+ } else if (docomm == PUGH_PLUSFACESCOMM) {
+ res->docomm[1] = 1;
+ res->docomm[3] = 1;
+ res->docomm[5] = 1;
+ } else if (docomm == PUGH_MINUSFACESCOMM) {
+ res->docomm[0] = 1;
+ res->docomm[2] = 1;
+ res->docomm[4] = 1;
+ } else {
+ printf("WARNING: Unsupported comm. in grid function %s .\n",res->name);
+ printf(" Perhaps misspelled in GridFuncList? Defaulting to allcomm.\n");
+ for (i=0;i<6;i++)
+ res->docomm[i] = 1;
+ }
+
+ /* Handle nx = 1 type cases. This is only important for one
+ processor MPI periodic boundaries.
+ */
+ /* This is a hack to get a GH. Unfortunately I decided not
+ to pass a GH to this routine, and I should go back and
+ change it, but that is a lot of changes so for now I'll
+ do the following.
+ */
+ if (GHList->GH->nx == 1) {
+ res->docomm[0] = 0;
+ res->docomm[1] = 0;
+ }
+
+ if (GHList->GH->ny == 1) {
+ res->docomm[2] = 0;
+ res->docomm[3] = 0;
+ }
+
+ if (GHList->GH->nz == 1) {
+ res->docomm[4] = 0;
+ res->docomm[5] = 0;
+ }
+}
diff --git a/src/pugh/SetupSliceCenter.c b/src/pugh/SetupSliceCenter.c
new file mode 100644
index 0000000..58e6a07
--- /dev/null
+++ b/src/pugh/SetupSliceCenter.c
@@ -0,0 +1,137 @@
+ /*@@
+ @file SetupSliceCenter.c
+ @date
+ @author
+ @desc
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+void SetupSliceCenter(pGH *GH) {
+ int slice_center;
+
+
+ /* slice_center used to be part of the GF, */
+ /* either 1 (default) or 0 (octant,edge)*/
+ /* now spxyz[0],spxyz[1],spxyz[2] are part of the GH */
+
+ if ((GH->spxyz[0]!=-1)||(GH->spxyz[1]!=-1)||(GH->spxyz[2]!=-1)) {
+ printf("WARNING in SetupSliceCenter: you already have these Slice Center settings:\n(OK for BoxInBox)");
+ printf("spxyz[0],spxyz[1],spxyz[2]: %d %d %d \n",
+ GH->spxyz[0], GH->spxyz[1], GH->spxyz[2]);
+ return;
+ }
+
+ slice_center=1;
+ if (Contains("grid","octant") || Contains("grid","quadrant") ||
+ Contains("grid","bitant"))
+ slice_center=0;
+ if (Contains("slice_loc","edge")) slice_center=0;
+
+ if (slice_center) {
+    /* For non-octant mode: the center for slicings */
+ /* is moved to the center index */
+ /* (unless nx,ny,nz=1) */
+ if (GH->nx == 1) {
+ GH->spxyz[0] = 0;
+ GH->sp2dxyz[0] = 0;
+ } else {
+ GH->spxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0];
+ GH->sp2dxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0];
+ }
+
+ if (GH->ny == 1) {
+ GH->spxyz[1] = 0;
+ GH->sp2dxyz[1] = 0;
+ } else {
+ GH->spxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1];
+ GH->sp2dxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1];
+ }
+
+ if (GH->nz == 1) {
+ GH->spxyz[2] = 0;
+ GH->sp2dxyz[2] = 0;
+ } else {
+ GH->spxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2];
+ GH->sp2dxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2];
+ }
+ }
+ /* for octant mode, the slicing center is the index 1 or zero*/
+ else {
+ if (GH->nx == 1) {
+ GH->spxyz[0] = 0;
+ GH->sp2dxyz[0] = 0;
+ } else {
+ GH->spxyz[0] = 1 - GH->lb[GH->myproc][0];
+ GH->sp2dxyz[0] = 1 - GH->lb[GH->myproc][0];
+ }
+
+ if (GH->ny == 1) {
+ GH->spxyz[1] = 0;
+ GH->sp2dxyz[1] = 0;
+ } else {
+ GH->spxyz[1] = 1 - GH->lb[GH->myproc][1];
+ GH->sp2dxyz[1] = 1 - GH->lb[GH->myproc][1];
+ }
+
+ if (GH->nz == 1) {
+ GH->spxyz[2] = 0;
+ GH->sp2dxyz[2] = 0;
+ } else {
+ GH->spxyz[2] = 1 - GH->lb[GH->myproc][2];
+ GH->sp2dxyz[2] = 1 - GH->lb[GH->myproc][2];
+ }
+ }
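+
+  /* Worked example (illustrative numbers): with nx = 64 and this
+     processor's x lower bound lb[myproc][0] = 32, the full-grid
+     centre nx/2 = 32 becomes local index 32 - 32 = 0 here, while
+     the processor starting at global index 0 gets 32 - 0 = 32. */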
+
+ /* Special case: in quadrant mode x and y are like full, but
+ z is like octant. Thinking about this gives you a headache,
+     but it works.
+ */
+ if (Contains("grid","quadrant")) {
+ if (GH->nz == 1) {
+ GH->spxyz[2] = 0;
+ GH->sp2dxyz[2] = 0;
+ } else {
+ GH->spxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2];
+ GH->sp2dxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2];
+ }
+ }
+
+ /* Another special case: bitant! Hey, with full and octant
+ already implemented, bitant is a breeze on the neurons */
+ if (Contains("grid","bitant"))
+ { if (GH->nx == 1)
+ { GH->spxyz[0] = 0;
+ GH->sp2dxyz[0] = 0;
+ }
+ else
+ { GH->spxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0];
+ GH->sp2dxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0];
+ }
+ if (GH->ny == 1)
+ { GH->spxyz[1] = 0;
+ GH->sp2dxyz[1] = 0;
+ }
+ else
+ { GH->spxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1];
+ GH->sp2dxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1];
+ }
+ }
+
+#ifdef THORN_CARTOON_2D
+ /* for cartoon_2d, treat x as in octant, y and z as in full, BB */
+ if (Contains("cartoon_active","yes")) {
+ if (GH->nx == 1) {
+ GH->spxyz[0] = 0;
+ GH->sp2dxyz[0] = 0;
+ } else {
+ GH->spxyz[0] = 1 - GH->lb[GH->myproc][0];
+ GH->sp2dxyz[0] = 1 - GH->lb[GH->myproc][0];
+ }
+ }
+#endif
+}
diff --git a/src/pugh/SyncGroupGF.c b/src/pugh/SyncGroupGF.c
new file mode 100644
index 0000000..2d21ae2
--- /dev/null
+++ b/src/pugh/SyncGroupGF.c
@@ -0,0 +1,271 @@
+/*@@
+ @file SyncGroupGF.c
+ @author Paul Walker
+ @date March 1997
+ @desc
+ Synchronizes all the Grid Functions (which have their communications
+ on) in a given group which has previously been registered to
+ the GF data base with StoreGFGroupData. The MPI section was
+ formerly called SyncGH.c
+ @enddesc
+ @history
+ @hdate Sep 27 1998 @hauthor Gabrielle Allen, Tom Goodale
+ @hdesc Added the functionality to synchronise indices from a group
+ rather than the whole GH. Changed the routine/file names.
+ @endhistory
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+/*#define VERBOSE*/
+#undef COMM_TIMING
+
+int GetGFGroupData(const char *name, int *nGFs, int **GFIndices);
+
+/*@@
+ @routine SyncGroupGF
+ @author Paul Walker
+ @date March 1997
+ @desc
+ Synchronizes all the Grid Functions (which have their communications
+ on) in a given group which has previously been registered to
+ the GF data base with StoreGFGroupData. The MPI section was
+ formerly called SyncGH.c
+ @enddesc
+ @history
+ @hdate Sep 27 1998 @hauthor Gabrielle Allen, Tom Goodale
+ @hdesc Added the functionality to synchronise indices from a group
+ rather than the whole GH. Changed the routine/file names.
+ @endhistory
+ @@*/
+
+
+/* This routine works in one of two ways:
+ *
+ * 1. If using derived data types, it posts receives, posts sends
+ * waits for all sends and receives, and finishes the receives.
+ *
+ * 2. If using buffered communication, it posts receives, posts
+ * sends, waits for receives only, and finishes them. We don't
+ * need to wait for sends until the next time through. This
+ * is why the wait for the send buffer is immediately before
+ *    the post send. (Since the send requests are initially set
+ *    to MPI_REQUEST_NULL, the first Wait and any waits in
+ * unused directions are a wash).
+ *
+ * This routine is intertwined with the contents of routines
+ * pGF_PostRecv, pGF_PostSend, and pGF_FinishRecv.
+ *
+ * Without MPI, this does nothing, of course.
+ *
+ * Note this routine is also responsible for updating the
+ * commt global so we can get a measure of how much time we
+ * spend in communications overhead.
+ *
+ * Also note that it is more efficient to sync a number of
+ * GF together (as a group or a hierachy) rather than
+ * singly. This is because syncing single functions waits for the
+ * receives on a single function before posting the sends on
+ * all the other functions.
+ */
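+
+/* Buffered-mode sequence for one direction Dir, as a sketch (the real
+   loops below run over every GF in the group at each step):
+
+     pGF_PostRecv (GH, GF, 2*Dir);           pGF_PostRecv (GH, GF, 2*Dir+1);
+     MPI_Wait(&GF->sreq[2*Dir],   &status);  pGF_PostSend (GH, GF, 2*Dir);
+     MPI_Wait(&GF->sreq[2*Dir+1], &status);  pGF_PostSend (GH, GF, 2*Dir+1);
+     MPI_Wait(&GF->rreq[2*Dir],   &status);  pGF_FinishRecv(GH, GF, 2*Dir);
+     MPI_Wait(&GF->rreq[2*Dir+1], &status);  pGF_FinishRecv(GH, GF, 2*Dir+1);
+ */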
+
+void SyncGroupGF(pGH *GH,const char *groupname) {
+#ifdef MPI
+ int j, Dir, index;
+ double t1,t2,t3;
+ MPI_Request *sr = NULL;
+  MPI_Status *ms = NULL, mss;
+#endif
+ int i;
+ double st[PUGH_NTIMERS],et[PUGH_NTIMERS];
+ int l;
+ int *GFindices;
+ int nGFs;
+
+ pughTimers(st);
+
+ /* Find which grid functions make up the group */
+ GetGFGroupData(groupname,&nGFs,&GFindices);
+
+ /* Say which grid functions will actually be synchronised */
+ if (Contains("cactus_sync_verbose","yes")) {
+ printf("Syncing %d grid functions in group %s\n",nGFs,groupname);
+ printf(" Syncing GFs ...");
+ for (i=0;i<nGFs;i++) {
+ if (GH->gridFuncs[GFindices[i]]->storage
+ && (GH->gridFuncs[GFindices[i]]->docomm[1] ||
+ GH->gridFuncs[GFindices[i]]->docomm[2] ||
+ GH->gridFuncs[GFindices[i]]->docomm[3] ||
+ GH->forceSync))
+ printf(" %s",GH->gridFuncs[GFindices[i]]->name);
+ }
+ printf("\n");
+ }
+
+#ifdef MPI
+ if (GH->commmodel == PUGH_DERIVEDTYPES) {
+    /* 2 faces per direction, and send plus receive: hence the 2 * 2 */
+    sr = (MPI_Request *)malloc(nGFs * 2 * 2 * sizeof(MPI_Request));
+    ms = (MPI_Status *)malloc(nGFs * 2 * 2 * sizeof(MPI_Status));
+ for (i=0;i<nGFs * 2 * 2; i++)
+ sr[i] = MPI_REQUEST_NULL;
+ }
+
+ for (Dir = 0; Dir < 3; Dir ++) {
+#ifdef COMM_TIMING
+ t1 = MPI_Wtime();
+#endif
+ for(i=0;i<nGFs;i++) {
+ pGF_PostRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+ pGF_PostRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+ }
+
+#ifdef COMM_TIMING
+ t2 = MPI_Wtime();
+ printf ("PR : %lf\n",t2-t1);
+#endif
+ if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) {
+ for(i=0;i<nGFs;i++) {
+        /* Wait for the last send. Since these will be null if they
+ are not used, this is always safe.
+ */
+ MPI_Wait(&(GH->gridFuncs[GFindices[i]]->sreq[2*Dir]),&mss);
+ pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+ MPI_Wait(&(GH->gridFuncs[GFindices[i]]->sreq[2*Dir+1]),&mss);
+ pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+ }
+ } else {
+ for(i=0;i<nGFs;i++) {
+        /* With derived types there is no send buffer to reuse, so the
+           sends can be posted straight away; the waits happen in the
+           MPI_Waitall below. */
+ pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+ pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+ }
+ }
+
+#ifdef COMM_TIMING
+ t1 = MPI_Wtime();
+ printf ("PS : %lf\n",t1-t2);
+#endif
+
+ /* Now comes the big difference between derived types and
+ allocated buffers. With derived types, we now have to
+       wait on all our receive AND send requests so we can
+       keep on using the send buffers (as communications are
+       in-place). With allocated buffers we have to wait on each
+       receive, but not on the send, since we don't need the
+       send buffer until we pack a send again (above).
+ */
+
+ if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) {
+      /* Wait on the receives one at a time */
+ for(i=0;i<nGFs;i++) {
+ MPI_Wait(&(GH->gridFuncs[GFindices[i]]->rreq[2*Dir]),&mss);
+ pGF_FinishRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+ MPI_Wait(&(GH->gridFuncs[GFindices[i]]->rreq[2*Dir+1]),&mss);
+ pGF_FinishRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+ }
+ }
+
+ if (GH->commmodel == PUGH_DERIVEDTYPES) {
+ /* Load up the thing for the waitall */
+ for (i=0;i<nGFs;i++) {
+ int id = i*4;
+ if (GH->gridFuncs[GFindices[i]]->docomm[2*Dir] &&
+ GH->gridFuncs[GFindices[i]]->storage) {
+ sr[id] = GH->gridFuncs[GFindices[i]]->sreq[2*Dir];
+ sr[id+1] = GH->gridFuncs[GFindices[i]]->rreq[2*Dir];
+ } else {
+ sr[id] = MPI_REQUEST_NULL;
+ sr[id+1] = MPI_REQUEST_NULL;
+ }
+
+ if (GH->gridFuncs[GFindices[i]]->docomm[2*Dir+1] &&
+ GH->gridFuncs[GFindices[i]]->storage) {
+ sr[id+2] = GH->gridFuncs[GFindices[i]]->sreq[2*Dir+1];
+ sr[id+3] = GH->gridFuncs[GFindices[i]]->rreq[2*Dir+1];
+ } else {
+ sr[id+2] = MPI_REQUEST_NULL;
+ sr[id+3] = MPI_REQUEST_NULL;
+ }
+ }
+      /* Now do a waitall on all the send and receive requests */
+      MPI_Waitall(4*nGFs,sr,ms);
+
+    }
+#ifdef COMM_TIMING
+    t2 = MPI_Wtime();
+    printf ("FR : %lf\n",t2-t1);
+#endif
+  }
+
+  /* These are only allocated in the derived-types case, and are
+     reused across all three directions, so free them only now. */
+  if (sr) free(sr);
+  if (ms) free(ms);
+#endif
+ pughTimers(et);
+ for (l=0;l<PUGH_NTIMERS;l++)
+ commt[l] += et[l]-st[l];
+}
+
+
+/*@@
+ @routine SyncGH
+ @author Gabrielle Allen
+ @date Sep 27 1998
+ @desc
+ Wrapper to support SyncGH command, which is now implemented through
+ a group.
+ @enddesc
+ @@*/
+
+void SyncGH(pGH *GH) {
+
+ SyncGroupGF(GH,"$GH");
+
+}
+
+
+/*@@
+ @routine SyncGF
+ @author Gabrielle Allen
+ @date Sep 27 1998
+ @desc
+ Wrapper to support SyncGF command, which is now implemented through
+ a group. Note that it is more efficient to sync all required GFs at
+ the same time, using the @seeroutine SyncGroupGF routine with a given group.
+ @enddesc
+ @@*/
+
+void SyncGF(pGH *GH,pGF *GF) {
+
+ SyncGroupGF(GH,GF->name);
+
+}
+
+
+/* The old code for SyncGF is listed here, since it illustrates simply
+ * the use of pGF_PostRecv, pGF_PostSend, pGF_FinishRecv
+ * */
+
+/* void SyncGF(pGH *GH, pGF *GF) { */
+
+/* #ifdef MPI */
+/* int Dir; */
+/* MPI_Status mss; */
+/* for (Dir=0;Dir<3;Dir++) { */
+/* pGF_PostRecv(GH, GF, 2*Dir); */
+/* pGF_PostRecv(GH, GF, 2*Dir+1); */
+
+/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->sreq[2*Dir]),&mss)); */
+/* pGF_PostSend(GH, GF, 2*Dir); */
+/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->sreq[2*Dir+1]),&mss)); */
+/* pGF_PostSend(GH, GF, 2*Dir+1); */
+
+/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->rreq[2*Dir]),&mss)); */
+/* pGF_FinishRecv(GH, GF, 2*Dir); */
+/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->rreq[2*Dir+1]),&mss)); */
+/* pGF_FinishRecv(GH, GF, 2*Dir+1); */
+/* } */
+/* #endif */
+/* } */
+
diff --git a/src/pugh/pGF_FinishRecv.c b/src/pugh/pGF_FinishRecv.c
new file mode 100644
index 0000000..316dfc1
--- /dev/null
+++ b/src/pugh/pGF_FinishRecv.c
@@ -0,0 +1,81 @@
+ /*@@
+ @file pGF_FinishRecv.c
+ @date Thu Apr 3 11:37:28 1997
+ @author Paul Walker
+ @desc
+     The routine which finalizes the MPI receives for a grid
+ function. Critically linked with @seefile pGF_PostRecv.c
+ and @seefile pGF_PostSend.c
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine pGF_FinishRecv
+ @date Thu Apr 3 11:38:07 1997
+ @author Paul Walker
+ @desc
+ This routine finalizes the MPI communication through a face.
+ It is crucially linked with @seeroutine pGF_PostRecv and
+ @seeroutine pGF_PostSend.
+ <p>
+ <b>Important!</b>
+     This routine does <b>not</b> wait on the receives.
+     Before it is called, you must do the MPI_Wait or
+     else you will not get the right answer. For an
+     example of this, see @seeroutine SyncGroupGF.
+ @enddesc
+ @calledby SyncGroupGF
+ @history
+ @hdate Nov 4 1998 @hauthor Gabrielle Allen
+ @hdesc Allow for forced synchronization of all GFs with storage
+ @endhistory
+@@*/
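+
+/* Relation to pGF_PostSend, noted here for the reader: the send side
+   packs from GH->overlap (owned points adjacent to the face) and this
+   routine unpacks into GH->ghosts (the ghost zone on this face), both
+   walking k, then j, then i, so the flat buffer indices line up. */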
+
+void pGF_FinishRecv(pGH *GH, pGF *GF, int dir) {
+#ifdef MPI
+ int R;
+ MPI_Status ms;
+ int istart, iend, jstart, jend, kstart, kend;
+ int ii,jj,kk,xx;
+ int tmp;
+
+ /* Return if GF has no storage */
+ if (!(GF->storage)) return;
+
+ /* Return if communication not required and no forced synchronisation */
+ if (!(GH->forceSync || GF->docomm[dir])) return;
+
+ if (GH->neighbors[GH->myproc][dir] >= 0) {
+ /* Here wait one at a time, so the others can arrive while
+ we copy the data back in... */
+
+#ifdef DEBUG_PRINT
+ printf ("FINISHRECV: GF %s Side %d Proc %d request %d\n",
+ GF->name,dir,GH->myproc,GF->rreq[dir]);
+#endif
+
+ /* Copy buffer onto GF Storage */
+ istart = GH->ghosts[GF->stagger][dir][0][0];
+ iend = GH->ghosts[GF->stagger][dir][0][1];
+ jstart = GH->ghosts[GF->stagger][dir][1][0];
+ jend = GH->ghosts[GF->stagger][dir][1][1];
+ kstart = GH->ghosts[GF->stagger][dir][2][0];
+ kend = GH->ghosts[GF->stagger][dir][2][1];
+
+    /* Great now copy. Note the ordering is just like in pGF_PostSend */
+ xx=0;
+ for (kk=kstart; kk<kend; kk++)
+ for (jj=jstart; jj<jend; jj++)
+ for (ii=istart; ii<iend; ii++)
+ GF->data[DATINDEX(GH,ii,jj,kk)] = GF->recv_buffer[dir][xx++];
+
+ }
+
+#endif
+}
diff --git a/src/pugh/pGF_PostRecv.c b/src/pugh/pGF_PostRecv.c
new file mode 100644
index 0000000..81c4a30
--- /dev/null
+++ b/src/pugh/pGF_PostRecv.c
@@ -0,0 +1,69 @@
+ /*@@
+ @file pGF_PostRecv.c
+ @date Fri Feb 21 11:28:06 1997
+ @author
+ @desc
+ Routines which post all the IRecv commands for a GF. These
+     allow the asynchronous model proposed in, for example,
+ @seeroutine SyncGF to go!
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine pGF_PostRecv
+ @date Thu Apr 3 12:10:46 1997
+ @author Paul Walker
+ @desc
+     This routine posts a receive (MPI_Irecv) of the buffer
+     into which we want to get the data from another processor,
+     which will be sent by @seeroutine pGF_PostSend and then
+     finalized by @seeroutine pGF_FinishRecv.
+     <p>
+     Aside from a silly calculation to get a unique tag
+     based on neighbors and processors, this is a very
+ straightforward routine.
+ @enddesc
+ @history
+ @hdate Nov 4 1998 @hauthor Gabrielle Allen
+ @hdesc Allow for forced synchronization of all GFs with storage
+ @endhistory
+ @@*/
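+
+/* Tag sketch with illustrative numbers: on a 4-process run, the
+   receive posted by process 2 for face dir = 3 of GF number 5 uses
+     rtag = 1000 + 3 + 2*(2 + 4*5) = 1047,
+   and pGF_PostSend on the neighbouring process builds the matching
+   tag from the complementary face; both are taken mod 32768 to stay
+   inside the MPI tag range. */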
+
+void pGF_PostRecv(pGH *GH, pGF *GF, int dir) {
+#ifdef MPI
+ int R;
+ int rtag;
+
+ /* Return if GF has no storage */
+ if (!(GF->storage)) return;
+
+ /* Return if communication not required and no forced synchronisation */
+ if (!(GH->forceSync || GF->docomm[dir])) return;
+
+ if (GH->neighbors[GH->myproc][dir] >= 0) {
+ rtag = 1000 + dir + 2 * (GH->myproc + GH->nprocs * GF->gfno);
+ /* mod the tag to force MPI compliance */
+ rtag = rtag % 32768;
+#ifdef DEBUG_PRINT
+ printf ("RECV GF %s Side %d Proc %d rtag %d size %d from proc %d\n",
+ GF->name,dir,GH->myproc,rtag,
+ GF->buffer_sz[dir],GH->neighbors[GH->myproc][dir]);
+#endif
+ if (GH->commmodel == PUGH_ALLOCATEDBUFFERS)
+ CACTUS_MPI_ERROR(MPI_Irecv((void*)(GF->recv_buffer[dir]),
+ GF->buffer_sz[dir],PUGH_MPI_TYPE,
+ GH->neighbors[GH->myproc][dir],rtag,
+ GH->PUGH_COMM_WORLD,&(GF->rreq[dir])));
+ if (GH->commmodel == PUGH_DERIVEDTYPES)
+ CACTUS_MPI_ERROR(MPI_Irecv((void *)(GF->data),
+ 1,GH->recv_dt[GF->stagger][dir],
+ GH->neighbors[GH->myproc][dir],rtag,
+ GH->PUGH_COMM_WORLD,&(GF->rreq[dir])));
+ }
+#endif
+}
diff --git a/src/pugh/pGF_PostSend.c b/src/pugh/pGF_PostSend.c
new file mode 100644
index 0000000..d8ec630
--- /dev/null
+++ b/src/pugh/pGF_PostSend.c
@@ -0,0 +1,116 @@
+ /*@@
+ @file pGF_PostSend.c
+ @date Thu Apr 3 11:58:14 1997
+ @author Paul Walker
+ @desc
+     The send-arm of the three-way pugh communications branch
+     which is @seefile pGF_PostRecv.c, @seefile pGF_PostSend.c
+ and @seefile pGF_FinishRecv.c. See the documentation for
+ @seeroutine pGF_PostSend for details on this routine.
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine pGF_PostSend
+ @date Thu Apr 3 11:58:59 1997
+ @author Paul Walker
+ @desc
+     This routine posts an asynchronous MPI send of a buffer
+     for a face (MPI_Isend). It does this by first packing up
+     a send buffer (allocated in @seeroutine EnableGFDataStorage)
+     then sending it out on the pipe.
+     <p>
+     Since this is an asynchronous communications model we
+     assume that a receive has been posted for the send.
+     Thus the correct calling order for this is as in
+     @seeroutine SyncGF, e.g., after a Recv.
+     <p>
+     Note this does <b>not</b> wait on the send buffer from previous
+     communications. It is the user's responsibility to wait on
+ that buffer.
+ @enddesc
+ @calledby SyncGroupGF
+ @history
+ @hdate Nov 4 1998 @hauthor Gabrielle Allen
+ @hdesc Allow for forced synchronization of all GFs with storage
+ @endhistory
+@@*/
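+
+/* Direction bookkeeping, spelled out for the reader: faces pair up
+   as (0,1), (2,3), (4,5), so the complement of an even face dir is
+   dir+1 and of an odd face is dir-1.  The stag built below therefore
+   equals the rtag that pGF_PostRecv computes on the receiving side. */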
+
+void pGF_PostSend(pGH *GH, pGF *GF, int dir) {
+ int R, stag, dircomp;
+ int tmp;
+ int ii,jj,kk,xx;
+ int istart, iend;
+ int jstart, jend;
+ int kstart, kend;
+
+#ifdef MPI
+ Double ts, tw, tp;
+
+ MPI_Status ms;
+
+ /* Return if GF has no storage */
+ if (!(GF->storage)) return;
+
+ /* Return if communication not required and no forced synchronisation */
+ if (!(GH->forceSync || GF->docomm[dir])) return;
+
+ if (GH->neighbors[GH->myproc][dir] >= 0) {
+ ts = MPI_Wtime();
+ dircomp = dir+1; /* Complementary direction */
+ if (dircomp %2 == 0) dircomp = dir-1;
+ /* Note this is the complement of the rtag set in PostRecv */
+ stag = 1000 + dircomp + 2 * (GH->neighbors[GH->myproc][dir] +
+ GH->nprocs * GF->gfno);
+ /* mod the tag to force MPI compliance */
+ stag = stag % 32768;
+
+#ifdef DEBUG_PRINT
+ printf ("SEND: GF %s Side %d Proc %d stag %d Size %d to %d\n",
+ GF->name,dir,GH->myproc,stag,
+ GF->buffer_sz[dir],GH->neighbors[GH->myproc][dir]);
+#endif
+
+ if (GH->commmodel == PUGH_DERIVEDTYPES)
+ CACTUS_MPI_ERROR(MPI_Isend((void *)(GF->data),
+ 1,GH->send_dt[GF->stagger][dir],
+ GH->neighbors[GH->myproc][dir],stag,
+ GH->PUGH_COMM_WORLD,&(GF->sreq[dir])));
+
+ if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) {
+ /* Copy my information into the send buffer */
+ istart = GH->overlap[GF->stagger][dir][0][0];
+ iend = GH->overlap[GF->stagger][dir][0][1];
+ jstart = GH->overlap[GF->stagger][dir][1][0];
+ jend = GH->overlap[GF->stagger][dir][1][1];
+ kstart = GH->overlap[GF->stagger][dir][2][0];
+ kend = GH->overlap[GF->stagger][dir][2][1];
+
+ /* Great now copy */
+ xx = 0;
+ for (kk=kstart; kk<kend; kk++)
+ for (jj=jstart; jj<jend; jj++)
+ for (ii=istart; ii<iend; ii++)
+ GF->send_buffer[dir][xx++] = GF->data[DATINDEX(GH,ii,jj,kk)];
+
+ tp = MPI_Wtime();
+
+ /* post send */
+ CACTUS_MPI_ERROR(MPI_Isend((void*)(GF->send_buffer[dir]),
+ GF->buffer_sz[dir],PUGH_MPI_TYPE,
+ GH->neighbors[GH->myproc][dir],stag,
+ GH->PUGH_COMM_WORLD,&(GF->sreq[dir])));
+ tw = MPI_Wtime();
+#ifdef COMM_TIMING
+ printf ("GF %s Dir %d Time %lf\n",
+ GF->name, dir, tw-ts);
+#endif
+ }
+ }
+#endif
+}
diff --git a/src/pugh/pGF_Reduction.c b/src/pugh/pGF_Reduction.c
new file mode 100644
index 0000000..bc8e211
--- /dev/null
+++ b/src/pugh/pGF_Reduction.c
@@ -0,0 +1,198 @@
+ /*@@
+ @file pGF_Reduction.c
+ @date Thu Apr 3 11:54:53 1997
+ @author Paul Walker
+ @desc
+ Various MPI reduction operators for maxima, minima, norms
+ and the like on grid functions. These are all pretty
+     straightforward thanks to the miracle of MPI_Allreduce.
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine pGF_MaxVal
+ @date Thu Apr 3 11:55:25 1997
+ @author Paul Walker
+ @desc
+ Returns the maximum value of a distributed grid function.
+ @enddesc
+@@*/
+
+
+Double pGF_MaxVal(pGH *GH, pGF *GF) {
+ Double res, lres;
+ int i;
+
+ if (!GF->storage) return GF->maxval;
+
+ res = GF->data[0];
+ for (i=1;i<GH->npoints;i++)
+ res = (GF->data[i] > res? GF->data[i] : res);
+
+
+ /* So now res is the local max */
+#ifdef MPI
+ lres = res;
+ CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_MAX,GH->PUGH_COMM_WORLD));
+#endif
+
+ return res;
+}
+
+
+ /*@@
+ @routine pGF_MinVal
+ @date Thu Apr 3 11:55:48 1997
+ @author Paul Walker
+ @desc
+ Returns the minimum value of a distributed grid function.
+ @enddesc
+@@*/
+
+Double pGF_MinVal(pGH *GH, pGF *GF) {
+ Double res, lres;
+ int i;
+
+ if (!GF->storage) return GF->minval;
+
+ res = GF->data[0];
+ for (i=1;i<GH->npoints;i++)
+ res = (GF->data[i] < res ? GF->data[i] : res);
+
+  /* So now res is the local min */
+#ifdef MPI
+ lres = res;
+ CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_MIN,GH->PUGH_COMM_WORLD));
+#endif
+
+ return res;
+}
+
+
+
+ /*@@
+ @routine pGF_Norm1
+ @date Thu Apr 3 11:56:05 1997
+ @author Paul Walker
+ @desc
+     Returns the norm1 of a distributed grid function; that
+     is, returns $\Sigma |a_i| / np$.
+ @enddesc
+@@*/
+
+Double pGF_Norm1(pGH *GH, pGF *GF) {
+ Double res, lres;
+ int i, ii, jj, kk;
+ int is,ie,js,je,ks,ke;
+ int np,tnp;
+
+ if (!GF->storage) return GF->norm1;
+
+
+#define ABS(x) ((x)<0?-(x):(x))
+
+#ifndef MPI
+ res = 0.0;
+ for (i=0;i<GH->npoints;i++) res += ABS(GF->data[i]);
+ tnp = GH->npoints;
+
+#else
+ res = 0.0;
+ np = 0;
+ for (kk=GH->ownership[GF->stagger][2][0];
+ kk < GH->ownership[GF->stagger][2][1];
+ kk ++)
+ for (jj = GH->ownership[GF->stagger][1][0];
+ jj < GH->ownership[GF->stagger][1][1];
+ jj ++)
+ for (ii = GH->ownership[GF->stagger][0][0];
+ ii < GH->ownership[GF->stagger][0][1];
+ ii ++) {
+ res += ABS(GF->data[DATINDEX(GH,ii,jj,kk)]);
+ np++;
+ }
+ lres = res;
+ CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_SUM,GH->PUGH_COMM_WORLD));
+ CACTUS_MPI_ERROR(MPI_Allreduce(&np,&tnp,1,MPI_INT,MPI_SUM,GH->PUGH_COMM_WORLD));
+#endif
+
+ res = res/tnp;
+ return res;
+}
+
+
+ /*@@
+ @routine pGF_Norm2
+ @date Thu Apr 3 11:56:36 1997
+ @author Paul Walker
+ @desc
+ Returns the "norm2" of a distributed grid function. That
+ is, returns $\sqrt{\Sigma (a_i * a_i) / np}$.
+ @enddesc
+@@*/
+
+
+Double pGF_Norm2(pGH *GH, pGF *GF) {
+ Double res, lres;
+ int i, ii, jj, kk;
+ int is,ie,js,je,ks,ke;
+ int np, tnp;
+
+ if (!GF->storage) return GF->norm2;
+
+#ifndef MPI
+ res = 0.0;
+ for (i=0;i<GH->npoints;i++) res += GF->data[i]*GF->data[i];
+ tnp = GH->npoints;
+
+#else
+ /* MPI version depends heavily on the ownership array */
+ res = 0.0;
+ np = 0;
+
+ for (kk=GH->ownership[GF->stagger][2][0];
+ kk < GH->ownership[GF->stagger][2][1];
+ kk ++)
+ for (jj = GH->ownership[GF->stagger][1][0];
+ jj < GH->ownership[GF->stagger][1][1];
+ jj ++)
+ for (ii = GH->ownership[GF->stagger][0][0];
+ ii < GH->ownership[GF->stagger][0][1];
+ ii ++) {
+ res += GF->data[DATINDEX(GH,ii,jj,kk)] *
+ GF->data[DATINDEX(GH,ii,jj,kk)];
+ np ++;
+ }
+ lres = res;
+ CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_SUM,GH->PUGH_COMM_WORLD));
+ CACTUS_MPI_ERROR(MPI_Allreduce(&np,&tnp,1,MPI_INT,MPI_SUM,GH->PUGH_COMM_WORLD));
+#endif
+
+ res = sqrt(res/tnp);
+ return res;
+}
+
+ /*@@
+ @routine pGF_AllNorms
+ @date Mon Jun 30 10:44:57 1997
+ @author Paul Walker
+ @desc
+     Returns all 4 norms through a Double array in the order
+     max, min, norm1, norm2.
+ <p>
+ This is purely a convenience function.
+ @enddesc
+ @calls pGF_MaxVal, pGF_MinVal, pGF_Norm1, pGF_Norm2
+@@*/
+
+void pGF_AllNorms(pGH *GH, pGF *GF, Double *res) {
+ res[0] = pGF_MaxVal(GH,GF);
+ res[1] = pGF_MinVal(GH,GF);
+ res[2] = pGF_Norm1(GH,GF);
+ res[3] = pGF_Norm2(GH,GF);
+}
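+
+/* Usage sketch (GH and GF assumed valid, with storage enabled):
+
+     Double norms[4];
+     pGF_AllNorms(GH, GF, norms);
+     printf("max %g  min %g  norm1 %g  norm2 %g\n",
+            norms[0], norms[1], norms[2], norms[3]);
+ */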
+
diff --git a/src/pugh_Comm.h b/src/pugh_Comm.h
new file mode 100644
index 0000000..0108c7b
--- /dev/null
+++ b/src/pugh_Comm.h
@@ -0,0 +1,52 @@
+ /*@@
+ @header pugh_Comm.h
+ @date Thu Feb 4 11:42:50 1999
+ @author Tom Goodale
+ @desc
+
+ @enddesc
+ @version $Header$
+ @@*/
+
+#ifndef _PUGH_COMM_H_
+#define _PUGH_COMM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /* Overloaded functions. */
+int pugh_SyncGroup(cGH *GH, const char *group);
+int pugh_EnableGroupStorage(cGH *GH, const char *group);
+int pugh_DisableGroupStorage(cGH *GH, const char *group);
+
+int pugh_EnableGroupComm(cGH *GH, const char *group);
+int pugh_DisableGroupComm(cGH *GH, const char *group);
+
+int pugh_Barrier(cGH *GH);
+int pugh_Reduce(cGH *GH,
+ const char *operation,
+ int n_infields,
+ int n_outfields,
+ int out_type,
+ void **outarray,
+ ...);
+
+int pugh_Interp(cGH *GH,
+ const char *operation,
+ int n_coords,
+ int n_infields,
+ int n_outfields,
+ int n_points,
+ int type,
+ ...);
+
+int pugh_ParallelInit(cGH *GH);
+int pugh_Exit(cGH *GH);
+int pugh_Abort(cGH *GH);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/pugh_extension.h b/src/pugh_extension.h
new file mode 100644
index 0000000..4307ccd
--- /dev/null
+++ b/src/pugh_extension.h
@@ -0,0 +1,30 @@
+ /*@@
+ @header pugh_extension.h
+ @date Thu Feb 4 10:51:27 1999
+ @author Tom Goodale
+ @desc
+ The header file for PUGH GH extension stuff.
+ @enddesc
+ @version $Header$
+ @@*/
+
+#ifndef _PUGH_EXTENSION_H_
+#define _PUGH_EXTENSION_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *pugh_SetupGH(tFleshConfig *config,
+ int convergence_level,
+ cGH *GH);
+
+int pugh_InitGH(cGH *GH);
+
+int pugh_rfrTraverseGH(cGH *GH, int rfrpoint);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif