Diffstat (limited to 'src/pugh')
-rw-r--r--   src/pugh/SetupPGArray.c       84
-rw-r--r--   src/pugh/SetupPGF.c          438
-rw-r--r--   src/pugh/SetupSliceCenter.c  137
-rw-r--r--   src/pugh/SyncGroupGF.c       271
-rw-r--r--   src/pugh/pGF_FinishRecv.c     81
-rw-r--r--   src/pugh/pGF_PostRecv.c       69
-rw-r--r--   src/pugh/pGF_PostSend.c      116
-rw-r--r--   src/pugh/pGF_Reduction.c     198
8 files changed, 1394 insertions, 0 deletions
diff --git a/src/pugh/SetupPGArray.c b/src/pugh/SetupPGArray.c
new file mode 100644
index 0000000..9a5cf71
--- /dev/null
+++ b/src/pugh/SetupPGArray.c
@@ -0,0 +1,84 @@
+ /*@@
+ @file SetupPGArray.c
+ @date Fri Feb 21 11:04:20 1997
+ @author
+ @desc
+ Sets up (e.g. allocates) the pugh Array. This is trivial.
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine SetupPGArray
+ @date Fri Feb 21 11:05:04 1997
+ @author
+ @desc
+ Note that npoints is a GLOBAL measure.
+ @enddesc
+@@*/
+
+
+pGArray *SetupPGArray(pGH *GH, char *name, int npoints) {
+ pGArray *res; /* The result */
+ pGArray **rlist; /* A temporary to add to the GH */
+ int lnpoints, i;
+
+ /* Space for the struct */
+ res = (pGArray *)malloc(sizeof(pGArray));
+
+ /* Set my parent GH */
+ res->parentGH = GH;
+
+ /* Set up the name */
+ res->name = (char *)malloc((strlen(name)+1)*sizeof(char));
+ strcpy(res->name,name);
+
+ /* Allocate my memory */
+ if (npoints > 0) {
+ lnpoints = npoints/GH->nprocs;
+ if (GH->myproc == GH->nprocs - 1) {
+ lnpoints = npoints - lnpoints * (GH->nprocs-1);
+ }
+ res->npoints = lnpoints;
+ res->data = (Double *)malloc(lnpoints*sizeof(Double));
+ } else {
+ res->npoints = 1;
+ res->data = (Double *)malloc(sizeof(Double));
+ }
+
+ /* Set up IO stuff to default to no IO. */
+ res->do_arrayio = 0;
+
+ res->IEEEfile = NULL;
+
+ /* Finally add this to the list of grid funcs in the gh */
+ GH->nArrays ++;
+ rlist = (pGArray **)malloc(GH->nArrays * sizeof(pGArray *));
+ if (GH->nArrays > 1) {
+ for (i=0;i<GH->nArrays-1;i++)
+ rlist[i] = GH->Arrays[i];
+ free(GH->Arrays);
+ }
+ res->pgano = GH->nArrays - 1;
+ rlist[GH->nArrays-1] = res;
+ GH->Arrays = rlist;
+
+ /* And return myself... */
+ return res;
+}
+
+void DestroyPGArray(pGH *GH, pGArray **pGAin) {
+ pGArray *pGA;
+ pGA = *pGAin;
+
+ free(pGA->name);
+ free(pGA->data);
+
+ free(pGA);
+ *pGAin=NULL;
+}
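
The allocation above splits the global npoints evenly across processors, with the last processor picking up the remainder. A minimal standalone sketch of that distribution rule, using made-up values and no pugh types (illustrative only, not part of this commit):

#include <stdio.h>

/* Illustrative only: the npoints -> lnpoints split from SetupPGArray,
   for a hypothetical global size and processor count. */
int main(void)
{
  int npoints = 100, nprocs = 3, myproc;
  for (myproc = 0; myproc < nprocs; myproc++) {
    int lnpoints = npoints / nprocs;                  /* 33 on every processor ...  */
    if (myproc == nprocs - 1)
      lnpoints = npoints - lnpoints * (nprocs - 1);   /* ... except the last: 34    */
    printf("proc %d gets %d points\n", myproc, lnpoints);
  }
  return 0;
}
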
diff --git a/src/pugh/SetupPGF.c b/src/pugh/SetupPGF.c
new file mode 100644
index 0000000..acec587
--- /dev/null
+++ b/src/pugh/SetupPGF.c
@@ -0,0 +1,438 @@
+ /*@@
+ @file SetupPGF.c
+ @date Fri Feb 21 11:04:20 1997
+ @author
+ @desc
+ Sets up (e.g. allocates) the pugh Grid Function. This is
+ crucial to understanding the grid function struct.
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine SetupPGF
+ @date Fri Feb 21 11:05:04 1997
+ @author
+ @desc
+ Allocates the Grid Func structure (I wanted to say object ...)
+ There are several steps to this
+ <ul>
+ <li> Set up the name
+ <li> Set up the docomm flag and IO flags
+ <li> Allocate output file pointer arrays
+ <li> Allocate the data buffer
+ <li> Allocate the send buffer
+ <li> Add myself to the Grid Hierarchy
+ </ul>
+ But this is pretty straightforward code nonetheless.
+ <p>
+ Note that the input variable "storage" determines whether to
+ allocate the 3-D data or not. You can toggle this later
+ with @seeroutine DisableGFDataStorage and @seeroutine
+ EnableGFDataStorage
+ @enddesc
+ @calls DisableGFDataStorage, EnableGFDataStorage
+@@*/
+
+
+pGF *SetupPGF(pGH *GH, char *name, int docomm, int storage, int stagger) {
+ pGF *res; /* The result */
+ pGF **rlist; /* A temporary to add to the GH */
+ int i, dir, dirp1, dirp2, sz; /* Counter thingies */
+
+ /* Space for the struct */
+ res = (pGF *)malloc(sizeof(pGF));
+
+ /* Set my parent GH */
+ res->parentGH = GH;
+
+ /* Set up the name */
+ res->name = (char *)malloc((1+strlen(name))*sizeof(char));
+ strcpy(res->name,name);
+
+ res->stagger = stagger;
+ if( (GH->stagger == PUGH_NO_STAGGER) &&
+ (res->stagger != PUGH_VERTEXCTR) )
+ { printf ("FATAL ERROR! Cannot have staggered grids inside a GH with \n");
+ printf (" designation PUGH_NO_STAGGER. \n");
+ STOP;
+ }
+
+
+ /* Setup IO stuff to default no IO with space for xgraph files.*/
+ res->do_3dio = 0;
+ res->do_2dio = 0;
+ res->do_1dio = 0;
+ res->do_0dio = 0;
+ res->do_conv = 0;
+
+ res->convfile = NULL;
+
+ res->xgfile = (FILE **)malloc(5*sizeof(FILE *));
+ for (i=0;i<5;i++)
+ res->xgfile[i] = NULL;
+
+ res->zerodfile = (FILE **)malloc(4*sizeof(FILE *));
+ for (i=0;i<4;i++)
+ res->zerodfile[i] = NULL;
+
+ res->IEEEfile = NULL;
+
+ for (i=0;i<3;i++)
+ res->IEEEfile_2d[i] = NULL;
+
+ res->show_maxval = 0;
+
+ res->lastio_it[0] = -1; res->lastio_it[1] = -1;
+ res->lastio_it[2] = -1; res->lastio_it[3] = -1;
+
+ SetGFComm(res, docomm);
+
+
+ /* Finally add this to the list of grid funcs in the gh */
+ GH->ngridFuncs ++;
+ rlist = (pGF **)malloc(GH->ngridFuncs * sizeof(pGF *));
+ if (GH->ngridFuncs > 1) {
+ for (i=0;i<GH->ngridFuncs-1;i++)
+ rlist[i] = GH->gridFuncs[i];
+ free(GH->gridFuncs);
+ }
+ res->gfno = GH->ngridFuncs - 1;
+ rlist[GH->ngridFuncs-1] = res;
+ GH->gridFuncs = rlist;
+ /* Set up the comm layer */
+ res->padddata = NULL;
+ res->data = NULL;
+ res->send_buffer = NULL;
+ res->recv_buffer = NULL;
+
+ if (storage == PUGH_NOSTORAGE) {
+ res->storage = PUGH_STORAGE; /* Fake it out... */
+ DisableGFDataStorage(GH, res);
+ } else if (storage == PUGH_STORAGE) {
+ res->storage = PUGH_NOSTORAGE; /* Fake it out... */
+ EnableGFDataStorage(GH, res);
+ } else {
+ printf ("WARNING: Storage keyword set incorrectly for %s [%d]\n",res->name,storage);
+ printf (" Perhaps misspelled in GridFuncList? Defaulting to storage active.\n");
+ EnableGFDataStorage(GH, res);
+ }
+
+#ifdef MPI
+ /* Null my send and receive requests */
+ for (i=0;i<6;i++) {
+ res->sreq[i] = MPI_REQUEST_NULL;
+ res->rreq[i] = MPI_REQUEST_NULL;
+ }
+#endif
+
+ /* And return myself... */
+ return res;
+}
+
+ /*@@
+ @routine DestroyPGF
+ @date Thu Aug 21 11:44:22 1997
+ @author Paul Walker
+ @desc
+ Destroys a GF object.
+ @enddesc
+@@*/
+
+void DestroyPGF(pGH *GH, pGF **GFin) {
+ pGF *GF;
+ GF = *GFin;
+
+ if (GF->storage)
+ DisableGFDataStorage(GH, GF);
+
+ free(GF->name);
+ free(GF->padddata);
+
+ free(GF->xgfile);
+ free(GF->zerodfile);
+
+ GF->data = NULL;
+
+ free(GF);
+ *GFin = NULL;
+
+}
+
+
+ /*@@
+ @routine EnableGFDataStorage
+ @date Thu Apr 3 12:44:38 1997
+ @author Paul Walker
+ @desc
+ This routine toggles the data storage to the "on"
+ position. That means that data points to a 3D
+ array, rather than a single Double, and the
+ storage flag is set to one. This is used quite
+ a lot by thorns which need to toggle memory. For
+ instance, see the trK Driver in the util thorn.
+ @enddesc
+@@*/
+
+/* Statics for padding */
+static int padding_active = -1;
+static int padding_cacheline_bits, padding_size, padding_address_spacing;
+
+int zero_memory = -1;
+
+void EnableGFDataStorage(pGH *GH, pGF *GF) {
+ int i, dir, dirp1, dirp2, sz;
+ int special_pad, cache_size, start;
+
+ if (zero_memory < 0) {
+ zero_memory = Contains("zero_memory","yes");
+ if (zero_memory) {
+ printf ("Zeroing memory for allocated GFs at alloc time\n");
+ }
+ }
+
+
+ if (padding_active == -1) {
+ padding_active = Contains("padding_active","yes");
+ padding_cacheline_bits = Geti("padding_cacheline_bits");
+ padding_size = Geti("padding_size");
+ padding_address_spacing = Geti("padding_address_spacing");
+ if (padding_active) {
+ printf ("PUGH Memory padding active. Stats:\n");
+ printf (" cacheline_bits : %d\n",
+ padding_cacheline_bits);
+ printf (" size : %d\n",
+ padding_size);
+ printf (" spacing : %d\n",
+ padding_address_spacing);
+ }
+ }
+
+ if (GF->storage) {
+ printf ("WARNING: Tried to enable %s when already enabled\n",
+ GF->name);
+ return;
+ }
+
+ /* Set up the send buffers. Note we only do this if
+ we are not using the derived types comm style.
+ */
+ assert(!GF->send_buffer);
+ GF->send_buffer = (Double **)malloc(6*sizeof(Double *));
+
+ assert(!GF->recv_buffer);
+ GF->recv_buffer = (Double **)malloc(6*sizeof(Double *));
+
+ for (i=0;i<6;i++) {
+ dir = i/2;
+ dirp1 = (i/2+1)%3;
+ dirp2 = (i/2+2)%3;
+
+ if (dir == 0) {
+ sz = GH->stencil_width * GH->lny * GH->lnz;
+ } else if (dir == 1) {
+ sz = GH->stencil_width * GH->lnx * GH->lnz;
+ } else {
+ sz = GH->stencil_width * GH->lnx * GH->lny;
+ }
+
+ if (GH->neighbors[GH->myproc][i] >= 0) {
+ GF->buffer_sz[i] = sz;
+ GF->send_buffer[i] = (Double *)malloc(sz*sizeof(Double));
+ GF->recv_buffer[i] = (Double *)malloc(sz*sizeof(Double));
+ assert(GF->recv_buffer[i]);
+ assert(GF->send_buffer[i]);
+ } else {
+ GF->buffer_sz[i] = 0;
+ GF->send_buffer[i] = NULL;
+ GF->recv_buffer[i] = NULL;
+ }
+ }
+
+ /* Set up the storage */
+ if (GF->padddata) free (GF->padddata);
+ if (!padding_active) {
+ GF->padddata = (Double *)malloc((GH->npoints)*sizeof(Double));
+ GF->data = GF->padddata; /* No padding case */
+ if (zero_memory) {
+ for (i=0;i<GH->npoints;i++)
+ GF->padddata[i] = 0.0;
+ }
+ } else {
+ GF->padddata = (Double *)malloc((GH->npoints + padding_size)
+ *sizeof(Double));
+
+ /* Align the actual starting address on a cache line.
+ More specifically align on a unique cache line for
+ each field.
+ */
+ cache_size = 2 << (padding_cacheline_bits - 1);
+ start = ((long) (GF->padddata) / sizeof(Double))%cache_size;
+
+ special_pad = ((GF->gfno) * padding_address_spacing + cache_size - start)%cache_size;
+ GF->data = (GF->padddata) + special_pad;
+
+ if (special_pad > padding_size){
+ printf("FATAL ERROR: padding size not large enough for cache type specified %d %d %d \n",special_pad,padding_size,cache_size);
+ STOP;
+ }
+ if (zero_memory) {
+ for (i=0;i<GH->npoints+padding_size;i++)
+ GF->padddata[i] = 0.0;
+ }
+ }
+
+ if (!GF->padddata) {
+ printf ("FATAL ERROR: Cannot allocate data for %s [%d]\n",
+ GF->name, GF->gfno);
+ STOP;
+ }
+ GF->storage = 1;
+
+
+}
+
+ /*@@
+ @routine DisableGFDataStorage
+ @date Thu Apr 3 12:45:34 1997
+ @author Paul Walker
+ @desc
+ This routine disables the grid function storage.
+ That is, it un-allocates the 3D array and in its
+ place allocates a single Double with the value
+ 0.0. This allows us to still have something to
+ pass around (an array of size (1,1,1) in fortran
+ speak) but also to not need all our 3D arrays
+ "on" all the time.
+ @enddesc
+@@*/
+
+
+void DisableGFDataStorage(pGH *GH, pGF *GF) {
+ int i;
+
+ if (!GF->storage) {
+ printf ("Warning: Tried to disable %s when already disabled\n",
+ GF->name);
+ return;
+ }
+
+ if (GF->padddata) {
+ free(GF->padddata);
+ GF->padddata = NULL;
+ GF->data = NULL;
+ }
+
+ if (GF->send_buffer) {
+ for (i=0;i<6;i++)
+ if (GF->send_buffer[i]) {
+#ifdef MPI
+ if(GF->sreq[i] != MPI_REQUEST_NULL){
+ CACTUS_MPI_ERROR(MPI_Request_free(&(GF->sreq[i])));
+ }
+#endif
+ free(GF->send_buffer[i]);
+ GF->send_buffer[i] = NULL;
+ }
+ free(GF->send_buffer);
+ GF->send_buffer = NULL;
+ }
+
+
+ if (GF->recv_buffer) {
+ for (i=0;i<6;i++)
+ if (GF->recv_buffer[i]) {
+ free(GF->recv_buffer[i]);
+ GF->recv_buffer[i] = NULL;
+ }
+ free(GF->recv_buffer);
+ GF->recv_buffer = NULL;
+ }
+
+ GF->padddata = (Double *)malloc(sizeof(Double));
+ GF->data = GF->padddata;
+ GF->data[0] = 0.0; /* Very important! */
+ GF->storage = 0;
+}
+
+ /*@@
+ @routine SetGFComm
+ @date Thu Apr 3 12:46:23 1997
+ @author Paul Walker
+ @desc
+ This sets the docomm[3] array of the GF based
+ on the setting of the comm flag. Note the
+ comm flag constants are defined in
+ @seeheader pughDriver.h. As well as by SetupPGF,
+ this is called by thorns (e.g., elliptic) which
+ need to toggle this state.
+ @enddesc
+ @calledby SetupPGF
+@@*/
+
+
+void SetGFComm(pGF *res, int docomm) {
+ /* Copy docomm */
+ int i;
+ res->commflag = docomm;
+
+ for (i=0;i<6;i++)
+ res->docomm[i] = 0;
+
+ if (docomm == PUGH_NOCOMM) {
+ for (i=0;i<6;i++)
+ res->docomm[i] = 0;
+ } else if (docomm == PUGH_ALLCOMM) {
+ for (i=0;i<6;i++)
+ res->docomm[i] = 1;
+ } else if (docomm == PUGH_XCOMM) {
+ res->docomm[0] = 1;
+ res->docomm[1] = 1;
+ } else if (docomm == PUGH_YCOMM) {
+ res->docomm[2] = 1;
+ res->docomm[3] = 1;
+ } else if (docomm == PUGH_ZCOMM) {
+ res->docomm[4] = 1;
+ res->docomm[5] = 1;
+ } else if (docomm == PUGH_PLUSFACESCOMM) {
+ res->docomm[1] = 1;
+ res->docomm[3] = 1;
+ res->docomm[5] = 1;
+ } else if (docomm == PUGH_MINUSFACESCOMM) {
+ res->docomm[0] = 1;
+ res->docomm[2] = 1;
+ res->docomm[4] = 1;
+ } else {
+ printf("WARNING: Unsupported comm. in grid function %s .\n",res->name);
+ printf(" Perhaps misspelled in GridFuncList? Defaulting to allcomm.\n");
+ for (i=0;i<6;i++)
+ res->docomm[i] = 1;
+ }
+
+ /* Handle nx = 1 type cases. This is only important for one
+ processor MPI periodic boundaries.
+ */
+ /* This is a hack to get a GH. Unfortunately I decided not
+ to pass a GH to this routine, and I should go back and
+ change it, but that is a lot of changes so for now I'll
+ do the following.
+ */
+ if (GHList->GH->nx == 1) {
+ res->docomm[0] = 0;
+ res->docomm[1] = 0;
+ }
+
+ if (GHList->GH->ny == 1) {
+ res->docomm[2] = 0;
+ res->docomm[3] = 0;
+ }
+
+ if (GHList->GH->nz == 1) {
+ res->docomm[4] = 0;
+ res->docomm[5] = 0;
+ }
+}
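
EnableGFDataStorage above aligns each grid function at its own offset within a cache line: cache_size is 2^padding_cacheline_bits, start is where malloc happened to land within a line, and special_pad shifts the data pointer so that each GF begins gfno * padding_address_spacing Doubles past a line boundary. A standalone sketch of that arithmetic with made-up parameter values (the real values come from the parameter file):

#include <stdio.h>

/* Illustrative only: the alignment arithmetic from EnableGFDataStorage. */
int main(void)
{
  int padding_cacheline_bits  = 5;      /* 2^5 = 32 Doubles per cache line         */
  int padding_address_spacing = 4;
  int gfno = 3;
  long base = 1000003;                  /* pretend malloc address / sizeof(Double) */

  int cache_size  = 2 << (padding_cacheline_bits - 1);         /* 32               */
  int start       = base % cache_size;                         /* 3                */
  int special_pad = (gfno * padding_address_spacing
                     + cache_size - start) % cache_size;        /* (12+32-3)%32 = 9 */

  /* data = padddata + special_pad, so the data pointer lands at
     gfno * padding_address_spacing (= 12 here) Doubles past a line boundary. */
  printf("pad = %d, offset in line = %ld\n",
         special_pad, (base + special_pad) % cache_size);
  return 0;
}
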
diff --git a/src/pugh/SetupSliceCenter.c b/src/pugh/SetupSliceCenter.c
new file mode 100644
index 0000000..58e6a07
--- /dev/null
+++ b/src/pugh/SetupSliceCenter.c
@@ -0,0 +1,137 @@
+ /*@@
+ @file SetupSliceCenter.c
+ @date
+ @author
+ @desc
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+void SetupSliceCenter(pGH *GH) {
+ int slice_center;
+
+
+ /* slice_center used to be part of the GF, */
+ /* either 1 (default) or 0 (octant,edge)*/
+ /* now spxyz[0],spxyz[1],spxyz[2] are part of the GH */
+
+ if ((GH->spxyz[0]!=-1)||(GH->spxyz[1]!=-1)||(GH->spxyz[2]!=-1)) {
+ printf("WARNING in SetupSliceCenter: you already have these Slice Center settings:\n(OK for BoxInBox)");
+ printf("spxyz[0],spxyz[1],spxyz[2]: %d %d %d \n",
+ GH->spxyz[0], GH->spxyz[1], GH->spxyz[2]);
+ return;
+ }
+
+ slice_center=1;
+ if (Contains("grid","octant") || Contains("grid","quadrant") ||
+ Contains("grid","bitant"))
+ slice_center=0;
+ if (Contains("slice_loc","edge")) slice_center=0;
+
+ if (slice_center) {
+ /* For non-octant mode: the center for slicings */
+ /* is moved to the center index */
+ /* (unless nx,ny,nz=1) */
+ if (GH->nx == 1) {
+ GH->spxyz[0] = 0;
+ GH->sp2dxyz[0] = 0;
+ } else {
+ GH->spxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0];
+ GH->sp2dxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0];
+ }
+
+ if (GH->ny == 1) {
+ GH->spxyz[1] = 0;
+ GH->sp2dxyz[1] = 0;
+ } else {
+ GH->spxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1];
+ GH->sp2dxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1];
+ }
+
+ if (GH->nz == 1) {
+ GH->spxyz[2] = 0;
+ GH->sp2dxyz[2] = 0;
+ } else {
+ GH->spxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2];
+ GH->sp2dxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2];
+ }
+ }
+ /* for octant mode, the slicing center is the index 1 or zero*/
+ else {
+ if (GH->nx == 1) {
+ GH->spxyz[0] = 0;
+ GH->sp2dxyz[0] = 0;
+ } else {
+ GH->spxyz[0] = 1 - GH->lb[GH->myproc][0];
+ GH->sp2dxyz[0] = 1 - GH->lb[GH->myproc][0];
+ }
+
+ if (GH->ny == 1) {
+ GH->spxyz[1] = 0;
+ GH->sp2dxyz[1] = 0;
+ } else {
+ GH->spxyz[1] = 1 - GH->lb[GH->myproc][1];
+ GH->sp2dxyz[1] = 1 - GH->lb[GH->myproc][1];
+ }
+
+ if (GH->nz == 1) {
+ GH->spxyz[2] = 0;
+ GH->sp2dxyz[2] = 0;
+ } else {
+ GH->spxyz[2] = 1 - GH->lb[GH->myproc][2];
+ GH->sp2dxyz[2] = 1 - GH->lb[GH->myproc][2];
+ }
+ }
+
+ /* Special case: in quadrant mode x and y are like full, but
+ z is like octant. Thinking about this gives you a headache,
+ but it works.
+ */
+ if (Contains("grid","quadrant")) {
+ if (GH->nz == 1) {
+ GH->spxyz[2] = 0;
+ GH->sp2dxyz[2] = 0;
+ } else {
+ GH->spxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2];
+ GH->sp2dxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2];
+ }
+ }
+
+ /* Another special case: bitant! Hey, with full and octant
+ already implemented, bitant is a breeze on the neurons */
+ if (Contains("grid","bitant"))
+ { if (GH->nx == 1)
+ { GH->spxyz[0] = 0;
+ GH->sp2dxyz[0] = 0;
+ }
+ else
+ { GH->spxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0];
+ GH->sp2dxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0];
+ }
+ if (GH->ny == 1)
+ { GH->spxyz[1] = 0;
+ GH->sp2dxyz[1] = 0;
+ }
+ else
+ { GH->spxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1];
+ GH->sp2dxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1];
+ }
+ }
+
+#ifdef THORN_CARTOON_2D
+ /* for cartoon_2d, treat x as in octant, y and z as in full, BB */
+ if (Contains("cartoon_active","yes")) {
+ if (GH->nx == 1) {
+ GH->spxyz[0] = 0;
+ GH->sp2dxyz[0] = 0;
+ } else {
+ GH->spxyz[0] = 1 - GH->lb[GH->myproc][0];
+ GH->sp2dxyz[0] = 1 - GH->lb[GH->myproc][0];
+ }
+ }
+#endif
+}
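
The slice centers above are global grid indices (nx/2 for a full grid, 1 for an octant-style grid) converted into each processor's local index space by subtracting that processor's lower bound lb. A small sketch with made-up numbers (illustrative only):

#include <stdio.h>

/* Illustrative only: local slice-center index for a hypothetical
   two-processor split of a full (non-octant) grid in x. */
int main(void)
{
  int nx = 65;
  int lb[2] = {0, 32};                  /* pretend lower bounds per processor    */
  int myproc;
  for (myproc = 0; myproc < 2; myproc++) {
    int spx = nx / 2 - lb[myproc];      /* global centre 32 -> local 32, then 0  */
    printf("proc %d: spxyz[0] = %d\n", myproc, spx);
  }
  return 0;
}
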
diff --git a/src/pugh/SyncGroupGF.c b/src/pugh/SyncGroupGF.c
new file mode 100644
index 0000000..2d21ae2
--- /dev/null
+++ b/src/pugh/SyncGroupGF.c
@@ -0,0 +1,271 @@
+/*@@
+ @file SyncGroupGF.c
+ @author Paul Walker
+ @date March 1997
+ @desc
+ Synchronizes all the Grid Functions (which have their communications
+ on) in a given group which has previously been registered to
+ the GF data base with StoreGFGroupData. The MPI section was
+ formerly called SyncGH.c
+ @enddesc
+ @history
+ @hdate Sep 27 1998 @hauthor Gabrielle Allen, Tom Goodale
+ @hdesc Added the functionality to synchronise indices from a group
+ rather than the whole GH. Changed the routine/file names.
+ @endhistory
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+/*#define VERBOSE*/
+#undef COMM_TIMING
+
+int GetGFGroupData(const char *name, int *nGFs, int **GFIndices);
+
+/*@@
+ @routine SyncGroupGF
+ @author Paul Walker
+ @date March 1997
+ @desc
+ Synchronizes all the Grid Functions (which have their communications
+ on) in a given group which has previously been registered to
+ the GF data base with StoreGFGroupData. The MPI section was
+ formerly called SyncGH.c
+ @enddesc
+ @history
+ @hdate Sep 27 1998 @hauthor Gabrielle Allen, Tom Goodale
+ @hdesc Added the functionality to synchronise indices from a group
+ rather than the whole GH. Changed the routine/file names.
+ @endhistory
+ @@*/
+
+
+/* This routine works in one of two ways:
+ *
+ * 1. If using derived data types, it posts receives, posts sends
+ * waits for all sends and receives, and finishes the receives.
+ *
+ * 2. If using buffered communication, it posts receives, posts
+ * sends, waits for receives only, and finishes them. We don't
+ * need to wait for sends until the next time through. This
+ * is why the wait for the send buffer is immediately before
+ * the post send. (Since the send buffers are initially set
+ * to MPI_REQUEST_NULL the first Wait and any waits in
+ * unused directions are a wash).
+ *
+ * This routine is intertwined with the contents of routines
+ * pGF_PostRecv, pGF_PostSend, and pGF_FinishRecv.
+ *
+ * Without MPI, this does nothing, of course.
+ *
+ * Note this routine is also responsible for updating the
+ * commt global so we can get a measure of how much time we
+ * spend in communications overhead.
+ *
+ * Also note that it is more efficient to sync a number of
+ * GFs together (as a group or a hierarchy) rather than
+ * singly. This is because syncing single functions waits for the
+ * receives on a single function before posting the sends on
+ * all the other functions.
+ */
+
+void SyncGroupGF(pGH *GH,const char *groupname) {
+#ifdef MPI
+ int j, Dir, index;
+ double t1,t2,t3;
+ MPI_Request *sr = NULL;
+ MPI_Status ms, mss;
+#endif
+ int i;
+ double st[PUGH_NTIMERS],et[PUGH_NTIMERS];
+ int l;
+ int *GFindices;
+ int nGFs;
+
+ pughTimers(st);
+
+ /* Find which grid functions make up the group */
+ GetGFGroupData(groupname,&nGFs,&GFindices);
+
+ /* Say which grid functions will actually be synchronised */
+ if (Contains("cactus_sync_verbose","yes")) {
+ printf("Syncing %d grid functions in group %s\n",nGFs,groupname);
+ printf(" Syncing GFs ...");
+ for (i=0;i<nGFs;i++) {
+ if (GH->gridFuncs[GFindices[i]]->storage
+ && (GH->gridFuncs[GFindices[i]]->docomm[1] ||
+ GH->gridFuncs[GFindices[i]]->docomm[2] ||
+ GH->gridFuncs[GFindices[i]]->docomm[3] ||
+ GH->forceSync))
+ printf(" %s",GH->gridFuncs[GFindices[i]]->name);
+ }
+ printf("\n");
+ }
+
+#ifdef MPI
+ if (GH->commmodel == PUGH_DERIVEDTYPES) {
+ /* 2 faces, send and receive is the 2 * 2 */
+ sr = (MPI_Request *)malloc(nGFs * 2 * 2 * sizeof(MPI_Request));
+ for (i=0;i<nGFs * 2 * 2; i++)
+ sr[i] = MPI_REQUEST_NULL;
+ }
+
+ for (Dir = 0; Dir < 3; Dir ++) {
+#ifdef COMM_TIMING
+ t1 = MPI_Wtime();
+#endif
+ for(i=0;i<nGFs;i++) {
+ pGF_PostRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+ pGF_PostRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+ }
+
+#ifdef COMM_TIMING
+ t2 = MPI_Wtime();
+ printf ("PR : %lf\n",t2-t1);
+#endif
+ if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) {
+ for(i=0;i<nGFs;i++) {
+ /* Wait for the last send. Since these will be null if they
+ are not used, this is always safe.
+ */
+ MPI_Wait(&(GH->gridFuncs[GFindices[i]]->sreq[2*Dir]),&mss);
+ pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+ MPI_Wait(&(GH->gridFuncs[GFindices[i]]->sreq[2*Dir+1]),&mss);
+ pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+ }
+ } else {
+ for(i=0;i<nGFs;i++) {
+ /* With derived types the sends go straight from the GF data;
+ the send requests are waited on in the Waitall below.
+ */
+ pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+ pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+ }
+ }
+
+#ifdef COMM_TIMING
+ t1 = MPI_Wtime();
+ printf ("PS : %lf\n",t1-t2);
+#endif
+
+ /* Now comes the big difference between derived types and
+ allocated buffers. With derived types, we now have to
+ wait on all our recieve AND SEND buffers so we can
+ keep on using the send buffers ( as communications are
+ in-place). With the allocated we have to wait on each
+ recieve, but not on the send, since we don't need the
+ send buffer until we pack a send again (above)
+ */
+
+ if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) {
+ /* Wait on the receives one by one */
+ for(i=0;i<nGFs;i++) {
+ MPI_Wait(&(GH->gridFuncs[GFindices[i]]->rreq[2*Dir]),&mss);
+ pGF_FinishRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+ MPI_Wait(&(GH->gridFuncs[GFindices[i]]->rreq[2*Dir+1]),&mss);
+ pGF_FinishRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+ }
+ }
+
+ if (GH->commmodel == PUGH_DERIVEDTYPES) {
+ /* Load up the thing for the waitall */
+ for (i=0;i<nGFs;i++) {
+ int id = i*4;
+ if (GH->gridFuncs[GFindices[i]]->docomm[2*Dir] &&
+ GH->gridFuncs[GFindices[i]]->storage) {
+ sr[id] = GH->gridFuncs[GFindices[i]]->sreq[2*Dir];
+ sr[id+1] = GH->gridFuncs[GFindices[i]]->rreq[2*Dir];
+ } else {
+ sr[id] = MPI_REQUEST_NULL;
+ sr[id+1] = MPI_REQUEST_NULL;
+ }
+
+ if (GH->gridFuncs[GFindices[i]]->docomm[2*Dir+1] &&
+ GH->gridFuncs[GFindices[i]]->storage) {
+ sr[id+2] = GH->gridFuncs[GFindices[i]]->sreq[2*Dir+1];
+ sr[id+3] = GH->gridFuncs[GFindices[i]]->rreq[2*Dir+1];
+ } else {
+ sr[id+2] = MPI_REQUEST_NULL;
+ sr[id+3] = MPI_REQUEST_NULL;
+ }
+ }
+ /* Now do a waitall */
+ MPI_Waitall(4*nGFs,sr,MPI_STATUSES_IGNORE);
+
+ }
+ if (sr) free(sr);
+#ifdef COMM_TIMING
+ t2 = MPI_Wtime();
+ printf ("FR : %lf\n",t2-t1);
+#endif
+ }
+#endif
+ pughTimers(et);
+ for (l=0;l<PUGH_NTIMERS;l++)
+ commt[l] += et[l]-st[l];
+}
+
+
+/*@@
+ @routine SyncGH
+ @author Gabrielle Allen
+ @date Sep 27 1998
+ @desc
+ Wrapper to support SyncGH command, which is now implemented through
+ a group.
+ @enddesc
+ @@*/
+
+void SyncGH(pGH *GH) {
+
+ SyncGroupGF(GH,"$GH");
+
+}
+
+
+/*@@
+ @routine SyncGF
+ @author Gabrielle Allen
+ @date Sep 27 1998
+ @desc
+ Wrapper to support SyncGF command, which is now implemented through
+ a group. Note that it is more efficient to sync all required GFs at
+ the same time, using the @seeroutine SyncGroupGF routine with a given group.
+ @enddesc
+ @@*/
+
+void SyncGF(pGH *GH,pGF *GF) {
+
+ SyncGroupGF(GH,GF->name);
+
+}
+
+
+/* The old code for SyncGF is listed here, since it illustrates simply
+ * the use of pGF_PostRecv, pGF_PostSend, pGF_FinishRecv
+ * */
+
+/* void SyncGF(pGH *GH, pGF *GF) { */
+
+/* #ifdef MPI */
+/* int Dir; */
+/* MPI_Status mss; */
+/* for (Dir=0;Dir<3;Dir++) { */
+/* pGF_PostRecv(GH, GF, 2*Dir); */
+/* pGF_PostRecv(GH, GF, 2*Dir+1); */
+
+/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->sreq[2*Dir]),&mss)); */
+/* pGF_PostSend(GH, GF, 2*Dir); */
+/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->sreq[2*Dir+1]),&mss)); */
+/* pGF_PostSend(GH, GF, 2*Dir+1); */
+
+/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->rreq[2*Dir]),&mss)); */
+/* pGF_FinishRecv(GH, GF, 2*Dir); */
+/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->rreq[2*Dir+1]),&mss)); */
+/* pGF_FinishRecv(GH, GF, 2*Dir+1); */
+/* } */
+/* #endif */
+/* } */
+
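
The buffered-communication pattern described above (wait on the previous send immediately before reusing its buffer, relying on the requests being initialised to MPI_REQUEST_NULL so the first wait is a no-op) reduces to the following sketch; the function name and arguments here are illustrative, not pugh API:

#include <mpi.h>

/* Illustrative only: wait-then-repost pattern for a reusable send buffer.
   *sreq must be initialised to MPI_REQUEST_NULL before the first call. */
void post_send_with_reuse(double *buf, int count, int dest, int tag,
                          MPI_Comm comm, MPI_Request *sreq)
{
  MPI_Status status;
  MPI_Wait(sreq, &status);       /* no-op on the first pass          */
  /* ... it is now safe to repack buf ... */
  MPI_Isend(buf, count, MPI_DOUBLE, dest, tag, comm, sreq);
}
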
diff --git a/src/pugh/pGF_FinishRecv.c b/src/pugh/pGF_FinishRecv.c
new file mode 100644
index 0000000..316dfc1
--- /dev/null
+++ b/src/pugh/pGF_FinishRecv.c
@@ -0,0 +1,81 @@
+ /*@@
+ @file pGF_FinishRecv.c
+ @date Thu Apr 3 11:37:28 1997
+ @author Paul Walker
+ @desc
+ The routine which finalizes the MPI receives for a grid
+ function. Critically linked with @seefile pGF_PostRecv.c
+ and @seefile pGF_PostSend.c
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine pGF_FinishRecv
+ @date Thu Apr 3 11:38:07 1997
+ @author Paul Walker
+ @desc
+ This routine finalizes the MPI communication through a face.
+ It is crucially linked with @seeroutine pGF_PostRecv and
+ @seeroutine pGF_PostSend.
+ <p>
+ <b>Important!</b>
+ This routine does <b>not</b> wait on the receives.
+ Before it is called, you must do the MPI_Wait or
+ else you will not get the right answer. For an
+ example of this, see @seeroutine SyncGroupGF.
+ @enddesc
+ @calledby SyncGroupGF
+ @history
+ @hdate Nov 4 1998 @hauthor Gabrielle Allen
+ @hdesc Allow for forced synchronization of all GFs with storage
+ @endhistory
+@@*/
+
+void pGF_FinishRecv(pGH *GH, pGF *GF, int dir) {
+#ifdef MPI
+ int R;
+ MPI_Status ms;
+ int istart, iend, jstart, jend, kstart, kend;
+ int ii,jj,kk,xx;
+ int tmp;
+
+ /* Return if GF has no storage */
+ if (!(GF->storage)) return;
+
+ /* Return if communication not required and no forced synchronisation */
+ if (!(GH->forceSync || GF->docomm[dir])) return;
+
+ if (GH->neighbors[GH->myproc][dir] >= 0) {
+ /* Here wait one at a time, so the others can arrive while
+ we copy the data back in... */
+
+#ifdef DEBUG_PRINT
+ printf ("FINISHRECV: GF %s Side %d Proc %d request %d\n",
+ GF->name,dir,GH->myproc,GF->rreq[dir]);
+#endif
+
+ /* Copy buffer onto GF Storage */
+ istart = GH->ghosts[GF->stagger][dir][0][0];
+ iend = GH->ghosts[GF->stagger][dir][0][1];
+ jstart = GH->ghosts[GF->stagger][dir][1][0];
+ jend = GH->ghosts[GF->stagger][dir][1][1];
+ kstart = GH->ghosts[GF->stagger][dir][2][0];
+ kend = GH->ghosts[GF->stagger][dir][2][1];
+
+ /* Great now copy. Note ordering is just like in PostSend */
+ xx=0;
+ for (kk=kstart; kk<kend; kk++)
+ for (jj=jstart; jj<jend; jj++)
+ for (ii=istart; ii<iend; ii++)
+ GF->data[DATINDEX(GH,ii,jj,kk)] = GF->recv_buffer[dir][xx++];
+
+ }
+
+#endif
+}
diff --git a/src/pugh/pGF_PostRecv.c b/src/pugh/pGF_PostRecv.c
new file mode 100644
index 0000000..81c4a30
--- /dev/null
+++ b/src/pugh/pGF_PostRecv.c
@@ -0,0 +1,69 @@
+ /*@@
+ @file pGF_PostRecv.c
+ @date Fri Feb 21 11:28:06 1997
+ @author
+ @desc
+ Routines which post all the IRecv commands for a GF. These
+ allow the asynchronous model proposed in, for example,
+ @seeroutine SyncGF to go!
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine pGF_PostRecv
+ @date Thu Apr 3 12:10:46 1997
+ @author Paul Walker
+ @desc
+ This routine posts a receive (MPI_Irecv) into the buffer
+ for the data we expect from another processor, which
+ will be sent by @seeroutine pGF_PostSend and then
+ finalized by @seeroutine pGF_FinishRecv.
+ <p>
+ Aside from a silly calculation to get a unique tag
+ based on neighbors and processors, this is a very
+ straightforward routine.
+ @enddesc
+ @history
+ @hdate Nov 4 1998 @hauthor Gabrielle Allen
+ @hdesc Allow for forced synchronization of all GFs with storage
+ @endhistory
+ @@*/
+
+void pGF_PostRecv(pGH *GH, pGF *GF, int dir) {
+#ifdef MPI
+ int R;
+ int rtag;
+
+ /* Return if GF has no storage */
+ if (!(GF->storage)) return;
+
+ /* Return if communication not required and no forced synchronisation */
+ if (!(GH->forceSync || GF->docomm[dir])) return;
+
+ if (GH->neighbors[GH->myproc][dir] >= 0) {
+ rtag = 1000 + dir + 2 * (GH->myproc + GH->nprocs * GF->gfno);
+ /* mod the tag to force MPI compliance */
+ rtag = rtag % 32768;
+#ifdef DEBUG_PRINT
+ printf ("RECV GF %s Side %d Proc %d rtag %d size %d from proc %d\n",
+ GF->name,dir,GH->myproc,rtag,
+ GF->buffer_sz[dir],GH->neighbors[GH->myproc][dir]);
+#endif
+ if (GH->commmodel == PUGH_ALLOCATEDBUFFERS)
+ CACTUS_MPI_ERROR(MPI_Irecv((void*)(GF->recv_buffer[dir]),
+ GF->buffer_sz[dir],PUGH_MPI_TYPE,
+ GH->neighbors[GH->myproc][dir],rtag,
+ GH->PUGH_COMM_WORLD,&(GF->rreq[dir])));
+ if (GH->commmodel == PUGH_DERIVEDTYPES)
+ CACTUS_MPI_ERROR(MPI_Irecv((void *)(GF->data),
+ 1,GH->recv_dt[GF->stagger][dir],
+ GH->neighbors[GH->myproc][dir],rtag,
+ GH->PUGH_COMM_WORLD,&(GF->rreq[dir])));
+ }
+#endif
+}
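
The tag built here is designed to match the stag built in pGF_PostSend: the sender uses the complementary face index and the receiver's rank, so both sides arrive at the same number. A standalone check with made-up ranks and a made-up GF number (illustrative only):

#include <stdio.h>

/* Illustrative only: the rtag from pGF_PostRecv and the stag from
   pGF_PostSend agree for a hypothetical pair of processors. */
int main(void)
{
  int nprocs = 4, gfno = 7;
  int recv_proc = 2, recv_dir = 0;   /* proc 2 receives through its -x face */
  int send_dir  = 1;                 /* its neighbour sends through +x      */
  int rtag, stag, dircomp;

  rtag = (1000 + recv_dir + 2 * (recv_proc + nprocs * gfno)) % 32768;

  dircomp = send_dir + 1;                        /* complementary face */
  if (dircomp % 2 == 0) dircomp = send_dir - 1;  /* 1 -> 0             */
  stag = (1000 + dircomp
          + 2 * (recv_proc /* = neighbors[myproc][send_dir] */ + nprocs * gfno)) % 32768;

  printf("rtag = %d, stag = %d\n", rtag, stag);  /* both print 1060 */
  return 0;
}
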
diff --git a/src/pugh/pGF_PostSend.c b/src/pugh/pGF_PostSend.c
new file mode 100644
index 0000000..d8ec630
--- /dev/null
+++ b/src/pugh/pGF_PostSend.c
@@ -0,0 +1,116 @@
+ /*@@
+ @file pGF_PostSend.c
+ @date Thu Apr 3 11:58:14 1997
+ @author Paul Walker
+ @desc
+ The send-arm of the three-way pugh communications branch
+ which is @seefile pGF_PostRecv.c, @seefile pGF_PostSend.c
+ and @seefile pGF_FinishRecv.c. See the documentation for
+ @seeroutine pGF_PostSend for details on this routine.
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine pGF_PostSend
+ @date Thu Apr 3 11:58:59 1997
+ @author Paul Walker
+ @desc
+ This routine posts an asynchronous MPI send of a buffer
+ for a face (MPI_Isend). It does this by first packing up
+ a send buffer (allocated in @seeroutine SetupPGF) then
+ sending it out on the pipe.
+ <p>
+ Since this is an asynchronous communications model we
+ assume that a receive has been posted for the send.
+ Thus the correct calling order for this is as in
+ @seeroutine SyncGF, e.g., after a Recv.
+ <p>
+ Note this does <b>not</b> wait on the send buffer from previous
+ communications. It is the user's responsibility to wait on
+ that buffer.
+ @enddesc
+ @calledby SyncGroupGF
+ @history
+ @hdate Nov 4 1998 @hauthor Gabrielle Allen
+ @hdesc Allow for forced synchronization of all GFs with storage
+ @endhistory
+@@*/
+
+void pGF_PostSend(pGH *GH, pGF *GF, int dir) {
+ int R, stag, dircomp;
+ int tmp;
+ int ii,jj,kk,xx;
+ int istart, iend;
+ int jstart, jend;
+ int kstart, kend;
+
+#ifdef MPI
+ Double ts, tw, tp;
+
+ MPI_Status ms;
+
+ /* Return if GF has no storage */
+ if (!(GF->storage)) return;
+
+ /* Return if communication not required and no forced synchronisation */
+ if (!(GH->forceSync || GF->docomm[dir])) return;
+
+ if (GH->neighbors[GH->myproc][dir] >= 0) {
+ ts = MPI_Wtime();
+ dircomp = dir+1; /* Complementary direction */
+ if (dircomp %2 == 0) dircomp = dir-1;
+ /* Note this is the complement of the rtag set in PostRecv */
+ stag = 1000 + dircomp + 2 * (GH->neighbors[GH->myproc][dir] +
+ GH->nprocs * GF->gfno);
+ /* mod the tag to force MPI compliance */
+ stag = stag % 32768;
+
+#ifdef DEBUG_PRINT
+ printf ("SEND: GF %s Side %d Proc %d stag %d Size %d to %d\n",
+ GF->name,dir,GH->myproc,stag,
+ GF->buffer_sz[dir],GH->neighbors[GH->myproc][dir]);
+#endif
+
+ if (GH->commmodel == PUGH_DERIVEDTYPES)
+ CACTUS_MPI_ERROR(MPI_Isend((void *)(GF->data),
+ 1,GH->send_dt[GF->stagger][dir],
+ GH->neighbors[GH->myproc][dir],stag,
+ GH->PUGH_COMM_WORLD,&(GF->sreq[dir])));
+
+ if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) {
+ /* Copy my information into the send buffer */
+ istart = GH->overlap[GF->stagger][dir][0][0];
+ iend = GH->overlap[GF->stagger][dir][0][1];
+ jstart = GH->overlap[GF->stagger][dir][1][0];
+ jend = GH->overlap[GF->stagger][dir][1][1];
+ kstart = GH->overlap[GF->stagger][dir][2][0];
+ kend = GH->overlap[GF->stagger][dir][2][1];
+
+ /* Great now copy */
+ xx = 0;
+ for (kk=kstart; kk<kend; kk++)
+ for (jj=jstart; jj<jend; jj++)
+ for (ii=istart; ii<iend; ii++)
+ GF->send_buffer[dir][xx++] = GF->data[DATINDEX(GH,ii,jj,kk)];
+
+ tp = MPI_Wtime();
+
+ /* post send */
+ CACTUS_MPI_ERROR(MPI_Isend((void*)(GF->send_buffer[dir]),
+ GF->buffer_sz[dir],PUGH_MPI_TYPE,
+ GH->neighbors[GH->myproc][dir],stag,
+ GH->PUGH_COMM_WORLD,&(GF->sreq[dir])));
+ tw = MPI_Wtime();
+#ifdef COMM_TIMING
+ printf ("GF %s Dir %d Time %lf\n",
+ GF->name, dir, tw-ts);
+#endif
+ }
+ }
+#endif
+}
diff --git a/src/pugh/pGF_Reduction.c b/src/pugh/pGF_Reduction.c
new file mode 100644
index 0000000..bc8e211
--- /dev/null
+++ b/src/pugh/pGF_Reduction.c
@@ -0,0 +1,198 @@
+ /*@@
+ @file pGF_Reduction.c
+ @date Thu Apr 3 11:54:53 1997
+ @author Paul Walker
+ @desc
+ Various MPI reduction operators for maxima, minima, norms
+ and the like on grid functions. These are all pretty
+ straightforward thanks to the miracle of MPI_Allreduce.
+ @enddesc
+ @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+ @routine pGF_MaxVal
+ @date Thu Apr 3 11:55:25 1997
+ @author Paul Walker
+ @desc
+ Returns the maximum value of a distributed grid function.
+ @enddesc
+@@*/
+
+
+Double pGF_MaxVal(pGH *GH, pGF *GF) {
+ Double res, lres;
+ int i;
+
+ if (!GF->storage) return GF->maxval;
+
+ res = GF->data[0];
+ for (i=1;i<GH->npoints;i++)
+ res = (GF->data[i] > res? GF->data[i] : res);
+
+
+ /* So now res is the local max */
+#ifdef MPI
+ lres = res;
+ CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_MAX,GH->PUGH_COMM_WORLD));
+#endif
+
+ return res;
+}
+
+
+ /*@@
+ @routine pGF_MinVal
+ @date Thu Apr 3 11:55:48 1997
+ @author Paul Walker
+ @desc
+ Returns the minimum value of a distributed grid function.
+ @enddesc
+@@*/
+
+Double pGF_MinVal(pGH *GH, pGF *GF) {
+ Double res, lres;
+ int i;
+
+ if (!GF->storage) return GF->minval;
+
+ res = GF->data[0];
+ for (i=1;i<GH->npoints;i++)
+ res = (GF->data[i] < res ? GF->data[i] : res);
+
+ /* So now res is the local min */
+#ifdef MPI
+ lres = res;
+ CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_MIN,GH->PUGH_COMM_WORLD));
+#endif
+
+ return res;
+}
+
+
+
+ /*@@
+ @routine pGF_Norm1
+ @date Thu Apr 3 11:56:05 1997
+ @author Paul Walker
+ @desc
+ Returns the norm1 (the average of the absolute values) of a
+ distributed grid function.
+ @enddesc
+@@*/
+
+Double pGF_Norm1(pGH *GH, pGF *GF) {
+ Double res, lres;
+ int i, ii, jj, kk;
+ int is,ie,js,je,ks,ke;
+ int np,tnp;
+
+ if (!GF->storage) return GF->norm1;
+
+
+#define ABS(x) ((x)<0?-(x):(x))
+
+#ifndef MPI
+ res = 0.0;
+ for (i=0;i<GH->npoints;i++) res += ABS(GF->data[i]);
+ tnp = GH->npoints;
+
+#else
+ res = 0.0;
+ np = 0;
+ for (kk=GH->ownership[GF->stagger][2][0];
+ kk < GH->ownership[GF->stagger][2][1];
+ kk ++)
+ for (jj = GH->ownership[GF->stagger][1][0];
+ jj < GH->ownership[GF->stagger][1][1];
+ jj ++)
+ for (ii = GH->ownership[GF->stagger][0][0];
+ ii < GH->ownership[GF->stagger][0][1];
+ ii ++) {
+ res += ABS(GF->data[DATINDEX(GH,ii,jj,kk)]);
+ np++;
+ }
+ lres = res;
+ CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_SUM,GH->PUGH_COMM_WORLD));
+ CACTUS_MPI_ERROR(MPI_Allreduce(&np,&tnp,1,MPI_INT,MPI_SUM,GH->PUGH_COMM_WORLD));
+#endif
+
+ res = res/tnp;
+ return res;
+}
+
+
+ /*@@
+ @routine pGF_Norm2
+ @date Thu Apr 3 11:56:36 1997
+ @author Paul Walker
+ @desc
+ Returns the "norm2" of a distributed grid function. That
+ is, returns $\sqrt{\sum_i a_i^2 / np}$.
+ @enddesc
+@@*/
+
+
+Double pGF_Norm2(pGH *GH, pGF *GF) {
+ Double res, lres;
+ int i, ii, jj, kk;
+ int is,ie,js,je,ks,ke;
+ int np, tnp;
+
+ if (!GF->storage) return GF->norm2;
+
+#ifndef MPI
+ res = 0.0;
+ for (i=0;i<GH->npoints;i++) res += GF->data[i]*GF->data[i];
+ tnp = GH->npoints;
+
+#else
+ /* MPI version depends heavily on the ownership array */
+ res = 0.0;
+ np = 0;
+
+ for (kk=GH->ownership[GF->stagger][2][0];
+ kk < GH->ownership[GF->stagger][2][1];
+ kk ++)
+ for (jj = GH->ownership[GF->stagger][1][0];
+ jj < GH->ownership[GF->stagger][1][1];
+ jj ++)
+ for (ii = GH->ownership[GF->stagger][0][0];
+ ii < GH->ownership[GF->stagger][0][1];
+ ii ++) {
+ res += GF->data[DATINDEX(GH,ii,jj,kk)] *
+ GF->data[DATINDEX(GH,ii,jj,kk)];
+ np ++;
+ }
+ lres = res;
+ CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_SUM,GH->PUGH_COMM_WORLD));
+ CACTUS_MPI_ERROR(MPI_Allreduce(&np,&tnp,1,MPI_INT,MPI_SUM,GH->PUGH_COMM_WORLD));
+#endif
+
+ res = sqrt(res/tnp);
+ return res;
+}
+
+ /*@@
+ @routine pGF_AllNorms
+ @date Mon Jun 30 10:44:57 1997
+ @author Paul Walker
+ @desc
+ Returns all 4 norms into a Double pointer in the order
+ max min norm1 norm2
+ <p>
+ This is purely a convenience function.
+ @enddesc
+ @calls pGF_MaxVal, pGF_MinVal, pGF_Norm1, pGF_Norm2
+@@*/
+
+void pGF_AllNorms(pGH *GH, pGF *GF, Double *res) {
+ res[0] = pGF_MaxVal(GH,GF);
+ res[1] = pGF_MinVal(GH,GF);
+ res[2] = pGF_Norm1(GH,GF);
+ res[3] = pGF_Norm2(GH,GF);
+}
+
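
The four reductions above all follow the same pattern: compute a local value over the points this processor owns, then combine with MPI_Allreduce. A single-processor sketch of what the combined results work out to, on a tiny made-up array (illustrative only):

#include <math.h>
#include <stdio.h>

/* Illustrative only: serial versions of the four reductions. */
int main(void)
{
  double a[4] = {1.0, -2.0, 3.0, -4.0};
  int n = 4, i;
  double max = a[0], min = a[0], sum_abs = 0.0, sum_sq = 0.0;

  for (i = 0; i < n; i++) {
    if (a[i] > max) max = a[i];
    if (a[i] < min) min = a[i];
    sum_abs += fabs(a[i]);
    sum_sq  += a[i] * a[i];
  }
  /* max 3, min -4, norm1 10/4 = 2.5, norm2 sqrt(30/4) ~ 2.739 */
  printf("max %g min %g norm1 %g norm2 %g\n",
         max, min, sum_abs / n, sqrt(sum_sq / n));
  return 0;
}
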