Diffstat (limited to 'src/pugh')
-rw-r--r-- | src/pugh/SetupPGArray.c     |  84
-rw-r--r-- | src/pugh/SetupPGF.c         | 438
-rw-r--r-- | src/pugh/SetupSliceCenter.c | 137
-rw-r--r-- | src/pugh/SyncGroupGF.c      | 271
-rw-r--r-- | src/pugh/pGF_FinishRecv.c   |  81
-rw-r--r-- | src/pugh/pGF_PostRecv.c     |  69
-rw-r--r-- | src/pugh/pGF_PostSend.c     | 116
-rw-r--r-- | src/pugh/pGF_Reduction.c    | 198
8 files changed, 1394 insertions, 0 deletions
diff --git a/src/pugh/SetupPGArray.c b/src/pugh/SetupPGArray.c new file mode 100644 index 0000000..9a5cf71 --- /dev/null +++ b/src/pugh/SetupPGArray.c @@ -0,0 +1,84 @@ + /*@@ + @file SetupArray.c + @date Fri Feb 21 11:04:20 1997 + @author + @desc + Sets up (eg allocates) the pugh Array. This is trivial. + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine SetupPGArray + @date Fri Feb 21 11:05:04 1997 + @author + @desc + Note that npoints is a GLOBAL measure. + @enddesc + @calls DisableGFDataStorage, EnableGFDataStorage +@@*/ + + +pGArray *SetupPGArray(pGH *GH, char *name, int npoints) { + pGArray *res; /* The result */ + pGArray **rlist; /* A temporary to add to the GH */ + int lnpoints, i; + + /* Space for the struct */ + res = (pGArray *)malloc(sizeof(pGArray)); + + /* Set my parent GH */ + res->parentGH = GH; + + /* Set up the name */ + res->name = (char *)malloc(strlen(name)*sizeof(char)); + strcpy(res->name,name); + + /* Allocate my memory */ + if (npoints > 0) { + lnpoints = npoints/GH->nprocs; + if (GH->myproc == GH->nprocs - 1) { + lnpoints = npoints - lnpoints * (GH->nprocs-1); + } + res->npoints = lnpoints; + res->data = (Double *)malloc(lnpoints*sizeof(Double)); + } else { + res->npoints = 1; + res->data = (Double *)malloc(sizeof(Double)); + } + + /* Setup IO stuff to default no IO with space for xgraph files.*/ + res->do_arrayio = 0; + + res->IEEEfile = NULL; + + /* Finally add this to the list of grid funcs in the gh */ + GH->nArrays ++; + rlist = (pGArray **)malloc(GH->ngridFuncs * sizeof(pGArray *)); + if (GH->nArrays > 1) { + for (i=0;i<GH->nArrays-1;i++) + rlist[i] = GH->Arrays[i]; + free(GH->Arrays); + } + res->pgano = GH->nArrays - 1; + rlist[GH->nArrays-1] = res; + GH->Arrays = rlist; + + /* And return myself... */ + return res; +} + +void DestroyPGArray(pGH *GH, pGArray **pGAin) { + pGArray *pGA; + pGA = *pGAin; + + free(pGA->name); + free(pGA->data); + + free(pGA); + *pGAin=NULL; +} diff --git a/src/pugh/SetupPGF.c b/src/pugh/SetupPGF.c new file mode 100644 index 0000000..acec587 --- /dev/null +++ b/src/pugh/SetupPGF.c @@ -0,0 +1,438 @@ + /*@@ + @file SetupPGF.c + @date Fri Feb 21 11:04:20 1997 + @author + @desc + Sets up (eg allocates) the pugh Grid Function. This is + crucial to understaiding the grid function struct. + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine SetupPGF + @date Fri Feb 21 11:05:04 1997 + @author + @desc + Allocates the Grid Func structure (I wanted to say object ...) + There are several steps to this + <ul> + <li> Set up the name + <li> Set up the docomm flag and IO flags + <li> Allocate output file pointer arrays + <li> Allocate the data buffer + <li> Allocate the send buffer + <li> Add myself to the Grid Hierarchy + </ul> + But this is pretty straightforward code nonetheless. + <p> + Note that the input variable "storage" determines whether to + allocate the 3-D data or not. 
You can toggle this later + with @seeroutine DisableGFDataStorage and @seeroutine + EnableGFDataStorage + @enddesc + @calls DisableGFDataStorage, EnableGFDataStorage +@@*/ + + +pGF *SetupPGF(pGH *GH, char *name, int docomm, int storage, int stagger) { + pGF *res; /* The result */ + pGF **rlist; /* A temporary to add to the GH */ + int i, dir, dirp1, dirp2, sz; /* Counter thingies */ + + /* Space for the struct */ + res = (pGF *)malloc(sizeof(pGF)); + + /* Set my parent GH */ + res->parentGH = GH; + + /* Set up the name */ + res->name = (char *)malloc((1+strlen(name))*sizeof(char)); + strcpy(res->name,name); + + res->stagger = stagger; + if( (GH->stagger == PUGH_NO_STAGGER) && + (res->stagger != PUGH_VERTEXCTR) ) + { printf ("FATAL ERROR! Cannot have staggered grids inside a GH with \n"); + printf (" designation PUGH_NO_STAGGER. \n"); + STOP; + } + + + /* Setup IO stuff to default no IO with space for xgraph files.*/ + res->do_3dio = 0; + res->do_2dio = 0; + res->do_1dio = 0; + res->do_0dio = 0; + res->do_conv = 0; + + res->convfile = NULL; + + res->xgfile = (FILE **)malloc(5*sizeof(FILE *)); + for (i=0;i<5;i++) + res->xgfile[i] = NULL; + + res->zerodfile = (FILE **)malloc(4*sizeof(FILE *)); + for (i=0;i<4;i++) + res->zerodfile[i] = NULL; + + res->IEEEfile = NULL; + + for (i=0;i<3;i++) + res->IEEEfile_2d[i] = NULL; + + res->show_maxval = 0; + + res->lastio_it[0] = -1; res->lastio_it[1] = -1; + res->lastio_it[2] = -1; res->lastio_it[3] = -1; + + SetGFComm(res, docomm); + + + /* Finally add this to the list of grid funcs in the gh */ + GH->ngridFuncs ++; + rlist = (pGF **)malloc(GH->ngridFuncs * sizeof(pGF *)); + if (GH->ngridFuncs > 1) { + for (i=0;i<GH->ngridFuncs-1;i++) + rlist[i] = GH->gridFuncs[i]; + free(GH->gridFuncs); + } + res->gfno = GH->ngridFuncs - 1; + rlist[GH->ngridFuncs-1] = res; + GH->gridFuncs = rlist; + /* Set up the comm layer */ + res->padddata = NULL; + res->data = NULL; + res->send_buffer = NULL; + res->recv_buffer = NULL; + + if (storage == PUGH_NOSTORAGE) { + res->storage = PUGH_STORAGE; /* Fake it out... */ + DisableGFDataStorage(GH, res); + } else if (storage == PUGH_STORAGE) { + res->storage = PUGH_NOSTORAGE; /* Fake it out... */ + EnableGFDataStorage(GH, res); + } else { + printf ("WARNING: Storage keyword set incorrectly for %s [%d]\n",res->name,storage); + printf (" Perhaps misspelled in GridFuncList? Defaulting to storage active.\n"); + EnableGFDataStorage(GH, res); + } + +#ifdef MPI + /* Null my send and recieve requests */ + for (i=0;i<6;i++) { + res->sreq[i] = MPI_REQUEST_NULL; + res->rreq[i] = MPI_REQUEST_NULL; + } +#endif + + /* And return myself... */ + return res; +} + + /*@@ + @routine DestroyPGF + @date Thu Aug 21 11:44:22 1997 + @author Paul Walker + @desc + Destroys a GF object. + @enddesc +@@*/ + +void DestroyPGF(pGH *GH, pGF **GFin) { + pGF *GF; + GF = *GFin; + + if (GF->storage) + DisableGFDataStorage(GH, GF); + + free(GF->name); + free(GF->padddata); + + free(GF->xgfile); + free(GF->zerodfile); + + GF->data = NULL; + + free(GF); + *GFin = NULL; + +} + + + /*@@ + @routine EnableGFDataStorage + @date Thu Apr 3 12:44:38 1997 + @author Paul Walker + @desc + This routine toggles the data storage to the "on" + position. That means that data points to a 3D + array, rather than a single Double, and the + storage flag is set to one. This is used quite + a lot by thorns which need to toggle memory. for + instance, see the trK Driver in the util thron. 
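+      <p>
+      A minimal sketch of that toggling idiom (hypothetical caller code,
+      not part of this commit):
+      <pre>
+         EnableGFDataStorage(GH, GF);
+            ... fill and use GF->data as a full 3D array ...
+         DisableGFDataStorage(GH, GF);
+      </pre>
+      so that GF->data points at real storage only while it is needed.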
+ @enddesc +@@*/ + +/* Statics for padding */ +static int padding_active = -1; +static int padding_cacheline_bits, padding_size, padding_address_spacing; + +int zero_memory = -1; + +void EnableGFDataStorage(pGH *GH, pGF *GF) { + int i, dir, dirp1, dirp2, sz; + int special_pad, cache_size, start; + + if (zero_memory < 0) { + zero_memory = Contains("zero_memory","yes"); + if (zero_memory) { + printf ("Zeroing memory for allocated GFs at alloc time\n"); + } + } + + + if (padding_active == -1) { + padding_active = Contains("padding_active","yes"); + padding_cacheline_bits = Geti("padding_cacheline_bits"); + padding_size = Geti("padding_size"); + padding_address_spacing = Geti("padding_address_spacing"); + if (padding_active) { + printf ("PUGH Memory padding active. Stats:\n"); + printf (" cacheline_bits : %d\n", + padding_cacheline_bits); + printf (" size : %d\n", + padding_size); + printf (" spacing : %d\n", + padding_address_spacing); + } + } + + if (GF->storage) { + printf ("WARNING: Tried to enable %s when already enabled\n", + GF->name); + return; + } + + /* Set up the send buffers. Note we only do this if + we are not using the derived types comm style. + */ + assert(!GF->send_buffer); + GF->send_buffer = (Double **)malloc(6*sizeof(Double *)); + + assert(!GF->recv_buffer); + GF->recv_buffer = (Double **)malloc(6*sizeof(Double *)); + + for (i=0;i<6;i++) { + dir = i/2; + dirp1 = (i/2+1)%3; + dirp2 = (i/2+2)%3; + + if (dir == 0) { + sz = GH->stencil_width * GH->lny * GH->lnz; + } else if (dir == 1) { + sz = GH->stencil_width * GH->lnx * GH->lnz; + } else { + sz = GH->stencil_width * GH->lnx * GH->lny; + } + + if (GH->neighbors[GH->myproc][i] >= 0) { + GF->buffer_sz[i] = sz; + GF->send_buffer[i] = (Double *)malloc(sz*sizeof(Double)); + GF->recv_buffer[i] = (Double *)malloc(sz*sizeof(Double)); + assert(GF->recv_buffer[i]); + assert(GF->send_buffer[i]); + } else { + GF->buffer_sz[i] = 0; + GF->send_buffer[i] = NULL; + GF->recv_buffer[i] = NULL; + } + } + + /* Set up the storage */ + if (GF->padddata) free (GF->padddata); + if (!padding_active) { + GF->padddata = (Double *)malloc((GH->npoints)*sizeof(Double)); + GF->data = GF->padddata; /* No padding case */ + if (zero_memory) { + for (i=0;i<GH->npoints;i++) + GF->padddata[i] = 0.0; + } + } else { + GF->padddata = (Double *)malloc((GH->npoints + padding_size) + *sizeof(Double)); + + /* Align the actual starting address on a cache line. + More specifically align on a unique cache line for + each field. + */ + cache_size = 2 << (padding_cacheline_bits - 1); + start = ((long) (GF->padddata) / sizeof(Double))%cache_size; + + special_pad = ((GF->gfno) * padding_address_spacing + cache_size - start)%cache_size; + GF->data = (GF->padddata) + special_pad; + + if (special_pad > padding_size){ + printf("FATAL ERROR: padding size not large enough for cache type specified %d %d %d \n",special_pad,padding_size,cache_size); + STOP; + } + if (zero_memory) { + for (i=0;i<GH->npoints+padding_size;i++) + GF->padddata[i] = 0.0; + } + } + + if (!GF->padddata) { + printf ("FATAL ERROR: Cannot allocate data for %s [%d]\n", + GF->name, GF->gfno); + STOP; + } + GF->storage = 1; + + +} + + /*@@ + @routine DisableGFDataStorage + @date Thu Apr 3 12:45:34 1997 + @author Paul Walker + @desc + This routine disables the grid function storage. + That is, it un-allocates the 3D array and in its + place allocates a single Double with the value + 0.0. 
This allows us to still ahve something to + pass around (an array of size (1,1,1) in fortran + speak) but also to not need all our 3D arrays + "on" all the time. + @enddesc +@@*/ + + +void DisableGFDataStorage(pGH *GH, pGF *GF) { + int i; + + if (!GF->storage) { + printf ("Warning: Tried to disable %s when already disabled\n", + GF->name); + return; + } + + if (GF->padddata) { + free(GF->padddata); + GF->padddata = NULL; + GF->data = NULL; + } + + if (GF->send_buffer) { + for (i=0;i<6;i++) + if (GF->send_buffer[i]) { +#ifdef MPI + if(GF->sreq[i] != MPI_REQUEST_NULL){ + CACTUS_MPI_ERROR(MPI_Request_free(&(GF->sreq[i]))); + } +#endif + free(GF->send_buffer[i]); + GF->send_buffer[i] = NULL; + } + free(GF->send_buffer); + GF->send_buffer = NULL; + } + + + if (GF->recv_buffer) { + for (i=0;i<6;i++) + if (GF->recv_buffer[i]) { + free(GF->recv_buffer[i]); + GF->recv_buffer[i] = NULL; + } + free(GF->recv_buffer); + GF->recv_buffer = NULL; + } + + GF->padddata = (Double *)malloc(sizeof(Double)); + GF->data = GF->padddata; + GF->data[0] = 0.0; /* Very important! */ + GF->storage = 0; +} + + /*@@ + @routine SetGFComm + @date Thu Apr 3 12:46:23 1997 + @author Paul Walker + @desc + This sets the docomm[3] array of the GF based + on the setting of the comm flag. Note the + comm flag constants are defined in + @seeheader pughDriver.h. As well as SetupPGF + this is called by thorns (eg, elliptic) which + need to toggle this state. + @enddesc + @calledby SetupPGF +@@*/ + + +void SetGFComm(pGF *res, int docomm) { + /* Copy docomm */ + int i; + res->commflag = docomm; + + for (i=0;i<6;i++) + res->docomm[i] = 0; + + if (docomm == PUGH_NOCOMM) { + for (i=0;i<6;i++) + res->docomm[i] = 0; + } else if (docomm == PUGH_ALLCOMM) { + for (i=0;i<6;i++) + res->docomm[i] = 1; + } else if (docomm == PUGH_XCOMM) { + res->docomm[0] = 1; + res->docomm[1] = 1; + } else if (docomm == PUGH_YCOMM) { + res->docomm[2] = 1; + res->docomm[3] = 1; + } else if (docomm == PUGH_ZCOMM) { + res->docomm[4] = 1; + res->docomm[5] = 1; + } else if (docomm == PUGH_PLUSFACESCOMM) { + res->docomm[1] = 1; + res->docomm[3] = 1; + res->docomm[5] = 1; + } else if (docomm == PUGH_MINUSFACESCOMM) { + res->docomm[0] = 1; + res->docomm[2] = 1; + res->docomm[4] = 1; + } else { + printf("WARNING: Unsupported comm. in grid function %s .\n",res->name); + printf(" Perhaps misspelled in GridFuncList? Defaulting to allcomm.\n"); + for (i=0;i<6;i++) + res->docomm[i] = 1; + } + + /* Handle nx = 1 type cases. This is only important for one + processor MPI periodic boundaries. + */ + /* This is a hack to get a GH. Unfortunately I decided not + to pass a GH to this routine, and I should go back and + change it, but that is a lot of changes so for now I'll + do the following. 
+ */ + if (GHList->GH->nx == 1) { + res->docomm[0] = 0; + res->docomm[1] = 0; + } + + if (GHList->GH->ny == 1) { + res->docomm[2] = 0; + res->docomm[3] = 0; + } + + if (GHList->GH->nz == 1) { + res->docomm[4] = 0; + res->docomm[5] = 0; + } +} diff --git a/src/pugh/SetupSliceCenter.c b/src/pugh/SetupSliceCenter.c new file mode 100644 index 0000000..58e6a07 --- /dev/null +++ b/src/pugh/SetupSliceCenter.c @@ -0,0 +1,137 @@ + /*@@ + @file SetupSliceCenter.c + @date + @author + @desc + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + +void SetupSliceCenter(pGH *GH) { + int slice_center; + + + /* slice_center used to be part of the GF, */ + /* either 1 (default) or 0 (octant,edge)*/ + /* now spxyz[0],spxyz[1],spxyz[2] are part of the GH */ + + if ((GH->spxyz[0]!=-1)||(GH->spxyz[1]!=-1)||(GH->spxyz[2]!=-1)) { + printf("WARNING in SetupSliceCenter: you already have these Slice Center settings:\n(OK for BoxInBox)"); + printf("spxyz[0],spxyz[1],spxyz[2]: %d %d %d \n", + GH->spxyz[0], GH->spxyz[1], GH->spxyz[2]); + return; + } + + slice_center=1; + if (Contains("grid","octant") || Contains("grid","quadrant") || + Contains("grid","bitant")) + slice_center=0; + if (Contains("slice_loc","edge")) slice_center=0; + + if (slice_center) { + /* For NONE octant mode: the center for slicings */ + /* is moved to the center index */ + /* (unless nx,ny,nz=1) */ + if (GH->nx == 1) { + GH->spxyz[0] = 0; + GH->sp2dxyz[0] = 0; + } else { + GH->spxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0]; + GH->sp2dxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0]; + } + + if (GH->ny == 1) { + GH->spxyz[1] = 0; + GH->sp2dxyz[1] = 0; + } else { + GH->spxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1]; + GH->sp2dxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1]; + } + + if (GH->nz == 1) { + GH->spxyz[2] = 0; + GH->sp2dxyz[2] = 0; + } else { + GH->spxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2]; + GH->sp2dxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2]; + } + } + /* for octant mode, the slicing center is the index 1 or zero*/ + else { + if (GH->nx == 1) { + GH->spxyz[0] = 0; + GH->sp2dxyz[0] = 0; + } else { + GH->spxyz[0] = 1 - GH->lb[GH->myproc][0]; + GH->sp2dxyz[0] = 1 - GH->lb[GH->myproc][0]; + } + + if (GH->ny == 1) { + GH->spxyz[1] = 0; + GH->sp2dxyz[1] = 0; + } else { + GH->spxyz[1] = 1 - GH->lb[GH->myproc][1]; + GH->sp2dxyz[1] = 1 - GH->lb[GH->myproc][1]; + } + + if (GH->nz == 1) { + GH->spxyz[2] = 0; + GH->sp2dxyz[2] = 0; + } else { + GH->spxyz[2] = 1 - GH->lb[GH->myproc][2]; + GH->sp2dxyz[2] = 1 - GH->lb[GH->myproc][2]; + } + } + + /* Special case: in quadrant mode x and y are like full, but + z is like octant. Thinking about this gives you a headache, + but works. + */ + if (Contains("grid","quadrant")) { + if (GH->nz == 1) { + GH->spxyz[2] = 0; + GH->sp2dxyz[2] = 0; + } else { + GH->spxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2]; + GH->sp2dxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2]; + } + } + + /* Another special case: bitant! 
Hey, with full and octant + already implemented, bitant is a breeze on the neurons */ + if (Contains("grid","bitant")) + { if (GH->nx == 1) + { GH->spxyz[0] = 0; + GH->sp2dxyz[0] = 0; + } + else + { GH->spxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0]; + GH->sp2dxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0]; + } + if (GH->ny == 1) + { GH->spxyz[1] = 0; + GH->sp2dxyz[1] = 0; + } + else + { GH->spxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1]; + GH->sp2dxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1]; + } + } + +#ifdef THORN_CARTOON_2D + /* for cartoon_2d, treat x as in octant, y and z as in full, BB */ + if (Contains("cartoon_active","yes")) { + if (GH->nx == 1) { + GH->spxyz[0] = 0; + GH->sp2dxyz[0] = 0; + } else { + GH->spxyz[0] = 1 - GH->lb[GH->myproc][0]; + GH->sp2dxyz[0] = 1 - GH->lb[GH->myproc][0]; + } + } +#endif +} diff --git a/src/pugh/SyncGroupGF.c b/src/pugh/SyncGroupGF.c new file mode 100644 index 0000000..2d21ae2 --- /dev/null +++ b/src/pugh/SyncGroupGF.c @@ -0,0 +1,271 @@ +/*@@ + @file SyncGroupGH.c + @author Paul Walker + @date March 1997 + @desc + Synchronizes all the Grid Functions (which have their communications + on) in a given group which has previously been registered to + the GF data base with StoreGFGroupData. The MPI section was + formally called SyncGH.c + @enddesc + @history + @hdate Sep 27 1998 @hauthor Gabrielle Allen, Tom Goodale + @hdesc Added the functionality to synchronise indices from a group + rather than the whole GH. Changed the routine/file names. + @endhistory + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + +/*#define VERBOSE*/ +#undef COMM_TIMING + +int GetGFGroupData(const char *name, int *nGFs, int **GFIndices); + +/*@@ + @routine SyncGroupGF + @author Paul Walker + @date March 1997 + @desc + Synchronizes all the Grid Functions (which have their communications + on) in a given group which has previously been registered to + the GF data base with StoreGFGroupData. The MPI section was + formally called SyncGH.c + @enddesc + @history + @hdate Sep 27 1998 @hauthor Gabrielle Allen, Tom Goodale + @hdesc Added the functionality to synchronise indices from a group + rather than the whole GH. Changed the routine/file names. + @endhistory + @@*/ + + +/* This routine works in one of two ways: + * + * 1. If using derived data types, it posts receives, posts sends + * waits for all sends and receives, and finishes the receives. + * + * 2. If using buffered communication, it posts recieves, posts + * sends, waits for receives only, and finishes them. We don't + * need to wait for sends until the next time through. This + * is why the wait for the send buffer is immediately before + * the post send. (Since the send buffers are initially set + * to MPI_REQUEST_NULL the first Wait and any waits in + * unused directions are a wash). + * + * This routine is intertwined with the contents of routines + * pGF_PostRecv, pGF_PostSend, and pGF_FinishSend. + * + * Without MPI, this does nothing, of course. + * + * Note this routine is also responsible for updating the + * commt global so we can get a measure of how much time we + * spend in communications overhead. + * + * Also note that it is more efficient to sync a number of + * GF together (as a group or a hierachy) rather than + * singly. This is because syncing single functions waits for the + * receives on a single function before posting the sends on + * all the other functions. 
+ */ + +void SyncGroupGF(pGH *GH,const char *groupname) { +#ifdef MPI + int j, Dir, index; + double t1,t2,t3; + MPI_Request *sr = NULL; + MPI_Status ms, mss; +#endif + int i; + double st[PUGH_NTIMERS],et[PUGH_NTIMERS]; + int l; + int *GFindices; + int nGFs; + + pughTimers(st); + + /* Find which grid functions make up the group */ + GetGFGroupData(groupname,&nGFs,&GFindices); + + /* Say which grid functions will actually be synchronised */ + if (Contains("cactus_sync_verbose","yes")) { + printf("Syncing %d grid functions in group %s\n",nGFs,groupname); + printf(" Syncing GFs ..."); + for (i=0;i<nGFs;i++) { + if (GH->gridFuncs[GFindices[i]]->storage + && (GH->gridFuncs[GFindices[i]]->docomm[1] || + GH->gridFuncs[GFindices[i]]->docomm[2] || + GH->gridFuncs[GFindices[i]]->docomm[3] || + GH->forceSync)) + printf(" %s",GH->gridFuncs[GFindices[i]]->name); + } + printf("\n"); + } + +#ifdef MPI + if (GH->commmodel == PUGH_DERIVEDTYPES) { + /* 2 faces, send and receive is the 2 * 2 */ + sr = (MPI_Request *)malloc(nGFs * 2 * 2 * sizeof(MPI_Request)); + for (i=0;i<nGFs * 2 * 2; i++) + sr[i] = MPI_REQUEST_NULL; + } + + for (Dir = 0; Dir < 3; Dir ++) { +#ifdef COMM_TIMING + t1 = MPI_Wtime(); +#endif + for(i=0;i<nGFs;i++) { + pGF_PostRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir); + pGF_PostRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir+1); + } + +#ifdef COMM_TIMING + t2 = MPI_Wtime(); + printf ("PR : %lf\n",t2-t1); +#endif + if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) { + for(i=0;i<nGFs;i++) { + /* Wait for the last send. Since these will be null if the + are not used, this is always safe. + */ + MPI_Wait(&(GH->gridFuncs[GFindices[i]]->sreq[2*Dir]),&mss); + pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir); + MPI_Wait(&(GH->gridFuncs[GFindices[i]]->sreq[2*Dir+1]),&mss); + pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir+1); + } + } else { + for(i=0;i<nGFs;i++) { + /* Wait for the last send. Since these will be null if the + are not used, this is always safe. + */ + pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir); + pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir+1); + } + } + +#ifdef COMM_TIMING + t1 = MPI_Wtime(); + printf ("PS : %lf\n",t1-t2); +#endif + + /* Now comes the big difference between derived types and + allocated buffers. With derived types, we now have to + wait on all our recieve AND SEND buffers so we can + keep on using the send buffers ( as communications are + in-place). 
With the allocated we have to wait on each + recieve, but not on the send, since we don't need the + send buffer until we pack a send again (above) + */ + + if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) { + /* Do a wait any on the recieves */ + for(i=0;i<nGFs;i++) { + MPI_Wait(&(GH->gridFuncs[GFindices[i]]->rreq[2*Dir]),&mss); + pGF_FinishRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir); + MPI_Wait(&(GH->gridFuncs[GFindices[i]]->rreq[2*Dir+1]),&mss); + pGF_FinishRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir+1); + } + } + + if (GH->commmodel == PUGH_DERIVEDTYPES) { + /* Load up the thing for the waitall */ + for (i=0;i<nGFs;i++) { + int id = i*4; + if (GH->gridFuncs[GFindices[i]]->docomm[2*Dir] && + GH->gridFuncs[GFindices[i]]->storage) { + sr[id] = GH->gridFuncs[GFindices[i]]->sreq[2*Dir]; + sr[id+1] = GH->gridFuncs[GFindices[i]]->rreq[2*Dir]; + } else { + sr[id] = MPI_REQUEST_NULL; + sr[id+1] = MPI_REQUEST_NULL; + } + + if (GH->gridFuncs[GFindices[i]]->docomm[2*Dir+1] && + GH->gridFuncs[GFindices[i]]->storage) { + sr[id+2] = GH->gridFuncs[GFindices[i]]->sreq[2*Dir+1]; + sr[id+3] = GH->gridFuncs[GFindices[i]]->rreq[2*Dir+1]; + } else { + sr[id+2] = MPI_REQUEST_NULL; + sr[id+3] = MPI_REQUEST_NULL; + } + } + /* Now do a waitall */ + MPI_Waitall(4*nGFs,sr,&ms); + + } + if (sr) free(sr); +#ifdef COMM_TIMING + t2 = MPI_Wtime(); + printf ("FR : %lf\n",t2-t1); +#endif + } +#endif + pughTimers(et); + for (l=0;l<PUGH_NTIMERS;l++) + commt[l] += et[l]-st[l]; +} + + +/*@@ + @routine SyncGH + @author Gabrielle Allen + @date Sep 27 1998 + @desc + Wrapper to support SyncGH command, which is now implemented through + a group. + @enddesc + @@*/ + +void SyncGH(pGH *GH) { + + SyncGroupGF(GH,"$GH"); + +} + + +/*@@ + @routine SyncGF + @author Gabrielle Allen + @date Sep 27 1998 + @desc + Wrapper to support SyncGF command, which is now implemented through + a group. Note that it is more efficient to sync all required GFs at + the same time, using the @seeroutine SyncGroupGF routine with a given group. + @enddesc + @@*/ + +void SyncGF(pGH *GH,pGF *GF) { + + SyncGroupGF(GH,GF->name); + +} + + +/* The old code for SyncGF is listed here, since it illustrates simply + * the use of pGF_PostRecv, pGF_PostSend, pGF_FinishRecv + * */ + +/* void SyncGF(pGH *GH, pGF *GF) { */ + +/* #ifdef MPI */ +/* int Dir; */ +/* MPI_Status mss; */ +/* for (Dir=0;Dir<3;Dir++) { */ +/* pGF_PostRecv(GH, GF, 2*Dir); */ +/* pGF_PostRecv(GH, GF, 2*Dir+1); */ + +/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->sreq[2*Dir]),&mss)); */ +/* pGF_PostSend(GH, GF, 2*Dir); */ +/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->sreq[2*Dir+1]),&mss)); */ +/* pGF_PostSend(GH, GF, 2*Dir+1); */ + +/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->rreq[2*Dir]),&mss)); */ +/* pGF_FinishRecv(GH, GF, 2*Dir); */ +/* CACTUS_MPI_ERROR(MPI_Wait(&(GF->rreq[2*Dir+1]),&mss)); */ +/* pGF_FinishRecv(GH, GF, 2*Dir+1); */ +/* } */ +/* #endif */ +/* } */ + diff --git a/src/pugh/pGF_FinishRecv.c b/src/pugh/pGF_FinishRecv.c new file mode 100644 index 0000000..316dfc1 --- /dev/null +++ b/src/pugh/pGF_FinishRecv.c @@ -0,0 +1,81 @@ + /*@@ + @file pGF_FinishRecv.c + @date Thu Apr 3 11:37:28 1997 + @author Paul Walker + @desc + The routine which finalize the MPI recieves for a grid + function. Critically linked with @seefile pGF_PostRecv.c + and @seefile pGF_PostSend.c + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine pGF_FinishRecv + @date Thu Apr 3 11:38:07 1997 + @author Paul Walker + @desc + This routine finalizes the MPI communication through a face. 
+ It is crucially linked with @seeroutine pGF_PostRecv and + @seeroutine pGF_PostSend. + <p> + <b>Important!</b> + This routine does <b>not</b> wait on the recieves. + Before it is called, you must do the MPI_Wait or + else you will not get the right answer. for an + example of this, see @seeroutine SyncGroupGF or + @seeroutine SyncGroupGF + @enddesc + @calledby SyncGroupGF + @history + @hdate Nov 4 1998 @hauthor Gabrielle Allen + @hdesc Allow for forced synchronization of all GFs with storage + @endhistory +@@*/ + +void pGF_FinishRecv(pGH *GH, pGF *GF, int dir) { +#ifdef MPI + int R; + MPI_Status ms; + int istart, iend, jstart, jend, kstart, kend; + int ii,jj,kk,xx; + int tmp; + + /* Return if GF has no storage */ + if (!(GF->storage)) return; + + /* Return if communication not required and no forced synchronisation */ + if (!(GH->forceSync || GF->docomm[dir])) return; + + if (GH->neighbors[GH->myproc][dir] >= 0) { + /* Here wait one at a time, so the others can arrive while + we copy the data back in... */ + +#ifdef DEBUG_PRINT + printf ("FINISHRECV: GF %s Side %d Proc %d request %d\n", + GF->name,dir,GH->myproc,GF->rreq[dir]); +#endif + + /* Copy buffer onto GF Storage */ + istart = GH->ghosts[GF->stagger][dir][0][0]; + iend = GH->ghosts[GF->stagger][dir][0][1]; + jstart = GH->ghosts[GF->stagger][dir][1][0]; + jend = GH->ghosts[GF->stagger][dir][1][1]; + kstart = GH->ghosts[GF->stagger][dir][2][0]; + kend = GH->ghosts[GF->stagger][dir][2][1]; + + /* Great now copy. Note ordering is just link in PostSend */ + xx=0; + for (kk=kstart; kk<kend; kk++) + for (jj=jstart; jj<jend; jj++) + for (ii=istart; ii<iend; ii++) + GF->data[DATINDEX(GH,ii,jj,kk)] = GF->recv_buffer[dir][xx++]; + + } + +#endif +} diff --git a/src/pugh/pGF_PostRecv.c b/src/pugh/pGF_PostRecv.c new file mode 100644 index 0000000..81c4a30 --- /dev/null +++ b/src/pugh/pGF_PostRecv.c @@ -0,0 +1,69 @@ + /*@@ + @file pGF_PostRecv.c + @date Fri Feb 21 11:28:06 1997 + @author + @desc + Routines which post all the IRecv commands for a GF. These + allow the asyncronous model proposed in, for example, + @seeroutine SyncGF to go! + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine pGF_PostRecv + @date Thu Apr 3 12:10:46 1997 + @author Paul Walker + @desc + This routine posts a recieve (MPI_Irecv) of the buffer + we want to get the send from another processor, which + will be sent by @seeroutine pGF_PostSend and then + finalized by @seeroutoine pGF_FinishRecv. + <p> + Aside from a silly calculation to get a unique tag + based on neigbors and processors, this is a very + straightforward routine. 
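+      <p>
+      A worked example of the tag (hypothetical numbers): with nprocs = 4,
+      myproc = 2, gfno = 5 and dir = 3,
+      <pre>
+         rtag = 1000 + 3 + 2 * (2 + 4 * 5) = 1047
+      </pre>
+      which is the same value the sending neighbor computes for stag in
+      @seeroutine pGF_PostSend, since that routine uses the complementary
+      direction together with this processor's rank.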
+ @enddesc + @history + @hdate Nov 4 1998 @hauthor Gabrielle Allen + @hdesc Allow for forced synchronization of all GFs with storage + @endhistory + @@*/ + +void pGF_PostRecv(pGH *GH, pGF *GF, int dir) { +#ifdef MPI + int R; + int rtag; + + /* Return if GF has no storage */ + if (!(GF->storage)) return; + + /* Return if communication not required and no forced synchronisation */ + if (!(GH->forceSync || GF->docomm[dir])) return; + + if (GH->neighbors[GH->myproc][dir] >= 0) { + rtag = 1000 + dir + 2 * (GH->myproc + GH->nprocs * GF->gfno); + /* mod the tag to force MPI compliance */ + rtag = rtag % 32768; +#ifdef DEBUG_PRINT + printf ("RECV GF %s Side %d Proc %d rtag %d size %d from proc %d\n", + GF->name,dir,GH->myproc,rtag, + GF->buffer_sz[dir],GH->neighbors[GH->myproc][dir]); +#endif + if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) + CACTUS_MPI_ERROR(MPI_Irecv((void*)(GF->recv_buffer[dir]), + GF->buffer_sz[dir],PUGH_MPI_TYPE, + GH->neighbors[GH->myproc][dir],rtag, + GH->PUGH_COMM_WORLD,&(GF->rreq[dir]))); + if (GH->commmodel == PUGH_DERIVEDTYPES) + CACTUS_MPI_ERROR(MPI_Irecv((void *)(GF->data), + 1,GH->recv_dt[GF->stagger][dir], + GH->neighbors[GH->myproc][dir],rtag, + GH->PUGH_COMM_WORLD,&(GF->rreq[dir]))); + } +#endif +} diff --git a/src/pugh/pGF_PostSend.c b/src/pugh/pGF_PostSend.c new file mode 100644 index 0000000..d8ec630 --- /dev/null +++ b/src/pugh/pGF_PostSend.c @@ -0,0 +1,116 @@ + /*@@ + @file pGF_PostSend.c + @date Thu Apr 3 11:58:14 1997 + @author Paul Walker + @desc + The send-arm of the three-way pugh communcations branch + which is @seefile pGF_PostRecv.v, @seefile pGF_PostSend.c + and @seefile pGF_FinishRecv.c. See the documentation for + @seeroutine pGF_PostSend for details on this routine. + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine pGF_PostSend + @date Thu Apr 3 11:58:59 1997 + @author Paul Walker + @desc + This routine posts an asyncronous MPI send of a buffer + for a face (MPI_Isend). It does this by first packing up + a send buffer (allocated in @seeroutine SetupPGF) then + sending it out on the pipe. + <p> + Since this is an asynchronous communications model we + assume that a recieve has been posted for the send. + thus the correct calling order for this is as in + @seeroutine SyncGF, eg, after a Recv. + <p> + Note this does <b>not</b> wait on the send buffer from previous + communications. It is the users responsibility to wait on + that buffer. 
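+      <p>
+      In the buffered model the caller therefore retires the previous send
+      on a face before its buffer is reused, roughly (a sketch; see
+      @seeroutine SyncGroupGF for the real loop):
+      <pre>
+         MPI_Wait(&(GF->sreq[dir]), &status);
+         pGF_PostSend(GH, GF, dir);
+      </pre>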
+ @enddesc + @calledby SyncGroupGF + @history + @hdate Nov 4 1998 @hauthor Gabrielle Allen + @hdesc Allow for forced synchronization of all GFs with storage + @endhistory +@@*/ + +void pGF_PostSend(pGH *GH, pGF *GF, int dir) { + int R, stag, dircomp; + int tmp; + int ii,jj,kk,xx; + int istart, iend; + int jstart, jend; + int kstart, kend; + +#ifdef MPI + Double ts, tw, tp; + + MPI_Status ms; + + /* Return if GF has no storage */ + if (!(GF->storage)) return; + + /* Return if communication not required and no forced synchronisation */ + if (!(GH->forceSync || GF->docomm[dir])) return; + + if (GH->neighbors[GH->myproc][dir] >= 0) { + ts = MPI_Wtime(); + dircomp = dir+1; /* Complementary direction */ + if (dircomp %2 == 0) dircomp = dir-1; + /* Note this is the complement of the rtag set in PostRecv */ + stag = 1000 + dircomp + 2 * (GH->neighbors[GH->myproc][dir] + + GH->nprocs * GF->gfno); + /* mod the tag to force MPI compliance */ + stag = stag % 32768; + +#ifdef DEBUG_PRINT + printf ("SEND: GF %s Side %d Proc %d stag %d Size %d to %d\n", + GF->name,dir,GH->myproc,stag, + GF->buffer_sz[dir],GH->neighbors[GH->myproc][dir]); +#endif + + if (GH->commmodel == PUGH_DERIVEDTYPES) + CACTUS_MPI_ERROR(MPI_Isend((void *)(GF->data), + 1,GH->send_dt[GF->stagger][dir], + GH->neighbors[GH->myproc][dir],stag, + GH->PUGH_COMM_WORLD,&(GF->sreq[dir]))); + + if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) { + /* Copy my information into the send buffer */ + istart = GH->overlap[GF->stagger][dir][0][0]; + iend = GH->overlap[GF->stagger][dir][0][1]; + jstart = GH->overlap[GF->stagger][dir][1][0]; + jend = GH->overlap[GF->stagger][dir][1][1]; + kstart = GH->overlap[GF->stagger][dir][2][0]; + kend = GH->overlap[GF->stagger][dir][2][1]; + + /* Great now copy */ + xx = 0; + for (kk=kstart; kk<kend; kk++) + for (jj=jstart; jj<jend; jj++) + for (ii=istart; ii<iend; ii++) + GF->send_buffer[dir][xx++] = GF->data[DATINDEX(GH,ii,jj,kk)]; + + tp = MPI_Wtime(); + + /* post send */ + CACTUS_MPI_ERROR(MPI_Isend((void*)(GF->send_buffer[dir]), + GF->buffer_sz[dir],PUGH_MPI_TYPE, + GH->neighbors[GH->myproc][dir],stag, + GH->PUGH_COMM_WORLD,&(GF->sreq[dir]))); + tw = MPI_Wtime(); +#ifdef COMM_TIMING + printf ("GF %s Dir %d Time %lf\n", + GF->name, dir, tw-ts); +#endif + } + } +#endif +} diff --git a/src/pugh/pGF_Reduction.c b/src/pugh/pGF_Reduction.c new file mode 100644 index 0000000..bc8e211 --- /dev/null +++ b/src/pugh/pGF_Reduction.c @@ -0,0 +1,198 @@ + /*@@ + @file pGF_Reduction.c + @date Thu Apr 3 11:54:53 1997 + @author Paul Walker + @desc + Various MPI reduction operators for maxima, minima, norms + and the like on grid functions. These are all pretty + straighforward thanks to the miracle of MPI_Allreduce. + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine pGF_MaxVal + @date Thu Apr 3 11:55:25 1997 + @author Paul Walker + @desc + Returns the maximum value of a distributed grid function. + @enddesc +@@*/ + + +Double pGF_MaxVal(pGH *GH, pGF *GF) { + Double res, lres; + int i; + + if (!GF->storage) return GF->maxval; + + res = GF->data[0]; + for (i=1;i<GH->npoints;i++) + res = (GF->data[i] > res? GF->data[i] : res); + + + /* So now res is the local max */ +#ifdef MPI + lres = res; + CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_MAX,GH->PUGH_COMM_WORLD)); +#endif + + return res; +} + + + /*@@ + @routine pGF_MinVal + @date Thu Apr 3 11:55:48 1997 + @author Paul Walker + @desc + Returns the minimum value of a distributed grid function. 
+ @enddesc +@@*/ + +Double pGF_MinVal(pGH *GH, pGF *GF) { + Double res, lres; + int i; + + if (!GF->storage) return GF->minval; + + res = GF->data[0]; + for (i=1;i<GH->npoints;i++) + res = (GF->data[i] < res ? GF->data[i] : res); + + /* So now res is the local max */ +#ifdef MPI + lres = res; + CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_MIN,GH->PUGH_COMM_WORLD)); +#endif + + return res; +} + + + + /*@@ + @routine pGF_Norm1 + @date Thu Apr 3 11:56:05 1997 + @author Paul Walker + @desc + Returns the average of a distributed grid function. + @enddesc +@@*/ + +Double pGF_Norm1(pGH *GH, pGF *GF) { + Double res, lres; + int i, ii, jj, kk; + int is,ie,js,je,ks,ke; + int np,tnp; + + if (!GF->storage) return GF->norm1; + + +#define ABS(x) ((x)<0?-(x):(x)) + +#ifndef MPI + res = 0.0; + for (i=0;i<GH->npoints;i++) res += ABS(GF->data[i]); + tnp = GH->npoints; + +#else + res = 0.0; + np = 0; + for (kk=GH->ownership[GF->stagger][2][0]; + kk < GH->ownership[GF->stagger][2][1]; + kk ++) + for (jj = GH->ownership[GF->stagger][1][0]; + jj < GH->ownership[GF->stagger][1][1]; + jj ++) + for (ii = GH->ownership[GF->stagger][0][0]; + ii < GH->ownership[GF->stagger][0][1]; + ii ++) { + res += ABS(GF->data[DATINDEX(GH,ii,jj,kk)]); + np++; + } + lres = res; + CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_SUM,GH->PUGH_COMM_WORLD)); + CACTUS_MPI_ERROR(MPI_Allreduce(&np,&tnp,1,MPI_INT,MPI_SUM,GH->PUGH_COMM_WORLD)); +#endif + + res = res/tnp; + return res; +} + + + /*@@ + @routine pGF_Norm2 + @date Thu Apr 3 11:56:36 1997 + @author Paul Walker + @desc + Returns the "norm2" of a distributed grid function. That + is, returns $\sqrt{\Sigma (a_i * a_i) / np}$. + @enddesc +@@*/ + + +Double pGF_Norm2(pGH *GH, pGF *GF) { + Double res, lres; + int i, ii, jj, kk; + int is,ie,js,je,ks,ke; + int np, tnp; + + if (!GF->storage) return GF->norm2; + +#ifndef MPI + res = 0.0; + for (i=0;i<GH->npoints;i++) res += GF->data[i]*GF->data[i]; + tnp = GH->npoints; + +#else + /* MPI version depends heavily on the ownership array */ + res = 0.0; + np = 0; + + for (kk=GH->ownership[GF->stagger][2][0]; + kk < GH->ownership[GF->stagger][2][1]; + kk ++) + for (jj = GH->ownership[GF->stagger][1][0]; + jj < GH->ownership[GF->stagger][1][1]; + jj ++) + for (ii = GH->ownership[GF->stagger][0][0]; + ii < GH->ownership[GF->stagger][0][1]; + ii ++) { + res += GF->data[DATINDEX(GH,ii,jj,kk)] * + GF->data[DATINDEX(GH,ii,jj,kk)]; + np ++; + } + lres = res; + CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_SUM,GH->PUGH_COMM_WORLD)); + CACTUS_MPI_ERROR(MPI_Allreduce(&np,&tnp,1,MPI_INT,MPI_SUM,GH->PUGH_COMM_WORLD)); +#endif + + res = sqrt(res/tnp); + return res; +} + + /*@@ + @routine pGF_AllNorms + @date Mon Jun 30 10:44:57 1997 + @author Paul Walker + @desc + Returns all 4 norms into a Double pointer in the order + max min norm1 norm2 + <p> + This is purely a convenience function. + @enddesc + @calls pGF_MaxVal, pGF_MinVal, pGF_Norm1, pGF_Norm2 +@@*/ + +void pGF_AllNorms(pGH *GH, pGF *GF, Double *res) { + res[0] = pGF_MaxVal(GH,GF); + res[1] = pGF_MinVal(GH,GF); + res[2] = pGF_Norm1(GH,GF); + res[3] = pGF_Norm2(GH,GF); +} + |
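Taken together these files give the typical life cycle of a pugh grid function. A minimal sketch of a caller (hypothetical driver code, not part of this commit; it assumes a pGH *GH already built by the rest of pugh and the constants defined in pugh.h):

    pGF    *phi;
    Double  norms[4];

    /* vertex-centred GF with storage and communication on all faces */
    phi = SetupPGF(GH, "phi", PUGH_ALLCOMM, PUGH_STORAGE, PUGH_VERTEXCTR);

    /* ... update phi->data on the local patch ... */

    SyncGF(GH, phi);               /* exchange ghost zones (no-op without MPI) */
    pGF_AllNorms(GH, phi, norms);  /* norms = { max, min, norm1, norm2 }       */

    DestroyPGF(GH, &phi);

Grouped synchronisation via SyncGroupGF(GH, "groupname") follows the same pattern and is cheaper when several grid functions need their ghost zones at once, as the comments in SyncGroupGF.c explain.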