-rw-r--r-- | README                       |   19
-rw-r--r-- | interface.ccl                |    6
-rw-r--r-- | param.ccl                    |   27
-rw-r--r-- | schedule.ccl                 |    7
-rw-r--r-- | src/Comm.c                   |   88
-rw-r--r-- | src/GHExtension.c            |   44
-rw-r--r-- | src/SetupPGH.c               | 1072
-rw-r--r-- | src/Startup.c                |   67
-rw-r--r-- | src/include/pGArray.h        |   20
-rw-r--r-- | src/include/pGF.h            |  100
-rw-r--r-- | src/include/pGH.h            |  132
-rw-r--r-- | src/include/pugh.h           |  150
-rw-r--r-- | src/include/pughDriver.h     |   68
-rw-r--r-- | src/include/pughProblem.h    |   53
-rw-r--r-- | src/include/pugh_constants.h |   64
-rw-r--r-- | src/make.code.defn           |    8
-rw-r--r-- | src/pugh/SetupPGArray.c      |   84
-rw-r--r-- | src/pugh/SetupPGF.c          |  438
-rw-r--r-- | src/pugh/SetupSliceCenter.c  |  137
-rw-r--r-- | src/pugh/SyncGroupGF.c       |  271
-rw-r--r-- | src/pugh/pGF_FinishRecv.c    |   81
-rw-r--r-- | src/pugh/pGF_PostRecv.c      |   69
-rw-r--r-- | src/pugh/pGF_PostSend.c      |  116
-rw-r--r-- | src/pugh/pGF_Reduction.c     |  198
-rw-r--r-- | src/pugh_Comm.h              |   52
-rw-r--r-- | src/pugh_extension.h         |   30
26 files changed, 3401 insertions, 0 deletions
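
One piece of the decomposition logic below rewards a closer look before reading the diff: ProcTop in src/SetupPGH.c factors the processor count np into a near-cubic nprocx x nprocy x nprocz topology by taking the largest divisor of np no larger than np^(1/3) as one factor, then splitting the remainder the same way with sqrt. A minimal standalone sketch of that heuristic (proc_top and the test driver are illustrative re-creations, not code from this commit; compile with -lm):

#include <math.h>
#include <stdio.h>

/* Near-cubic factorisation np = x*y*z, following the heuristic
   described in ProcTop (src/SetupPGH.c). */
static void proc_top(int np, int *npx, int *npy, int *npz)
{
  int x, y, z, r;

  z = (int)(pow(np, 1.0/3.0) + 0.0001);   /* largest divisor <= cbrt(np) */
  while (np % z != 0) z--;
  r = np / z;
  y = (int)(sqrt((double)r) + 0.0001);    /* largest divisor <= sqrt(r)  */
  while (r % y != 0) y--;
  x = r / y;

  /* PUGH reverses the factors for better alignment of comm groups. */
  *npz = x;
  *npy = y;
  *npx = z;
}

int main(void)
{
  int nx, ny, nz;
  proc_top(12, &nx, &ny, &nz);
  printf("12 procs -> %d x %d x %d\n", nx, ny, nz);  /* 2 x 2 x 3 */
  return 0;
}
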
@@ -0,0 +1,19 @@
+Cactus Code Thorn pugh
+Authors    : ...
+Managed by : ... <...@...........>
+Version    : ...
+CVS info   : $Header: /usr/users/cactus/CCTK/lib/make/new_thorn.pl,v 1.1 1999/02/03 17:00:50 goodale Exp $
+--------------------------------------------------------------------------
+
+1. Purpose of the thorn
+
+This thorn does ...
+
+2. Dependencies of the thorn
+
+This thorn additionally requires thorns ...
+
+3. Thorn distribution
+
+This thorn is available to ...
+
+4. Additional information
diff --git a/interface.ccl b/interface.ccl
new file mode 100644
index 0000000..cd3d94f
--- /dev/null
+++ b/interface.ccl
@@ -0,0 +1,6 @@
+# Interface definition for thorn pugh
+# $Header$
+
+implements:driver
+inherits: Cactus
+
diff --git a/param.ccl b/param.ccl
new file mode 100644
index 0000000..6dcb5d7
--- /dev/null
+++ b/param.ccl
@@ -0,0 +1,27 @@
+# Parameter definitions for thorn pugh
+# $Header$
+
+public:
+
+INTEGER nx "The size of the grid in the x direction"
+{
+  *:-1 :: "Fixed size of minus this per processor"
+  1:*  :: "Grid of this size distributed across all processors"
+} 10
+
+INTEGER ny "The size of the grid in the y direction"
+{
+  *:-1 :: "Fixed size of minus this per processor"
+  1:*  :: "Grid of this size distributed across all processors"
+} 10
+
+INTEGER nz "The size of the grid in the z direction"
+{
+  *:-1 :: "Fixed size of minus this per processor"
+  1:*  :: "Grid of this size distributed across all processors"
+} 10
+
+INTEGER ghost_size "The width of the ghost zone"
+{
+  1:* :: "Must be a positive integer"
+} 1
diff --git a/schedule.ccl b/schedule.ccl
new file mode 100644
index 0000000..7731f24
--- /dev/null
+++ b/schedule.ccl
@@ -0,0 +1,7 @@
+# Schedule definitions for thorn pugh
+# $Header$
+
+schedule pugh_Startup at STARTUP
+{
+  LANG: C
+} "Pugh startup routine."
diff --git a/src/Comm.c b/src/Comm.c
new file mode 100644
index 0000000..69f1e9f
--- /dev/null
+++ b/src/Comm.c
@@ -0,0 +1,88 @@
+ /*@@
+   @file      Comm.c
+   @date      Thu Feb 4 11:34:29 1999
+   @author    Tom Goodale
+   @desc
+   Pugh communication functions.
+   @enddesc
+ @@*/
+
+#include <stdarg.h>
+#include "flesh.h"
+
+int pugh_SyncGroup(cGH *GH, const char *group)
+{
+  return 0;
+}
+
+int pugh_EnableGroupStorage(cGH *GH, const char *group)
+{
+  return 0;
+}
+
+int pugh_DisableGroupStorage(cGH *GH, const char *group)
+{
+  return 0;
+}
+
+int pugh_EnableGroupComm(cGH *GH, const char *group)
+{
+  return 0;
+}
+
+int pugh_DisableGroupComm(cGH *GH, const char *group)
+{
+  return 0;
+}
+
+int pugh_Barrier(cGH *GH)
+{
+  return 0;
+}
+
+int pugh_Reduce(cGH *GH,
+                const char *operation,
+                int n_infields,
+                int n_outfields,
+                int out_type,
+                void **outarray,
+                ...)
+{
+  return 0;
+}
+
+int pugh_Interp(cGH *GH,
+                const char *operation,
+                int n_coords,
+                int n_infields,
+                int n_outfields,
+                int n_points,
+                int type,
+                ...)
+{
+  return 0;
+}
+
+int pugh_ParallelInit(cGH *GH)
+{
+  return 0;
+}
+
+int pugh_Exit(cGH *GH)
+{
+  return 0;
+}
+
+int pugh_Abort(cGH *GH)
+{
+  return 0;
+}
diff --git a/src/GHExtension.c b/src/GHExtension.c
new file mode 100644
index 0000000..f36f6ee
--- /dev/null
+++ b/src/GHExtension.c
@@ -0,0 +1,44 @@
+ /*@@
+   @file      GHExtension.c
+   @date      Thu Feb 4 10:47:14 1999
+   @author    Tom Goodale
+   @desc
+   Pugh GH extension stuff.
+   @enddesc
+ @@*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "flesh.h"
+#include "pugh.h"
+#include "declare_parameters.h"
+
+static char *rcsid = "$Header$";
+
+void *pugh_SetupGH(tFleshConfig *config,
+                   int convergence_level,
+                   cGH *GH)
+{
+  DECLARE_PARAMETERS
+  pGH *newGH;
+
+  newGH = SetupPGH(nx, ny, nz, ghost_size, PUGH_NO_STAGGER);
+
+  if(!newGH)
+  {
+    fprintf(stderr, "PUGH: Failed to allocate memory for a pGH !\n");
+    exit(2);
+  }
+
+  return newGH;
+}
+
+int pugh_InitGH(cGH *GH)
+{
+  return 0;
+}
+
+int pugh_rfrTraverseGH(cGH *GH, int rfrpoint)
+{
+  return 0;
+}
diff --git a/src/SetupPGH.c b/src/SetupPGH.c
new file mode 100644
index 0000000..76d1fd5
--- /dev/null
+++ b/src/SetupPGH.c
@@ -0,0 +1,1072 @@
+ /*@@
+   @file      SetupPGH.c
+   @date      Fri Feb 21 10:18:17 1997
+   @author    Paul Walker
+   @desc
+   Initializes the pGH. You should really consult the
+   pGH description (@seeheader pGH.h) in order to clearly
+   understand what is going on.
+   @enddesc
+ @@*/
+
+#include "pugh.h"
+
+/* Old Id: SetupPGH.c,v 1.29 1999/01/12 08:23:35 bruegman Exp */
+
+static char *rcsid = "$Id$";
+
+ /*@@
+   @routine   SetupPGH
+   @date      Fri Feb 21 10:21:36 1997
+   @author    Paul Walker
+   @desc
+   Sets up the pGH distribution based on processor number and
+   problem size. This amounts to:
+   <ul>
+     <li> Allocating and copying some stuff
+     <li> Figuring out an optimal domain decomposition
+     <li> Setting up ghosts and overlaps.
+   </ul>
+   Of course the latter two are hard, so some more comments
+   are in order.
+   <p>
+   The second is in @seeroutine ProcTop now.
+   <p>
+   The third should be obvious ;-)
+   @enddesc
+   @calls     ProcTop
+   @history
+   @hauthor Gabrielle Allen @hdate 26 Sep 1998
+   @hdesc Changed IO parameter names
+   @hauthor Gabrielle Allen @hdate 17 Oct 1998
+   @hdesc Added initialisation of IO structure
+   @hauthor Gabrielle Allen @hdate 16 Nov 1998
+   @hdesc Added initialisation of GH->forceSync
+   @hdate Thu Feb 4 11:13:37 1999 @hauthor Tom Goodale
+   @hdesc Modified for Cactus 4.0
+ @@*/
+
+pGH *SetupPGH(int nx, int ny, int nz, int stencil_width, int staggertype)
+{
+  pGH *GH;
+
+  void pGH_SetupBasics(pGH *, int, int, int, int, int);
+  void pGH_SetupBounds(pGH *, int, int, int);
+  void pGH_SetupOwnership(pGH *);
+
+  /* Allocate for myself */
+  GH = (pGH *)malloc(sizeof(pGH));
+
+  pGH_SetupBasics(GH, nx, ny, nz, stencil_width, staggertype);
+  pGH_SetupBounds(GH, nx, ny, nz);
+  pGH_SetupOwnership(GH);
+
+  /* Slice center not yet set: -1 */
+  GH->spxyz[0] = -1;
+  GH->spxyz[1] = -1;
+  GH->spxyz[2] = -1;
+
+  /* And we're done... */
+  return GH;
+}
+
+
+void pGH_SetupBasics(pGH *GH, int nx, int ny, int nz,
+                     int stencil_width, int staggertype)
+{
+  int i;
+  int nprocx, nprocy, nprocz;
+
+  pGHList *ltemp;
+
+  void ProcTop(int, int *, int *, int *);
+
+  /* MPI goobits */
+#ifdef MPI
+  /* Set up my communicator. This would allow us to run pugh on
+     a subset of processors at a later date if, for instance, we
+     were using Panda or whatnot.
+   */
+  CACTUS_MPI_ERROR(MPI_Comm_dup(MPI_COMM_WORLD, &(GH->PUGH_COMM_WORLD)));
+
+  CACTUS_MPI_ERROR(MPI_Comm_size(GH->PUGH_COMM_WORLD, &(GH->nprocs)));
+  CACTUS_MPI_ERROR(MPI_Comm_rank(GH->PUGH_COMM_WORLD, &(GH->myproc)));
+#else
+  GH->nprocs = 1;
+  GH->myproc = 0;
+#endif
+
+  /* Activate GH (convergence grids can be inactivated by the NaN check
+     so the evolution will carry on) */
+  GH->active = 1;
+
+  /* Figure out the processor topology first so we can have constant
+     work per processor.
+ */ + ProcTop(GH->nprocs,&nprocx,&nprocy,&nprocz); + GH->nprocx = nprocx; + GH->nprocy = nprocy; + GH->nprocz = nprocz; + + + /* Set up the global size */ + /* Try this exceptionally cheap method */ + if (nx < 0 && GH->nprocx > 1) { + GH->nx = (GH->nprocx-2) * (-nx - 2*stencil_width) + + 2 * (-nx - stencil_width); + if (staggertype == PUGH_STAGGER) GH->nx -= nprocx-1; + } else { + GH->nx = abs(nx); + } + + if (ny < 0 && GH->nprocy > 1) { + GH->ny = (GH->nprocy-2) * (-ny - 2*stencil_width) + + 2 * (-ny - stencil_width); + if (staggertype == PUGH_STAGGER) GH->ny -= nprocy-1; + } else { + GH->ny = abs(ny); + } + + if (nz < 0 && GH->nprocz > 1) { + GH->nz = (GH->nprocz-2) * (-nz - 2*stencil_width) + + 2 * (-nz - stencil_width); + if (staggertype == PUGH_STAGGER) GH->nz -= nprocz-1; + } else { + GH->nz = abs(nz); + } + + GH->stencil_width = stencil_width; + + + /* + if (Contains("commmodel","loc") != 0) { + GH->commmodel = PUGH_ALLOCATEDBUFFERS; + } else { + GH->commmodel = PUGH_DERIVEDTYPES; + } + */ + GH->commmodel = PUGH_ALLOCATEDBUFFERS; + +#ifdef 0 + /* Set up the IO Processor. */ + GH->IO.outpfx[0] = '\0'; + + if (Contains("iomode","proc")) { + GH->IO.ioproc = GH->myproc; + GH->IO.nioprocs = GH->nprocs; + GH->IO.ioproc_every = 1; + } else if (Contains("iomode","np")) { + if (Geti("ioproc_every") > GH->nprocs) { + GH->IO.ioproc_every = GH->nprocs; + printf("Reducing ioproc_every to %d\n",GH->IO.ioproc_every); + } else { + GH->IO.ioproc_every = Geti("ioproc_every"); + } + GH->IO.nioprocs = (GH->nprocs) / GH->IO.ioproc_every + + (GH->nprocs % GH->IO.ioproc_every ? 1 : 0); + GH->IO.ioproc = GH->myproc - GH->myproc % GH->IO.ioproc_every; + } else if (Contains_bounded("iomode","onefile")) { + GH->IO.ioproc = 0; + GH->IO.nioprocs = 1; + GH->IO.ioproc_every = GH->nprocs; + } else { + printf ("I don't understand iomode setting. Using one file\n"); + GH->IO.ioproc = 0; + GH->IO.nioprocs = 1; + GH->IO.ioproc_every = GH->nprocs; + } + + GH->IO.downsample = Geti("out_downsample");; + GH->IO.onefileperslice = Contains("onefileperslice","yes"); + + /* At the moment only have unchunked for a single output file */ + if (Contains("iomode","unchunked")) { + if (GH->IO.ioproc_every >= GH->nprocs) + GH->IO.unchunkedresult = 1; + else { + printf("Unchunked out not currently supported for multiple\n"); + printf("output file ... 
output will be chunked\n"); + GH->IO.unchunkedresult = 0; + } + } else { + GH->IO.unchunkedresult = 0; + } + + /* See if we should be verbose or not */ + if (Contains("IO_verbose","yes")) { + GH->IO.verbose = 1; + } else { + GH->IO.verbose = 0; + } + + /* See what we should output */ + if (Contains("IO_datestamp","no")) { + GH->IO.datestamp = 0; + } else { + GH->IO.datestamp = 1; + } + if (Contains("IO_parameters","no")) { + GH->IO.parameters = 0; + } else { + GH->IO.parameters = 1; + } + if (Contains("IO_scalars","no")) { + GH->IO.scalars = 0; + } else { + GH->IO.scalars = 1; + } + if (Contains("IO_structures","no")) { + GH->IO.structures = 0; + } else { + GH->IO.structures = 1; + } + + /* Check if Panda is on */ +#ifdef THORN_PANDA + GH->IO.panda_on = Contains("panda_active","yes"); +#else + GH->IO.panda_on = 0; +#endif + +#endif /* 0 */ + + /* set staggering flag */ + GH->stagger = staggertype; + + if (Contains("bound","periodic")) { + GH->periodic = 1; + } else { + GH->periodic = 0; + } + + /* Grid function list */ + GH->ngridFuncs = 0; + GH->gridFuncs = NULL; + + /* Array Llist */ + GH->nArrays = 0; + GH->Arrays = NULL; + + /* Allocate the bounds memory */ + GH->lb = (int **)malloc(GH->nprocs*sizeof(int *)); + GH->ub = (int **)malloc(GH->nprocs*sizeof(int *)); + GH->neighbors = (int **)malloc(GH->nprocs*sizeof(int *)); + + GH->lb[0] = (int *)malloc(GH->nprocs*3*sizeof(int)); + for (i=1;i<GH->nprocs;i++) + GH->lb[i] = GH->lb[0] + i*3; + + GH->ub[0] = (int *)malloc(GH->nprocs*3*sizeof(int)); + for (i=1;i<GH->nprocs;i++) + GH->ub[i] = GH->ub[0] + i*3; + + GH->neighbors[0] = (int *)malloc(GH->nprocs*6*sizeof(int)); + for (i=1;i<GH->nprocs;i++) + GH->neighbors[i] = GH->neighbors[0] + i*6; + + /* Remote neighbor points */ + GH->rnpoints = (int *)malloc(GH->nprocs*sizeof(int)); + GH->rnx = (int *)malloc(GH->nprocs*sizeof(int)); + GH->rny = (int *)malloc(GH->nprocs*sizeof(int)); + GH->rnz = (int *)malloc(GH->nprocs*sizeof(int)); + + /* In general - we will noodle this later.*/ + GH->level = 0; + GH->mglevel = 0; + GH->convlevel = 0; + GH->forceSync = 0; + + + /* Finally add myself to the pGH List */ + /* + ltemp = (pGHList *)malloc(sizeof(pGHList)); + ltemp->next = GHList; + ltemp->GH = GH; + GHList = ltemp; + */ +} + + +void pGH_SetupBounds(pGH *GH,int nx,int ny,int nz) { + int i,j,k,l,dir,staglcv; + int xstep, ystep, zstep; + int lnx, lny, lnz; + int *dtindexes, *one, maxs, dirs[3]; + int maxpoints, minpoints; + Double avgpoints; + int istart,jstart,kstart,iend,jend,kend; + int nprocx,nprocy,nprocz; + int staggertype; + + staggertype = GH->stagger; + + nprocx = GH->nprocx; + nprocy = GH->nprocy; + nprocz = GH->nprocz; + + /* Now distribute the points among the processors. */ + + /* fixed work per proc? 
*/ + if (nx < 0) { + for (i=0;i<nprocx;i++) + for (j=0;j<nprocy;j++) + for (k=0;k<nprocz;k++) { + int pnum = i + nprocx*(j+nprocy*k); + + /* Bounding box information */ + + if (i == 0) GH->lb[pnum][0] = 0; + else {GH->lb[pnum][0] = GH->ub[pnum-1][0] + 1 - + 2*(GH->stencil_width); + if(staggertype == PUGH_STAGGER) GH->lb[pnum][0] --; + } + GH->ub[pnum][0] = GH->lb[pnum][0] + abs(nx) - 1; + + if (j == 0) GH->lb[pnum][1] = 0; + else {GH->lb[pnum][1] = GH->ub[pnum-nprocx][1] + 1 - + 2*(GH->stencil_width); + if(staggertype == PUGH_STAGGER) GH->lb[pnum][1] --; + } + GH->ub[pnum][1] = GH->lb[pnum][1] + abs(ny) - 1; + + if (k == 0) GH->lb[pnum][2] = 0; + else {GH->lb[pnum][2] = GH->ub[pnum-nprocx*nprocy][2] + 1 - + 2*(GH->stencil_width); + if(staggertype == PUGH_STAGGER) GH->lb[pnum][2] --; + } + GH->ub[pnum][2] = GH->lb[pnum][2] + abs(nz) - 1; + + } + } + else + { + +#ifdef THORN_BAM_ELLIPTIC + /* BB: for multigrid, boundary points of octants are treated as ghosts + put them on single processor! + + 1. we use the standard algorithm on a grid reduced by stencil_width + in every direction to guarantee the proper grid alignment + */ + if (Contains("grid", "BAM")) + { if( Contains("grid", "octant")) + { GH->nx -= GH->stencil_width; + GH->ny -= GH->stencil_width; + GH->nz -= GH->stencil_width; + } + if( Contains("grid", "bitant")) + GH->nz -= GH->stencil_width; + } +#endif + + xstep = (GH->nx-1) / nprocx; + ystep = (GH->ny-1) / nprocy; + zstep = (GH->nz-1) / nprocz; + + for (i=0;i<nprocx;i++) + for (j=0;j<nprocy;j++) + for (k=0;k<nprocz;k++) { + int pnum = i + nprocx*(j+nprocy*k); + + /* Bounding box information */ + if (i == 0) GH->lb[pnum][0] = 0; + else {GH->lb[pnum][0] = i*xstep +1 - GH->stencil_width; + if(staggertype == PUGH_STAGGER) + GH->lb[pnum][0] --; + } + if (i == nprocx-1) GH->ub[pnum][0] = GH->nx-1; + else GH->ub[pnum][0] = (i+1)*xstep + GH->stencil_width; + + if (j == 0) GH->lb[pnum][1] = 0; + else {GH->lb[pnum][1] = j*ystep +1 - GH->stencil_width; + if(staggertype == PUGH_STAGGER) + GH->lb[pnum][1] --; + } + if (j == nprocy-1) GH->ub[pnum][1] = GH->ny-1; + else GH->ub[pnum][1] = (j+1)*ystep + GH->stencil_width; + + if (k == 0) GH->lb[pnum][2] = 0; + else {GH->lb[pnum][2] = k*zstep +1 - GH->stencil_width; + if(staggertype == PUGH_STAGGER) + GH->lb[pnum][2] --; + } + if (k == nprocz-1) GH->ub[pnum][2] = GH->nz-1; + else GH->ub[pnum][2] = (k+1)*zstep + GH->stencil_width; + + } + +#ifdef THORN_BAM_ELLIPTIC + /* 2. 
add ghosts to grids at octant boundary using uniform shift */ + + if (Contains("grid", "BAM")) + { if (Contains("grid", "octant")) + { GH->nx += GH->stencil_width; + GH->ny += GH->stencil_width; + GH->nz += GH->stencil_width; + for (i = 0; i < GH->nprocs; i++) { + for (j = 0; j < 3; j++) { + if (GH->lb[i][j]) GH->lb[i][j] += GH->stencil_width; + GH->ub[i][j] += GH->stencil_width; + } + } + } + if (Contains("grid", "bitant")) + { GH->nz += GH->stencil_width; + for (i = 0; i < GH->nprocs; i++) { + for (j = 2; j < 3; j++) { + if (GH->lb[i][j]) GH->lb[i][j] += GH->stencil_width; + GH->ub[i][j] += GH->stencil_width; + } + } + } + } +#endif + + } +} + + + + +void pGH_SetupOwnership(pGH *GH) { + int i,j,k,l,dir,staglcv; + int xstep, ystep, zstep; + int lnx, lny, lnz; + int *dtindexes, *one, maxs, dirs[3]; + int maxpoints, minpoints; + Double avgpoints; + int istart,jstart,kstart,iend,jend,kend; + int nprocx,nprocy,nprocz; + + maxpoints = 0; minpoints = GH->nx*GH->ny*GH->nz; avgpoints = 0; + + nprocx = GH->nprocx; + nprocy = GH->nprocy; + nprocz = GH->nprocz; + + for (i=0;i<nprocx;i++) + for (j=0;j<nprocy;j++) + for (k=0;k<nprocz;k++) { + int pnum = i + nprocx*(j+nprocy*k); + + /* Neighbors are easy! Just recall that i dir is x dir etc ...*/ + if (i == nprocx-1) + if (GH->periodic) + /* Use the i = 0 processor */ + GH->neighbors[pnum][XDP] = 0 + nprocx*(j+nprocy*k); + else + GH->neighbors[pnum][XDP] = -1; + else + GH->neighbors[pnum][XDP] = (i+1) + nprocx*(j+nprocy*k); + + if (i == 0) + if (GH->periodic) + /* Use the nprocx processor */ + GH->neighbors[pnum][XDM] = nprocx-1 + nprocx*(j+nprocy*k); + else + GH->neighbors[pnum][XDM] = -1; + else + GH->neighbors[pnum][XDM] = (i-1) + nprocx*(j+nprocy*k); + + if (j == nprocy-1) + if (GH->periodic) + /* Use the 0 processor */ + GH->neighbors[pnum][YDP] = (i) + nprocx*(0+nprocy*k); + else + GH->neighbors[pnum][YDP] = -1; + else + GH->neighbors[pnum][YDP] = (i) + nprocx*(j+1+nprocy*k); + + if (j == 0) + if (GH->periodic) + /* Use the nprocy-1 processor */ + GH->neighbors[pnum][YDM] = (i) + nprocx*(nprocy-1+nprocy*k); + else + GH->neighbors[pnum][YDM] = -1; + else + GH->neighbors[pnum][YDM] = (i) + nprocx*(j-1+nprocy*k); + + if (k == nprocz-1) + if (GH->periodic) + /* Use the 0 processor */ + GH->neighbors[pnum][ZDP] = (i) + nprocx*(j+nprocy*0); + else + GH->neighbors[pnum][ZDP] = -1; + else + GH->neighbors[pnum][ZDP] = (i) + nprocx*(j+nprocy*(k+1)); + + if (k == 0) + if (GH->periodic) + /* Use the nprocz-1 processor */ + GH->neighbors[pnum][ZDM] = (i) + nprocx*(j+nprocy*(nprocz-1)); + else + GH->neighbors[pnum][ZDM] = -1; + else + GH->neighbors[pnum][ZDM] = (i) + nprocx*(j+nprocy*(k-1)); + + /* And save the number of points on each processor + * so we can get at it later ... + */ + GH->rnx[pnum] = (GH->ub[pnum][0]-GH->lb[pnum][0]+1); + GH->rny[pnum] = (GH->ub[pnum][1]-GH->lb[pnum][1]+1); + GH->rnz[pnum] = (GH->ub[pnum][2]-GH->lb[pnum][2]+1); + GH->rnpoints[pnum] = GH->rnx[pnum] * GH->rny[pnum] * GH->rnz[pnum]; + if (GH->rnpoints[pnum] > maxpoints) maxpoints = GH->rnpoints[pnum]; + if (GH->rnpoints[pnum] < minpoints) minpoints = GH->rnpoints[pnum]; + avgpoints += GH->rnpoints[pnum]; + + } + avgpoints = (1.0 * avgpoints) / (GH->nprocs); + + /* Local information */ + GH->npoints = GH->rnpoints[GH->myproc]; + GH->lnx = GH->rnx[GH->myproc]; + GH->lny = GH->rny[GH->myproc]; + GH->lnz = GH->rnz[GH->myproc]; + GH->maxskew = (maxpoints-minpoints)/avgpoints * 100.0; + + + /* So now set up the ownership, ghosts, and overlap */ + + /* Ownership is pretty easy. 
Remember ownership is indexed as + [stagger][ijk][min/max]. See pGF_Reduction for a use of this, + among others. + Note: Ownership is same for staggered and non-staggered grids + */ + GH->ownership[PUGH_VERTEXCTR][0][0] = (GH->lb[GH->myproc][0] == 0 ? + 0 : GH->stencil_width); + GH->ownership[PUGH_VERTEXCTR][1][0] = (GH->lb[GH->myproc][1] == 0 ? + 0 : GH->stencil_width); + GH->ownership[PUGH_VERTEXCTR][2][0] = (GH->lb[GH->myproc][2] == 0 ? + 0 : GH->stencil_width); + + GH->ownership[PUGH_VERTEXCTR][0][1]=(GH->ub[GH->myproc][0] == GH->nx-1 ? + GH->lnx : GH->lnx - GH->stencil_width); + GH->ownership[PUGH_VERTEXCTR][1][1]=(GH->ub[GH->myproc][1] == GH->ny-1 ? + GH->lny : GH->lny - GH->stencil_width); + GH->ownership[PUGH_VERTEXCTR][2][1]=(GH->ub[GH->myproc][2] == GH->nz-1 ? + GH->lnz : GH->lnz - GH->stencil_width); + /* correct for periodic identification : Tue Jun 17 08:40:15 CDT 1997 */ + if(GH->periodic) + { + GH->ownership[PUGH_VERTEXCTR][0][0] = GH->stencil_width; + GH->ownership[PUGH_VERTEXCTR][1][0] = GH->stencil_width; + GH->ownership[PUGH_VERTEXCTR][2][0] = GH->stencil_width; + GH->ownership[PUGH_VERTEXCTR][0][1] = GH->lnx - GH->stencil_width; + GH->ownership[PUGH_VERTEXCTR][1][1] = GH->lny - GH->stencil_width; + GH->ownership[PUGH_VERTEXCTR][2][1] = GH->lnz - GH->stencil_width; + } + + + if (GH->nx == 1) { + GH->ownership[PUGH_VERTEXCTR][0][0] = 0; + GH->ownership[PUGH_VERTEXCTR][0][1] = 1; + } + if (GH->ny == 1) { + GH->ownership[PUGH_VERTEXCTR][1][0] = 0; + GH->ownership[PUGH_VERTEXCTR][1][1] = 1; + } + if (GH->nz == 1) { + GH->ownership[PUGH_VERTEXCTR][2][0] = 0; + GH->ownership[PUGH_VERTEXCTR][2][1] = 1; + } + + /* Great now set up the overlap region. This is the region we own, + but which is covered by our neighbors ghost. So this is what we + want to SEND + */ + for (dir=0;dir<6;dir++) { + int tmp; + /* We want to send at the dir - 1 */ + istart = 0; iend = GH->lnx; + jstart = 0; jend = GH->lny; + kstart = 0; kend = GH->lnz; + + /* Correct to direction speicif sweeps ... */ + if (dir == 0) { + /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */ + istart = GH->stencil_width; + iend = istart + GH->stencil_width; + } + if (dir == 1) { + /* Want to go from nx - 2 sw to nx - sw + (eg nx-2 to < nx-1, nx-4 to < nx-2) + Luckily iend is aready set to nx ... */ + tmp = iend; + istart = tmp - 2 * GH->stencil_width; + iend = tmp - GH->stencil_width; + } + + if (dir == 2) { + /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */ + jstart = GH->stencil_width; + jend = jstart + GH->stencil_width; + } + if (dir == 3) { + /* Want to go from nx - 2 sw to nx - sw + (eg nx-2 to < nx-1, nx-4 to < nx-2) + Luckily iend is aready set to nx ... */ + tmp = jend; + jstart = tmp - 2 * GH->stencil_width; + jend = tmp - GH->stencil_width; + } + + if (dir == 4) { + /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */ + kstart = GH->stencil_width; + kend = kstart + GH->stencil_width; + } + if (dir == 5) { + /* Want to go from nx - 2 sw to nx - sw + (eg nx-2 to < nx-1, nx-4 to < nx-2) + Luckily iend is aready set to nx ... */ + tmp = kend; + kstart = tmp - 2 * GH->stencil_width; + kend = tmp - GH->stencil_width; + } + GH->overlap[PUGH_VERTEXCTR][dir][0][0] = istart; + GH->overlap[PUGH_VERTEXCTR][dir][0][1] = iend; + GH->overlap[PUGH_VERTEXCTR][dir][1][0] = jstart; + GH->overlap[PUGH_VERTEXCTR][dir][1][1] = jend; + GH->overlap[PUGH_VERTEXCTR][dir][2][0] = kstart; + GH->overlap[PUGH_VERTEXCTR][dir][2][1] = kend; + } + + /* And finally the ghost indices. 
This used to be in + pGF_FinishRecv.c, but now it is here. + */ + for (dir=0;dir<6;dir++) { + int tmp; + /* We want to send at the dir - 1 */ + istart = 0; iend = GH->lnx; + jstart = 0; jend = GH->lny; + kstart = 0; kend = GH->lnz; + + /* Correct to direction speicif sweeps ... */ + /* Remember this is SW less than the other way... */ + if (dir == 0) { + /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */ + istart = 0; + iend = GH->stencil_width; + } + if (dir == 1) { + /* Want to go from nx - 2 sw to nx - sw + (eg nx-2 to < nx-1, nx-4 to < nx-2) + Luckily iend is aready set to nx ... */ + tmp = iend; + istart = tmp - GH->stencil_width; + iend = tmp; + } + + if (dir == 2) { + /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */ + jstart = 0; + jend = GH->stencil_width; + } + if (dir == 3) { + /* Want to go from nx - 2 sw to nx - sw + (eg nx-2 to < nx-1, nx-4 to < nx-2) + Luckily iend is aready set to nx ... */ + tmp = jend; + jstart = tmp - GH->stencil_width; + jend = tmp; + } + + if (dir == 4) { + /* Want to go from sw to sw + sw (eg, 1 to < 2, 2 to < 4) */ + kstart = 0; + kend = GH->stencil_width; + } + if (dir == 5) { + /* Want to go from nx - 2 sw to nx - sw + (eg nx-2 to < nx-1, nx-4 to < nx-2) + Luckily iend is aready set to nx ... */ + tmp = kend; + kstart = tmp - GH->stencil_width; + kend = tmp; + } + + GH->ghosts[PUGH_VERTEXCTR][dir][0][0] = istart; + GH->ghosts[PUGH_VERTEXCTR][dir][0][1] = iend; + GH->ghosts[PUGH_VERTEXCTR][dir][1][0] = jstart; + GH->ghosts[PUGH_VERTEXCTR][dir][1][1] = jend; + GH->ghosts[PUGH_VERTEXCTR][dir][2][0] = kstart; + GH->ghosts[PUGH_VERTEXCTR][dir][2][1] = kend; + } + + /* if staggering, set up ownership, overlaps, ghosts */ + if(GH->stagger == PUGH_STAGGER) { + /* copy ownership from PUGH_VERTEXCTR */ + for(i=1;i<PUGH_NSTAGGER;i++) + for(j=0;j<3;j++) + for(k=0;k<2;k++) + GH->ownership[i][j][k] = GH->ownership[PUGH_VERTEXCTR][j][k]; + + /* correct ownership for staggered grids */ + for(i=0;i<PUGH_NSTAGGER;i++) + { + /* x-dir */ + if(GH->ub[GH->myproc][0] != GH->nx-1) + GH->ownership[i][0][1] --; + else + if(i == PUGH_FACECTR_X) + GH->ownership[i][0][1] --; + /* y-dir */ + if(GH->ub[GH->myproc][1] != GH->ny-1) + GH->ownership[i][1][1] --; + else + if(i == PUGH_FACECTR_Y) + GH->ownership[i][1][1] --; + /* z-dir */ + if(GH->ub[GH->myproc][2] != GH->nz-1) + GH->ownership[i][2][1] --; + else + if(i == PUGH_FACECTR_Z) + GH->ownership[i][2][1] --; + } + + /* set overlaps for staggered grids */ + for(staglcv=0;staglcv<PUGH_NSTAGGER;staglcv++) + for(dir=0;dir<6;dir++) + { + istart = 0; iend = GH->lnx; + jstart = 0; jend = GH->lny; + kstart = 0; kend = GH->lnz; + if(staglcv == PUGH_FACECTR_X) iend --; + if(staglcv == PUGH_FACECTR_Y) jend --; + if(staglcv == PUGH_FACECTR_Z) kend --; + + if(dir == 0) + { + istart = GH->stencil_width; + iend = 2 * GH->stencil_width; + if(staglcv != PUGH_FACECTR_X) + { + istart ++; + iend ++; + } + } + if(dir == 1) + { + istart = GH->lnx - 2 * GH->stencil_width -1; + iend = istart + GH->stencil_width; + } + if(dir == 2) + { + jstart = GH->stencil_width; + jend = 2 * GH->stencil_width; + if(staglcv != PUGH_FACECTR_Y) + { + jstart ++; + jend ++; + } + } + if(dir == 3) + { + jstart = GH->lny - 2 * GH->stencil_width -1; + jend = jstart + GH->stencil_width; + } + if(dir == 4) + { + kstart = GH->stencil_width; + kend = 2 * GH->stencil_width; + if(staglcv != PUGH_FACECTR_Z) + { + kstart ++; + kend ++; + } + } + if(dir == 5) + { + kstart = GH->lnz - 2 * GH->stencil_width -1; + kend = kstart + GH->stencil_width; + } + 
GH->overlap[staglcv][dir][0][0] = istart; + GH->overlap[staglcv][dir][0][1] = iend; + GH->overlap[staglcv][dir][1][0] = jstart; + GH->overlap[staglcv][dir][1][1] = jend; + GH->overlap[staglcv][dir][2][0] = kstart; + GH->overlap[staglcv][dir][2][1] = kend; + } + + + /* set ghosts for staggered grids */ + for(staglcv=0;staglcv<PUGH_NSTAGGER;staglcv++) + for(dir=0;dir<6;dir++) + { + istart = 0; iend = GH->lnx; + jstart = 0; jend = GH->lny; + kstart = 0; kend = GH->lnz; + if(staglcv == PUGH_FACECTR_X) iend --; + if(staglcv == PUGH_FACECTR_Y) jend --; + if(staglcv == PUGH_FACECTR_Z) kend --; + + if(dir == 0) + { + istart = 0; + iend = GH->stencil_width; + } + if(dir == 1) + { + istart = GH->lnx - GH->stencil_width; + iend = GH->lnx; + if(staglcv == PUGH_FACECTR_X) + { + istart --; + iend --; + } + } + if(dir == 2) + { + jstart = 0; + jend = GH->stencil_width; + } + if(dir == 3) + { + jstart = GH->lny - GH->stencil_width; + jend = GH->lny; + if(staglcv == PUGH_FACECTR_Y) + { + jstart --; + jend --; + } + } + if(dir == 4) + { + kstart = 0; + kend = GH->stencil_width; + } + if(dir == 5) + { + kstart = GH->lnz - GH->stencil_width; + kend = GH->lnz; + if(staglcv == PUGH_FACECTR_Z) + { + kstart --; + kend --; + } + } + GH->ghosts[staglcv][dir][0][0] = istart; + GH->ghosts[staglcv][dir][0][1] = iend; + GH->ghosts[staglcv][dir][1][0] = jstart; + GH->ghosts[staglcv][dir][1][1] = jend; + GH->ghosts[staglcv][dir][2][0] = kstart; + GH->ghosts[staglcv][dir][2][1] = kend; + } + } + + +#ifdef MPI + /* Great, now we have made these, lets set up the derived data types */ + for (i=0;GH->commmodel==PUGH_DERIVEDTYPES && i<PUGH_NSTAGGER;i++) { + int *dtindices, *one, dir; + int ii,jj,kk,ll; + + /* OK so now lets set up the communications datatypes */ + dirs[0] = GH->lny * GH->lnz; + dirs[1] = GH->lnx * GH->lnz; + dirs[2] = GH->lnx * GH->lny; + + maxs = dirs[0]; + maxs = (dirs[1] > maxs ? dirs[1] : maxs); + maxs = (dirs[2] > maxs ? dirs[2] : maxs); + + maxs *= GH->stencil_width; + + + dtindices = (int *)malloc(maxs*sizeof(int)); + one = (int *)malloc(maxs*sizeof(int)); + + for (dir=0;dir<6;dir++) { + /* Great so now calculate. Sends first */ + ll = 0; + for (kk = GH->overlap[i][dir][2][0]; + kk < GH->overlap[i][dir][2][1]; + kk ++) + for (jj = GH->overlap[i][dir][1][0]; + jj < GH->overlap[i][dir][1][1]; + jj ++) + for (ii = GH->overlap[i][dir][0][0]; + ii < GH->overlap[i][dir][0][1]; + ii ++) { + dtindices[ll] = DATINDEX(GH,ii,jj,kk); + one[ll] = 1; + ll++; + } + MPI_Type_indexed(ll,one,dtindices,PUGH_MPI_TYPE, + &(GH->send_dt[i][dir])); + CACTUS_MPI_ERROR(MPI_Type_commit(&(GH->send_dt[i][dir]))); + /* And recieves */ + ll = 0; + for (kk = GH->ghosts[i][dir][2][0]; + kk < GH->ghosts[i][dir][2][1]; + kk ++) + for (jj = GH->ghosts[i][dir][1][0]; + jj < GH->ghosts[i][dir][1][1]; + jj ++) + for (ii = GH->ghosts[i][dir][0][0]; + ii < GH->ghosts[i][dir][0][1]; + ii ++) { + dtindices[ll] = DATINDEX(GH,ii,jj,kk); + one[ll] = 1; + ll++; + } + MPI_Type_indexed(ll,one,dtindices,PUGH_MPI_TYPE, + &(GH->recv_dt[i][dir])); + CACTUS_MPI_ERROR(MPI_Type_commit(&(GH->recv_dt[i][dir]))); + } + + free(dtindices); + free(one); + } +#endif + +} + +#ifdef 0 + + /*@@ + @routine DestroyPGH + @date Thu Aug 21 11:38:10 1997 + @author Paul Walker + @desc + Destroys a GH object. + @enddesc +@@*/ +void DestroyPGH(pGH **GHin) { + /* First remove me from the list. 
*/ + pGHList *tmp, *x; + pGH *GH; + int i; + int didit; + + GH = *GHin; + +#ifdef MPI + CACTUS_MPI_ERROR(MPI_Comm_free(&(GH->PUGH_COMM_WORLD))); +#endif + + didit = 0; + if (GHList->GH == GH) { + x = GHList; + GHList = GHList->next; + didit = 1; + } else { + for (tmp = GHList; tmp->next && tmp->next->GH != GH; tmp = tmp->next) { + /* Do nothing */ + } + if (tmp->next) { + x = tmp->next; + tmp->next = tmp->next->next; + didit = 1; + } + } + if (!didit) { + printf ("Can't find GH in list\n"); + } else { + free(x); + } + + /* Great. Now go about the work of destroying me. */ + for (i=0;i<GH->ngridFuncs;i++) + DestroyPGF(GH, &(GH->gridFuncs[i])); + + for (i=0;i<GH->nArrays;i++){ + fflush(stdout); + DestroyPGArray(GH,&(GH->Arrays[i])); + } + + free(GH->gridFuncs); + free(GH->Arrays); + + free(GH->lb[0]); + free(GH->lb); + + free(GH->ub[0]); + free(GH->ub); + + free(GH->rnpoints); + free(GH->rnx); + free(GH->rny); + free(GH->rnz); + + free(GH->neighbors[0]); + free(GH->neighbors); + + free(GH); + *GHin = NULL; + + fflush(stdout); +} + +#endif + + /*@@ + @routine ProcTop + @date Thu May 1 10:13:40 1997 + @author Paul Walker + @desc + Figures out a reasonable processor topology for a square + grid on n processors. That is, it figures out a,b,c such + that a*b*c = n and a,b, and c are in some senses minimal. + <p> + This algorithm will not guarantee the minimal condition, + or at least I can't prove it does, but it seems to on + a wide variety of processor numbers. So stop being a + weenie - not everything needs a proof! + <p> + Thie came to me in the shower, and is much better than + the old way I was doing it. I'm considering having my + working environment be the shower for the rest of time, + but there are some problems waterproofing my workstation. + @enddesc + @calledby SetupPGH +@@*/ + +void ProcTop(int np, int *npx, int *npy, int *npz) { + int x,y,z,r; + + if (Contains("proc_topology","manual")) { + x = Geti("proc_top_nx"); + y = Geti("proc_top_ny"); + z = Geti("proc_top_nz"); + if (x * y * z != np) { + printf ("Manual processor topology error. %d x %d x %d != %d\n", + x,y,z,np); +#ifdef MPI + CACTUS_MPI_ERROR(MPI_Finalize()); +#endif + exit(0); + } + } else { + z = (int)(pow(np,1.0/3.0)+0.0001); + while (np % z != 0) z--; + r = np / z; + y = (int)(sqrt(r)+0.0001); + while (r % y != 0) y--; + x = r / y; + +#ifdef THORN_CARTOON_2D + if (Contains("cartoon_active","yes")) { + z = (int)(sqrt(np)+0.0001); + while (np % z != 0) z--; + y = 1; + x = np / z; + } +#endif + + if (x * y * z != np) { + printf ("FATAL ERROR in Processor Topology\n"); + printf ("This should never happen. Please report ASAP!\n"); + exit(0); + } + } + + /* NB: PW reverses these for better alignment of comm groups */ + *npz = x; + *npy = y; + *npx = z; + +} diff --git a/src/Startup.c b/src/Startup.c new file mode 100644 index 0000000..1f2f25a --- /dev/null +++ b/src/Startup.c @@ -0,0 +1,67 @@ + /*@@ + @file Startup.c + @date Wed Feb 3 23:10:19 1999 + @author Tom Goodale + @desc + Startup routines for pugh. + @enddesc + @@*/ + +#include <stdio.h> + +#include "flesh.h" +#include "GHExtensions.h" + +#include "pugh_extension.h" +#include "pugh_Comm.h" + +static char *rcsid="$Header$"; + +/* Store the handle in a global variable for the moment. */ +int PUGH_GHExtension; + + + /*@@ + @routine pugh_Startup + @date Wed Feb 3 23:14:38 1999 + @author Tom Goodale + @desc + The startup registration routine for PUGH. 
+ @enddesc + @calls + @calledby + @history + + @endhistory + +@@*/ +void pugh_Startup(void) +{ + printf("Registering PUGH GH extension\n"); + + /* Register the stuff in the GH extension. */ + PUGH_GHExtension = CCTK_RegisterGHExtension("PUGH"); + + CCTK_RegisterGHExtensionSetupGH(PUGH_GHExtension, pugh_SetupGH); + + CCTK_RegisterGHExtensionInitGH(PUGH_GHExtension, pugh_InitGH); + + CCTK_RegisterGHExtensionrfrTraverseGH(PUGH_GHExtension, pugh_rfrTraverseGH); + + /* Overload some functions */ + CCTK_OverloadSyncGroup(pugh_SyncGroup); + CCTK_OverloadEnableGroupStorage(pugh_EnableGroupStorage); + CCTK_OverloadDisableGroupStorage(pugh_EnableGroupStorage); + + CCTK_OverloadEnableGroupComm(pugh_EnableGroupComm); + CCTK_OverloadDisableGroupComm(pugh_EnableGroupComm); + + CCTK_OverloadBarrier(pugh_Barrier); + CCTK_OverloadReduce(pugh_Reduce); + CCTK_OverloadInterp(pugh_Interp); + + CCTK_OverloadParallelInit(pugh_ParallelInit); + CCTK_OverloadExit(pugh_Exit); + CCTK_OverloadAbort(pugh_Abort); + +} diff --git a/src/include/pGArray.h b/src/include/pGArray.h new file mode 100644 index 0000000..a30b04b --- /dev/null +++ b/src/include/pGArray.h @@ -0,0 +1,20 @@ + /*@@ + @file pGArray.h + @date + @author + @desc + @enddesc + @version $Id$ + @@*/ + +typedef struct PGARRAY { + char *name; + int pgano; + Double *data; + + int npoints; + struct PGH *parentGH; + + int do_arrayio; + IOFile IEEEfile; +} pGArray; diff --git a/src/include/pGF.h b/src/include/pGF.h new file mode 100644 index 0000000..d8dd2fa --- /dev/null +++ b/src/include/pGF.h @@ -0,0 +1,100 @@ + /*@@ + @header pGF.h + @date Fri Feb 21 10:16:32 1997 + @author Paul Walker + @desc + The pugh Grid Function structure. This is the heart of + the data storage and the like. You can't have a GF + without a GH, so you'd better see @seefile pGH.h + also. + <p> + One wonderfully new feature that a pGF has is that it contains + both a Double* padddata and a Double* data. What is the difference? + Well data is the base address of the fortran data set. It points + into padddata. This allows us, if we want to, to explicitly move + the starting point of the array to align with cache lines. + Important on some architectures. So the deal is if we are + in padding mode (see @seeroutine EnableGFDataStorage fro more + on that) padddata will be bigger than data, and data will point + to an address insinde padddata and such that lnx * lny * lnz + further down the data pointer is still in padddata. If we are + not in padding mode, data = padddata as pointers. + <p> + The only place we explicitly reference padddata is when we + create or de-allocate data. <b>All other operatoins shold + be done on *data</a> + @enddesc + @version $Id$ + @@*/ + +#define DATINDEX(GH,i,j,k) ((i) + GH->lnx*((j)+GH->lny*(k))) +#define DI(GH,i,j,k) ((i) + GH->lnx*((j)+GH->lny*(k))) +#define GDATINDEX(GH,i,j,k) ((i) + GH->nx*((j)+GH->ny*(k))) + +#ifndef IEEEIO +typedef long IOFile; +#endif + +typedef struct PGF { + char *name; /* The name of the grid function */ + int gfno; /* My ID number in my GH parent. */ + Double *padddata; /* Storage for the data. */ + Double *data; /* See the note above. */ + int buffer_sz[6]; /* Size of the face ghost zones */ + Double **send_buffer; /* Storage for buffered Comm if */ + Double **recv_buffer; /* we don't use derived types */ + int commflag; /* What is the comm flag set to? */ + int docomm[6]; /* Do we do comm or not? */ + int storage; /* Do we have storage or not? */ + int stagger; /* Only Vertex Centered now... 
*/ + + struct PGH *parentGH; /* The GH to which I belong */ + /* Note this is struct PGH whic is + typedeffed to pGH in pGH.h, but + that is included AFTER this so we + need the full name for the lookahead + structure reference thingy. + */ + + /* IO Related things */ + FILE *convfile; /* File for convergence information */ + FILE **xgfile; /* x,y,z and dline file pointers */ + FILE **zerodfile; /* min,max,norm1,norm2 file pointers */ + IOFile IEEEfile; /* File for IEEEIO */ + char IEEEfname[512]; /* IEEEIO 3D File name (to preserve handles + we will calculate this once and then + perhaps reopen and close it). + */ + IOFile IEEEfile_2d[3]; /* Two-D IEEEIO File in each dir */ + int do_3dio; /* Do 3D I/O state flag */ + int do_1dio; /* Do 1D I/O state flag */ + int do_2dio; /* Do 2D I/O state flag */ + int do_0dio; /* Do 0D (eg, min max) I/O */ + int show_maxval; /* Show runtime maxval state flag */ + int do_conv; /* Convegence test this function? */ + int lastio_it[4]; /* Last iteration for I/O at 1, 2, and 3D. + We need this since we don't want to + multiply write data, which we can do + in convergence mode. So rather than + be clever we just store the iteration + when we write, and then don't write at + the same iteration twice. + */ + + Double maxval, minval; /* To store in case we toggle storage */ + Double norm1, norm2; +#ifdef MPI + MPI_Request sreq[6], rreq[6]; /* Comm requests and statuses. */ + MPI_Status ms; +#endif + +#ifdef 0 +#include "pGF_Extensions.h" + +#ifdef THORN_EXACT /* thorn Comp and thorn Exact merge */ +#include "thorn_exact/src/include/pGF_ComparisonExtensions.h" +#endif + +#endif + +} pGF; diff --git a/src/include/pGH.h b/src/include/pGH.h new file mode 100644 index 0000000..4318612 --- /dev/null +++ b/src/include/pGH.h @@ -0,0 +1,132 @@ + /*@@ + @header pGH.h + @date Fri Feb 21 10:12:58 1997 + @author + @desc + The Pugh Grid Hierarchy structure. There isn't much point in having + a GridHierarchy without GridFunctions, so you'd better see + @seefile pGF.h also. + @enddesc + @history + @hauthor Gabrielle Allen + @hdate Sep 10 @hdesc Added the iteration number on a GH + @endhistory + @history + @hauthor Gabrielle Allen @hdate 26 Sep 1998 + @desc Changed IO parameter names, removed GH->iomode, added t_iopars + structure + @hauthor Gabrielle Allen @hdate 17 Oct 1998 + @desc Added more IO parameters to IO structure + structure + @hauthor Gabrielle Allen @hdate 3 Nov 1998 + @desc Added forceSync + @endhistory + @version $Id$ + @@*/ + + +typedef struct PGH { + +#ifdef MPI + MPI_Comm PUGH_COMM_WORLD; /* A MPIcommunicator */ +#endif + + /* Size of the processor group */ + int nprocs; /* Number of processors */ + int myproc; /* My processor */ + int nprocx,nprocy,nprocz; /* Processor topology */ + int commmodel; /* Comm model is PUGH_ALLOCATEDBUFFERS */ + /* or PUGH_DERIVEDTYPES. Currently unused */ + + /* Size of the problems */ + int ngridFuncs; /* # of Grid Functions */ + pGF **gridFuncs; /* Pointers to them */ + int nx,ny,nz; /* Physical size of the Problem */ + int stencil_width; /* Width of ghost zone */ + + int nArrays; /* # of pGArrays */ + pGArray **Arrays; /* Pointers to them */ + + /* Processor group layouts */ + Double maxskew; /* Maximum point skew */ + int periodic; /* Is the system periodic? */ + int **lb; /* Lower bound (nprocs X 3) for each proc */ + int **ub; /* Upper bound (same sizes) */ + int lnx, lny, lnz; /* Size on this processor */ + int npoints; /* LOCAL number of points on this proc. 
*/ + int *rnpoints; /* Number of points on each proc */ + int *rnx, *rny, *rnz; /* Number of points on each proc */ + int **neighbors; /* An array of size nprocs X 6 of the + * neighbors on each side. The entry in + * this array is the processor number of + * the neighbor. + */ + int stagger; /* Is staggering turned on? */ + int spxyz[3]; /* From what index to start slicing: 1 for octant, */ + /* nx/2 for box, e.g. Set in SetupSlicingCenter, */ + /* used in Write3D and modfied in in BoxInBox.c ! */ + int forceSync; /* force synchronisation of GFs with storage */ + + int sp2dxyz[3]; /* Slice point for two d slices. */ + + /* Information about ghosts and overlaps. */ + int ownership[PUGH_NSTAGGER][3][2]; /* The box owned in each direction. */ + /* Indexing here is [stagger][ijk][min/max]*/ + + int ghosts[PUGH_NSTAGGER][6][3][2]; /* The ghost zones on each face. */ + /* Indexing here is */ + /* [stagger][face][ijk][min/max] */ + + int overlap[PUGH_NSTAGGER][6][3][2]; /* The overlap region owned on each face. */ + /* Indexing here is */ + /* [stagger][face][ijk][min/max] */ + + + /* Coordinate information */ + Double cx0, cy0, cz0; /* Origin of our system */ + Double dx0, dy0, dz0, dt0; /* Delta of our system */ + Double lx0, ly0, lz0; /* Processor-Local coordinate origins */ + Double phys_time; /* physical time */ + int GHiteration; /* iteration number on processor */ + + + + + /* currently, all GHs are listed in a single, global GHList */ + /* they are distinguished by a unique combination of level numbers */ + int level; /* level in Berger-Oliger stack */ + int mglevel; /* level in multigrid stack */ + int convlevel; /* level in convergence stack */ + + int active; /* 1 -> GH active; 0 -> GH is skipped for conv.test */ + /* is used in NanCheckGH.c */ +#ifdef MPI + /* Derived data types for communication */ + MPI_Datatype send_dt[PUGH_NSTAGGER][6], recv_dt[PUGH_NSTAGGER][6]; +#endif + + /* Extension of this structure can be brought in by thorns */ + /* +#include "pGH_Extensions.h" + */ +} pGH; + + +/* This defines a single linked list of grid structures */ +/* the first item has level=0, next level=1, etc.. */ + +typedef struct pGHLIST { + pGH *GH; + struct pGHLIST *next; +} pGHList; + + + +#define XDP 1 +#define XDM 0 +#define YDP 3 +#define YDM 2 +#define ZDP 5 +#define ZDM 4 + + diff --git a/src/include/pugh.h b/src/include/pugh.h new file mode 100644 index 0000000..673eea0 --- /dev/null +++ b/src/include/pugh.h @@ -0,0 +1,150 @@ +/*@@ + @header pugh.h + @author Paul Walker + @date March 1997 + @desc + This is just a bunch of includes and some simple definitions. + You should really see @seefile pGF.h @seefile pGH.h and + @seefile pughProtos.h for the pugh bit, and @seefile + pughProblem.h for the cactus-specific bits. + @enddesc + @version $Id$ +@@*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <malloc.h> +#include <assert.h> + +/* 3 ways up shutting down: */ +#define STOP cshutdown(0); +#define ABORT cabort(0); +#define CORE ccore(); + +/* Two ways to shut down with a non-zero exit code. 
*/ +#define ERROR_STOP(xerrcode) cshutdown(xerrcode); +#define ERROR_ABORT(xerrcode) cabort(xerrcode); + + +/* We have our own Cactus memory routines in pughutils.c */ + +#if defined(SGI) || defined(T3E) +#undef malloc +#define malloc(x) cactus_malloc(x) +void *cactus_malloc(size_t); +#undef free +#define free(x) cactus_free(x) +void cactus_free(void *); +#endif + +#ifdef MPI +#include "mpi.h" +#endif + +#ifdef HDF +#include "hdf.h" +#endif + +#ifdef IEEEIO +#include "IEEEIO.h" +#endif + +#ifdef Panda_IO +#include "PandaIO.h" +#endif + +#ifdef TCP +#include "TCP++/DataConnection.h" +#include "TCP++/CommandProcessor.h" +#endif + +/* Handle precision */ +#ifdef SINGLE_PRECISION +#define PUGH_MPI_TYPE MPI_FLOAT +typedef float Double; +#else +#define PUGH_MPI_TYPE MPI_DOUBLE +typedef double Double; +#endif + +#include "pugh_constants.h" + +#ifdef 0 +/* We need to include active thorns here so that the thorns can + modify the grid function and grid hierarchy (as box in box does). + */ +#include "pughActiveThorns.h" + +#ifdef THORN_BOXINBOX +#include "AMRwriter.h" +#endif + +#endif + +#include "pGF.h" +#include "pGArray.h" +#include "pGH.h" + + +#ifdef 0 +/* Timing stuff */ +#include "pughTimers.h" + +#include "pughProtos.h" + + + +#ifndef WIN32 +#define BOLDON "\033[1m" +#define BOLDOFF "\033[0m" +#else +#define BOLDON "" +#define BOLDOFF "" +#endif + +#if (defined(RS6000) || defined(SPX)||defined(HP)) +#define FORTRAN_NAME(x,y,z) z +#else +#if (defined(nCUBE2) || defined(CRAYC90) || defined(CRAYT3D) || defined (CRAYT3E)) +#define FORTRAN_NAME(x,y,z) y +#else +#if (defined(WIN32)) +#define FORTRAN_NAME(x,y,z) y +#else +#define FORTRAN_NAME(x,y,z) x +#endif +#endif +#endif + +#endif /*0*/ + +#ifdef MPI + +#define CACTUS_MPI_ERROR(xf) do {int errcode; \ + if((errcode = xf) != MPI_SUCCESS) \ + { \ + char mpi_error_string[MPI_MAX_ERROR_STRING+1]; \ + int resultlen; \ + MPI_Error_string(errcode, mpi_error_string, &resultlen);\ + fprintf(stderr, "MPI Call %s returned error code %d (%s)\n", \ + #xf, errcode, mpi_error_string); \ + fprintf(stderr, "At line %d of file %s\n", \ + __LINE__, __FILE__); \ + } \ + } while (0) +#endif + + +#include "pughDriver.h" + +#ifdef 0 +/* Runtime function repository */ +#include "rfr.h" + +/* Scalar Manipulation Functions. */ + +#include "CactusScalarProtos.h" + +#endif diff --git a/src/include/pughDriver.h b/src/include/pughDriver.h new file mode 100644 index 0000000..73415ea --- /dev/null +++ b/src/include/pughDriver.h @@ -0,0 +1,68 @@ +/*@@ + @header pughDriver.h + @author Paul Walker + @date March 1997 + @desc + This is stuff having to do with the driver, namely + the problem and globals, as well as definitions of what + the fort interfaces are. Also see @seefile pughProblem.h + for what the cactus-specific stuff is. 
The macros FORT_ARGS + and FORT_ARGS_PROTO passed from c routines must match + FORT_GH_DECOMP and FORT_GHDECOMP_DECL received in fortran + routines defined in @seefile pughFortran.h + @enddesc + @history + @hauthor Gabrielle Allen + @hdate Sep 10 @hdesc Added the GH iteration number to fortran macros + @hauthor Gabrielle Allen @hdate 29 Sep 1998 + @hdesc Removed levfac + @endhistory + @version $Id$ + @@*/ + +#ifdef 0 +extern pGHList *GHList; + +extern int nx0, ny0, nz0; +extern int lb[3],ub[3],sh[3],bbox[6]; +extern int convergence; + +/* Timing variables */ +extern double commt[PUGH_NTIMERS]; +extern double elltime[PUGH_NTIMERS]; + +/* How long to run */ +extern int itfirst, itlast, itout; /* Iteration runs */ +extern int itout3, itout2, itout1, itout0, itoutarr, itinfo; + +extern int _show_storage; + +/* termination flags: pe-local and global: */ +extern int cactus_terminate; + +/* FORT_ARGS must match FORT_ARGS_PROTO below, and FORT_GH_DECOMP, + FORT_GHDECOMP_DECL in pughFortran.h */ +#define FORT_ARGS(xGH) xGH, \ + &(xGH->lnx),&(xGH->lny), &(xGH->lnz),\ + lb,ub,sh,bbox, \ + &(xGH->nx),&(xGH->ny),&(xGH->nz),\ + &(xGH->cx0),&(xGH->cy0),&(xGH->cz0), \ + &(xGH->dx0),&(xGH->dy0),&(xGH->dz0),&(xGH->dt0), &(xGH->phys_time), \ + &(xGH->level), &(xGH->nprocs), &(xGH->myproc), \ + &(xGH->convlevel), &(xGH->stencil_width), &(xGH->GHiteration) + +/* FORT_ARGS_PROTO must match FORT_ARGS above, and FORT_GH_DECOMP, + FORT_GHDECOMP_DECL in pughFortran.h */ +#define FORT_ARGS_PROTO pGH *, \ + int *, int *, int *, \ + int *, int *, int *, int *, \ + int *, int *, int *, \ + Double *, Double *, Double *, \ + Double *, Double *, Double *, Double *, Double *, \ + int *, int *, int *, \ + int *, int *, int * + + +#include "pughProblem.h" + +#endif diff --git a/src/include/pughProblem.h b/src/include/pughProblem.h new file mode 100644 index 0000000..2024a09 --- /dev/null +++ b/src/include/pughProblem.h @@ -0,0 +1,53 @@ +/*@@ + @header pughProblem.h + @author Paul Walker + @date March 1997 + @desc + This routine declares all the problem-specific globals + and teaches the driver how to make the grid functions + with the right names, and pass them to fortran. It + isn't that exciting, but it's pretty damned important + if you need to add new fields. + @enddesc + @version $Id$ + @@*/ + +#ifdef 0 +#include "gfIndices.h" +#include "pughProblemDefs.h" + +/* Variables that ALL codes use. */ +#define USER_VARS \ + int *_one; + +/* Main externals */ +extern int *_one; + +/* Historical: Two macros for the same thing. PW. */ +#define PASS_FORT_FIELDS(xGH) \ + PASS_FORT_FIELDS_SUBSET(xGH) + + +/* If you add items to PASS_FORT_FIELDS make sure */ +/* you add the right type at the right position */ +/* in PROTO_FORT_FIELDS */ +#define PROTO_FORT_FIELDS \ + PROTO_FORT_FIELDS_SUBSET + + +#ifdef WIN32 +#define FMODIFIER __stdcall +#else +#define FMODIFIER +#endif + +#ifdef THORN_CHECKPOINT + /* Is it possible to ifdef this? Or perhaps to rfr it? 
*/ +#define f_recover FORTRAN_NAME(recover_,RECOVER,recover) +void FMODIFIER f_recover(FORT_ARGS_PROTO,PROTO_FORT_FIELDS); +#endif + +#include "cactus_constants.h" + +#endif + diff --git a/src/include/pugh_constants.h b/src/include/pugh_constants.h new file mode 100644 index 0000000..f8bc138 --- /dev/null +++ b/src/include/pugh_constants.h @@ -0,0 +1,64 @@ + /*@@ + @file pugh_constants.h + @date + @author + @desc + @enddesc + @version $Id$ + @@*/ + +/* Pre-processor constants start with PUGH_ */ + +/* Constants for the comm directions */ +#define PUGH_NOCOMM 0 +#define PUGH_ALLCOMM 1 +#define PUGH_XCOMM 2 +#define PUGH_YCOMM 3 +#define PUGH_ZCOMM 4 +#define PUGH_PLUSFACESCOMM 5 +#define PUGH_MINUSFACESCOMM 6 + +/* Constants for the storage */ +#define PUGH_NOSTORAGE 0 +#define PUGH_STORAGE 1 + +/* Constants for the GF Type. Only one now ... */ +/* Number of staggerings available in the code */ +#define PUGH_NSTAGGER 4 + +/* The index for each staggering type */ +/* This classifies each PGF member */ +#define PUGH_VERTEXCTR 0 +#define PUGH_FACECTR_X 1 +#define PUGH_FACECTR_Y 2 +#define PUGH_FACECTR_Z 3 + +/* The index for each staggering type */ +/* This determines the PGH should be set up */ +#define PUGH_NO_STAGGER 0 +#define PUGH_STAGGER 1 + +/* Comm types available */ +#define PUGH_NOCOMMBUFFERS 0 +#define PUGH_ALLOCATEDBUFFERS 1 +#define PUGH_DERIVEDTYPES 2 + +/* Termination flags */ +#define TERMINATION_NOT_RAISED 0 /* no termination flag, inital setting */ +#define TERMINATION_RAISED_LOCAL 1 /* raised on one pe, has to be broadcasted */ +#define TERMINATION_RAISED_BRDCAST 2 /* flag now available on all pes: termiante */ + +/* IO precision */ +#ifdef SINGLE_PRECISION +#define IO_FLOAT FLOAT32 +#else +#define IO_FLOAT FLOAT64 +#endif + +/* IO precision */ +#ifdef SINGLE_PRECISION +#define CACTUS_FLOAT FLOAT32 +#else +#define CACTUS_FLOAT FLOAT64 +#endif + diff --git a/src/make.code.defn b/src/make.code.defn new file mode 100644 index 0000000..640faaa --- /dev/null +++ b/src/make.code.defn @@ -0,0 +1,8 @@ +# Main make.code.defn file for thorn pugh +# : /usr/users/cactus/CCTK/lib/make/new_thorn.pl,v 1.1 1999/02/03 17:00:50 goodale Exp n +# Source files in this directory +SRCS = Startup.c GHExtension.c Comm.c SetupPGH.c + +# Subdirectories containing source files +SUBDIRS = + diff --git a/src/pugh/SetupPGArray.c b/src/pugh/SetupPGArray.c new file mode 100644 index 0000000..9a5cf71 --- /dev/null +++ b/src/pugh/SetupPGArray.c @@ -0,0 +1,84 @@ + /*@@ + @file SetupArray.c + @date Fri Feb 21 11:04:20 1997 + @author + @desc + Sets up (eg allocates) the pugh Array. This is trivial. + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine SetupPGArray + @date Fri Feb 21 11:05:04 1997 + @author + @desc + Note that npoints is a GLOBAL measure. 
+ @enddesc + @calls DisableGFDataStorage, EnableGFDataStorage +@@*/ + + +pGArray *SetupPGArray(pGH *GH, char *name, int npoints) { + pGArray *res; /* The result */ + pGArray **rlist; /* A temporary to add to the GH */ + int lnpoints, i; + + /* Space for the struct */ + res = (pGArray *)malloc(sizeof(pGArray)); + + /* Set my parent GH */ + res->parentGH = GH; + + /* Set up the name */ + res->name = (char *)malloc(strlen(name)*sizeof(char)); + strcpy(res->name,name); + + /* Allocate my memory */ + if (npoints > 0) { + lnpoints = npoints/GH->nprocs; + if (GH->myproc == GH->nprocs - 1) { + lnpoints = npoints - lnpoints * (GH->nprocs-1); + } + res->npoints = lnpoints; + res->data = (Double *)malloc(lnpoints*sizeof(Double)); + } else { + res->npoints = 1; + res->data = (Double *)malloc(sizeof(Double)); + } + + /* Setup IO stuff to default no IO with space for xgraph files.*/ + res->do_arrayio = 0; + + res->IEEEfile = NULL; + + /* Finally add this to the list of grid funcs in the gh */ + GH->nArrays ++; + rlist = (pGArray **)malloc(GH->ngridFuncs * sizeof(pGArray *)); + if (GH->nArrays > 1) { + for (i=0;i<GH->nArrays-1;i++) + rlist[i] = GH->Arrays[i]; + free(GH->Arrays); + } + res->pgano = GH->nArrays - 1; + rlist[GH->nArrays-1] = res; + GH->Arrays = rlist; + + /* And return myself... */ + return res; +} + +void DestroyPGArray(pGH *GH, pGArray **pGAin) { + pGArray *pGA; + pGA = *pGAin; + + free(pGA->name); + free(pGA->data); + + free(pGA); + *pGAin=NULL; +} diff --git a/src/pugh/SetupPGF.c b/src/pugh/SetupPGF.c new file mode 100644 index 0000000..acec587 --- /dev/null +++ b/src/pugh/SetupPGF.c @@ -0,0 +1,438 @@ + /*@@ + @file SetupPGF.c + @date Fri Feb 21 11:04:20 1997 + @author + @desc + Sets up (eg allocates) the pugh Grid Function. This is + crucial to understaiding the grid function struct. + @enddesc + @version $Id$ + @@*/ + +#include "pugh.h" + +static char *rcsid = "$Id$"; + + /*@@ + @routine SetupPGF + @date Fri Feb 21 11:05:04 1997 + @author + @desc + Allocates the Grid Func structure (I wanted to say object ...) + There are several steps to this + <ul> + <li> Set up the name + <li> Set up the docomm flag and IO flags + <li> Allocate output file pointer arrays + <li> Allocate the data buffer + <li> Allocate the send buffer + <li> Add myself to the Grid Hierarchy + </ul> + But this is pretty straightforward code nonetheless. + <p> + Note that the input variable "storage" determines whether to + allocate the 3-D data or not. You can toggle this later + with @seeroutine DisableGFDataStorage and @seeroutine + EnableGFDataStorage + @enddesc + @calls DisableGFDataStorage, EnableGFDataStorage +@@*/ + + +pGF *SetupPGF(pGH *GH, char *name, int docomm, int storage, int stagger) { + pGF *res; /* The result */ + pGF **rlist; /* A temporary to add to the GH */ + int i, dir, dirp1, dirp2, sz; /* Counter thingies */ + + /* Space for the struct */ + res = (pGF *)malloc(sizeof(pGF)); + + /* Set my parent GH */ + res->parentGH = GH; + + /* Set up the name */ + res->name = (char *)malloc((1+strlen(name))*sizeof(char)); + strcpy(res->name,name); + + res->stagger = stagger; + if( (GH->stagger == PUGH_NO_STAGGER) && + (res->stagger != PUGH_VERTEXCTR) ) + { printf ("FATAL ERROR! Cannot have staggered grids inside a GH with \n"); + printf (" designation PUGH_NO_STAGGER. 
\n"); + STOP; + } + + + /* Setup IO stuff to default no IO with space for xgraph files.*/ + res->do_3dio = 0; + res->do_2dio = 0; + res->do_1dio = 0; + res->do_0dio = 0; + res->do_conv = 0; + + res->convfile = NULL; + + res->xgfile = (FILE **)malloc(5*sizeof(FILE *)); + for (i=0;i<5;i++) + res->xgfile[i] = NULL; + + res->zerodfile = (FILE **)malloc(4*sizeof(FILE *)); + for (i=0;i<4;i++) + res->zerodfile[i] = NULL; + + res->IEEEfile = NULL; + + for (i=0;i<3;i++) + res->IEEEfile_2d[i] = NULL; + + res->show_maxval = 0; + + res->lastio_it[0] = -1; res->lastio_it[1] = -1; + res->lastio_it[2] = -1; res->lastio_it[3] = -1; + + SetGFComm(res, docomm); + + + /* Finally add this to the list of grid funcs in the gh */ + GH->ngridFuncs ++; + rlist = (pGF **)malloc(GH->ngridFuncs * sizeof(pGF *)); + if (GH->ngridFuncs > 1) { + for (i=0;i<GH->ngridFuncs-1;i++) + rlist[i] = GH->gridFuncs[i]; + free(GH->gridFuncs); + } + res->gfno = GH->ngridFuncs - 1; + rlist[GH->ngridFuncs-1] = res; + GH->gridFuncs = rlist; + /* Set up the comm layer */ + res->padddata = NULL; + res->data = NULL; + res->send_buffer = NULL; + res->recv_buffer = NULL; + + if (storage == PUGH_NOSTORAGE) { + res->storage = PUGH_STORAGE; /* Fake it out... */ + DisableGFDataStorage(GH, res); + } else if (storage == PUGH_STORAGE) { + res->storage = PUGH_NOSTORAGE; /* Fake it out... */ + EnableGFDataStorage(GH, res); + } else { + printf ("WARNING: Storage keyword set incorrectly for %s [%d]\n",res->name,storage); + printf (" Perhaps misspelled in GridFuncList? Defaulting to storage active.\n"); + EnableGFDataStorage(GH, res); + } + +#ifdef MPI + /* Null my send and recieve requests */ + for (i=0;i<6;i++) { + res->sreq[i] = MPI_REQUEST_NULL; + res->rreq[i] = MPI_REQUEST_NULL; + } +#endif + + /* And return myself... */ + return res; +} + + /*@@ + @routine DestroyPGF + @date Thu Aug 21 11:44:22 1997 + @author Paul Walker + @desc + Destroys a GF object. + @enddesc +@@*/ + +void DestroyPGF(pGH *GH, pGF **GFin) { + pGF *GF; + GF = *GFin; + + if (GF->storage) + DisableGFDataStorage(GH, GF); + + free(GF->name); + free(GF->padddata); + + free(GF->xgfile); + free(GF->zerodfile); + + GF->data = NULL; + + free(GF); + *GFin = NULL; + +} + + + /*@@ + @routine EnableGFDataStorage + @date Thu Apr 3 12:44:38 1997 + @author Paul Walker + @desc + This routine toggles the data storage to the "on" + position. That means that data points to a 3D + array, rather than a single Double, and the + storage flag is set to one. This is used quite + a lot by thorns which need to toggle memory. for + instance, see the trK Driver in the util thron. + @enddesc +@@*/ + +/* Statics for padding */ +static int padding_active = -1; +static int padding_cacheline_bits, padding_size, padding_address_spacing; + +int zero_memory = -1; + +void EnableGFDataStorage(pGH *GH, pGF *GF) { + int i, dir, dirp1, dirp2, sz; + int special_pad, cache_size, start; + + if (zero_memory < 0) { + zero_memory = Contains("zero_memory","yes"); + if (zero_memory) { + printf ("Zeroing memory for allocated GFs at alloc time\n"); + } + } + + + if (padding_active == -1) { + padding_active = Contains("padding_active","yes"); + padding_cacheline_bits = Geti("padding_cacheline_bits"); + padding_size = Geti("padding_size"); + padding_address_spacing = Geti("padding_address_spacing"); + if (padding_active) { + printf ("PUGH Memory padding active. 
+ /*@@
+   @routine EnableGFDataStorage
+   @date    Thu Apr 3 12:44:38 1997
+   @author  Paul Walker
+   @desc
+   This routine toggles the data storage to the "on"
+   position. That means that data points to a 3D
+   array, rather than a single Double, and the
+   storage flag is set to one. This is used quite
+   a lot by thorns which need to toggle memory; for
+   instance, see the trK driver in the util thorn.
+   @enddesc
+@@*/
+
+/* Statics for padding */
+static int padding_active = -1;
+static int padding_cacheline_bits, padding_size, padding_address_spacing;
+
+int zero_memory = -1;
+
+void EnableGFDataStorage(pGH *GH, pGF *GF) {
+  int i, dir, dirp1, dirp2, sz;
+  int special_pad, cache_size, start;
+
+  if (zero_memory < 0) {
+    zero_memory = Contains("zero_memory","yes");
+    if (zero_memory) {
+      printf ("Zeroing memory for allocated GFs at alloc time\n");
+    }
+  }
+
+  if (padding_active == -1) {
+    padding_active = Contains("padding_active","yes");
+    padding_cacheline_bits = Geti("padding_cacheline_bits");
+    padding_size = Geti("padding_size");
+    padding_address_spacing = Geti("padding_address_spacing");
+    if (padding_active) {
+      printf ("PUGH Memory padding active. Stats:\n");
+      printf ("   cacheline_bits : %d\n", padding_cacheline_bits);
+      printf ("   size           : %d\n", padding_size);
+      printf ("   spacing        : %d\n", padding_address_spacing);
+    }
+  }
+
+  if (GF->storage) {
+    printf ("WARNING: Tried to enable %s when already enabled\n",
+            GF->name);
+    return;
+  }
+
+  /* Set up the send and receive buffers, one per face
+     with a live neighbor. */
+  assert(!GF->send_buffer);
+  GF->send_buffer = (Double **)malloc(6*sizeof(Double *));
+
+  assert(!GF->recv_buffer);
+  GF->recv_buffer = (Double **)malloc(6*sizeof(Double *));
+
+  for (i=0;i<6;i++) {
+    dir   = i/2;
+    dirp1 = (i/2+1)%3;
+    dirp2 = (i/2+2)%3;
+
+    if (dir == 0) {
+      sz = GH->stencil_width * GH->lny * GH->lnz;
+    } else if (dir == 1) {
+      sz = GH->stencil_width * GH->lnx * GH->lnz;
+    } else {
+      sz = GH->stencil_width * GH->lnx * GH->lny;
+    }
+
+    if (GH->neighbors[GH->myproc][i] >= 0) {
+      GF->buffer_sz[i] = sz;
+      GF->send_buffer[i] = (Double *)malloc(sz*sizeof(Double));
+      GF->recv_buffer[i] = (Double *)malloc(sz*sizeof(Double));
+      assert(GF->recv_buffer[i]);
+      assert(GF->send_buffer[i]);
+    } else {
+      GF->buffer_sz[i] = 0;
+      GF->send_buffer[i] = NULL;
+      GF->recv_buffer[i] = NULL;
+    }
+  }
+
+  /* Set up the storage */
+  if (GF->padddata) free (GF->padddata);
+  if (!padding_active) {
+    GF->padddata = (Double *)malloc((GH->npoints)*sizeof(Double));
+    GF->data = GF->padddata;   /* No padding case */
+    if (zero_memory) {
+      for (i=0;i<GH->npoints;i++)
+        GF->padddata[i] = 0.0;
+    }
+  } else {
+    GF->padddata = (Double *)malloc((GH->npoints + padding_size)
+                                    *sizeof(Double));
+
+    /* Align the actual starting address on a cache line.
+       More specifically, align on a unique cache line for
+       each field. */
+    cache_size = 2 << (padding_cacheline_bits - 1);
+    start = ((long) (GF->padddata) / sizeof(Double))%cache_size;
+
+    special_pad = ((GF->gfno) * padding_address_spacing
+                   + cache_size - start)%cache_size;
+    GF->data = (GF->padddata) + special_pad;
+
+    if (special_pad > padding_size) {
+      printf("FATAL ERROR: padding size not large enough for cache type specified %d %d %d\n",special_pad,padding_size,cache_size);
+      STOP;
+    }
+    if (zero_memory) {
+      for (i=0;i<GH->npoints+padding_size;i++)
+        GF->padddata[i] = 0.0;
+    }
+  }
+
+  if (!GF->padddata) {
+    printf ("FATAL ERROR: Cannot allocate data for %s [%d]\n",
+            GF->name, GF->gfno);
+    STOP;
+  }
+  GF->storage = 1;
+}
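The alignment arithmetic is the subtle part of this routine. A sketch with made-up parameter values (cacheline bits 5, address spacing 3) that mirrors the computation; Double and the variable names follow this file:

    /* Sketch of the cache-line alignment in EnableGFDataStorage,
       assuming padding_cacheline_bits = 5 and
       padding_address_spacing = 3 (illustrative values). */
    void padding_sketch(Double *padddata, int gfno, Double **data)
    {
      int cache_size, start, special_pad;

      cache_size = 2 << (5 - 1);        /* 2^5 = 32 Doubles per line */
      /* Word offset of the raw allocation within a cache line: */
      start = (int)(((long)padddata / sizeof(Double)) % cache_size);
      /* Slide field gfno to its own offset within the line,
         3 words apart per field, so different GFs do not all
         start on the same cache-line offset: */
      special_pad = (gfno * 3 + cache_size - start) % cache_size;
      *data = padddata + special_pad;   /* requires special_pad <= padding_size */
    }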
+ /*@@
+   @routine DisableGFDataStorage
+   @date    Thu Apr 3 12:45:34 1997
+   @author  Paul Walker
+   @desc
+   This routine disables the grid function storage.
+   That is, it un-allocates the 3D array and in its
+   place allocates a single Double with the value
+   0.0. This allows us to still have something to
+   pass around (an array of size (1,1,1) in Fortran
+   speak) but also to not need all our 3D arrays
+   "on" all the time.
+   @enddesc
+@@*/
+
+void DisableGFDataStorage(pGH *GH, pGF *GF) {
+  int i;
+
+  if (!GF->storage) {
+    printf ("WARNING: Tried to disable %s when already disabled\n",
+            GF->name);
+    return;
+  }
+
+  if (GF->padddata) {
+    free(GF->padddata);
+    GF->padddata = NULL;
+    GF->data = NULL;
+  }
+
+  if (GF->send_buffer) {
+    for (i=0;i<6;i++)
+      if (GF->send_buffer[i]) {
+#ifdef MPI
+        if (GF->sreq[i] != MPI_REQUEST_NULL) {
+          CACTUS_MPI_ERROR(MPI_Request_free(&(GF->sreq[i])));
+        }
+#endif
+        free(GF->send_buffer[i]);
+        GF->send_buffer[i] = NULL;
+      }
+    free(GF->send_buffer);
+    GF->send_buffer = NULL;
+  }
+
+  if (GF->recv_buffer) {
+    for (i=0;i<6;i++)
+      if (GF->recv_buffer[i]) {
+        free(GF->recv_buffer[i]);
+        GF->recv_buffer[i] = NULL;
+      }
+    free(GF->recv_buffer);
+    GF->recv_buffer = NULL;
+  }
+
+  GF->padddata = (Double *)malloc(sizeof(Double));
+  GF->data = GF->padddata;
+  GF->data[0] = 0.0;   /* Very important! */
+  GF->storage = 0;
+}
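Together, EnableGFDataStorage and DisableGFDataStorage give thorns a cheap on/off switch for 3D memory; while storage is off, data still points at one Double, so callers (including Fortran, which sees a (1,1,1) array) always get valid memory. A hedged usage sketch; the grid function name is made up:

    /* Sketch: a thorn bracketing a memory-hungry phase.
       "trK" is an illustrative pGF pointer, not a real pugh name. */
    void work_phase_sketch(pGH *GH, pGF *trK)
    {
      EnableGFDataStorage(GH, trK);    /* allocate the 3D array   */
      /* ... compute on trK->data[DATINDEX(GH,i,j,k)] ... */
      DisableGFDataStorage(GH, trK);   /* back to a single Double */
    }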
+ /*@@
+   @routine SetGFComm
+   @date    Thu Apr 3 12:46:23 1997
+   @author  Paul Walker
+   @desc
+   This sets the docomm[6] array of the GF based
+   on the setting of the comm flag. Note the
+   comm flag constants are defined in
+   @seeheader pughDriver.h. Besides SetupPGF,
+   this is called by thorns (e.g. elliptic) which
+   need to toggle this state.
+   @enddesc
+   @calledby SetupPGF
+@@*/
+
+
+void SetGFComm(pGF *res, int docomm) {
+  int i;
+
+  /* Copy docomm */
+  res->commflag = docomm;
+
+  for (i=0;i<6;i++)
+    res->docomm[i] = 0;
+
+  if (docomm == PUGH_NOCOMM) {
+    /* All faces already off */
+  } else if (docomm == PUGH_ALLCOMM) {
+    for (i=0;i<6;i++)
+      res->docomm[i] = 1;
+  } else if (docomm == PUGH_XCOMM) {
+    res->docomm[0] = 1;
+    res->docomm[1] = 1;
+  } else if (docomm == PUGH_YCOMM) {
+    res->docomm[2] = 1;
+    res->docomm[3] = 1;
+  } else if (docomm == PUGH_ZCOMM) {
+    res->docomm[4] = 1;
+    res->docomm[5] = 1;
+  } else if (docomm == PUGH_PLUSFACESCOMM) {
+    res->docomm[1] = 1;
+    res->docomm[3] = 1;
+    res->docomm[5] = 1;
+  } else if (docomm == PUGH_MINUSFACESCOMM) {
+    res->docomm[0] = 1;
+    res->docomm[2] = 1;
+    res->docomm[4] = 1;
+  } else {
+    printf("WARNING: Unsupported comm in grid function %s.\n",res->name);
+    printf("         Perhaps misspelled in GridFuncList? Defaulting to allcomm.\n");
+    for (i=0;i<6;i++)
+      res->docomm[i] = 1;
+  }
+
+  /* Handle nx = 1 type cases. This is only important for one
+     processor MPI periodic boundaries. */
+  /* This is a hack to get a GH: this routine is not passed a GH,
+     and changing its signature would touch a lot of code, so for
+     now we go through the global GH list. */
+  if (GHList->GH->nx == 1) {
+    res->docomm[0] = 0;
+    res->docomm[1] = 0;
+  }
+
+  if (GHList->GH->ny == 1) {
+    res->docomm[2] = 0;
+    res->docomm[3] = 0;
+  }
+
+  if (GHList->GH->nz == 1) {
+    res->docomm[4] = 0;
+    res->docomm[5] = 0;
+  }
+}
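Throughout pugh a face index in 0..5 encodes direction face/2 (x, y, z) and side face%2 (minus, plus), which is why PUGH_XCOMM switches on docomm[0] and docomm[1]. A tiny decoding sketch (the helper is illustrative, not part of pugh):

    /* Sketch: pugh's face numbering convention. */
    static const char *face_name(int face)
    {
      static const char *names[6] = { "x-", "x+", "y-", "y+", "z-", "z+" };
      return names[face];          /* dir = face/2, side = face%2 */
    }
    /* e.g. PUGH_YCOMM enables faces 2 and 3: "y-" and "y+". */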
diff --git a/src/pugh/SetupSliceCenter.c b/src/pugh/SetupSliceCenter.c
new file mode 100644
index 0000000..58e6a07
--- /dev/null
+++ b/src/pugh/SetupSliceCenter.c
@@ -0,0 +1,137 @@
+ /*@@
+   @file    SetupSliceCenter.c
+   @date
+   @author
+   @desc
+   Sets the grid indices used as the center point for slice output.
+   @enddesc
+   @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+void SetupSliceCenter(pGH *GH) {
+  int slice_center;
+
+  /* slice_center used to be part of the GF,
+     either 1 (default) or 0 (octant, edge).
+     Now spxyz[0], spxyz[1], spxyz[2] are part of the GH. */
+
+  if ((GH->spxyz[0]!=-1)||(GH->spxyz[1]!=-1)||(GH->spxyz[2]!=-1)) {
+    printf("WARNING in SetupSliceCenter: you already have these slice center settings (OK for BoxInBox):\n");
+    printf("spxyz[0],spxyz[1],spxyz[2]: %d %d %d\n",
+           GH->spxyz[0], GH->spxyz[1], GH->spxyz[2]);
+    return;
+  }
+
+  slice_center = 1;
+  if (Contains("grid","octant") || Contains("grid","quadrant") ||
+      Contains("grid","bitant"))
+    slice_center = 0;
+  if (Contains("slice_loc","edge")) slice_center = 0;
+
+  if (slice_center) {
+    /* For non-octant modes the center for slicings
+       is moved to the center index (unless nx,ny,nz = 1) */
+    if (GH->nx == 1) {
+      GH->spxyz[0]   = 0;
+      GH->sp2dxyz[0] = 0;
+    } else {
+      GH->spxyz[0]   = GH->nx/2 - GH->lb[GH->myproc][0];
+      GH->sp2dxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0];
+    }
+
+    if (GH->ny == 1) {
+      GH->spxyz[1]   = 0;
+      GH->sp2dxyz[1] = 0;
+    } else {
+      GH->spxyz[1]   = GH->ny/2 - GH->lb[GH->myproc][1];
+      GH->sp2dxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1];
+    }
+
+    if (GH->nz == 1) {
+      GH->spxyz[2]   = 0;
+      GH->sp2dxyz[2] = 0;
+    } else {
+      GH->spxyz[2]   = GH->nz/2 - GH->lb[GH->myproc][2];
+      GH->sp2dxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2];
+    }
+  }
+  /* For octant mode, the slicing center is index 1 or zero */
+  else {
+    if (GH->nx == 1) {
+      GH->spxyz[0]   = 0;
+      GH->sp2dxyz[0] = 0;
+    } else {
+      GH->spxyz[0]   = 1 - GH->lb[GH->myproc][0];
+      GH->sp2dxyz[0] = 1 - GH->lb[GH->myproc][0];
+    }
+
+    if (GH->ny == 1) {
+      GH->spxyz[1]   = 0;
+      GH->sp2dxyz[1] = 0;
+    } else {
+      GH->spxyz[1]   = 1 - GH->lb[GH->myproc][1];
+      GH->sp2dxyz[1] = 1 - GH->lb[GH->myproc][1];
+    }
+
+    if (GH->nz == 1) {
+      GH->spxyz[2]   = 0;
+      GH->sp2dxyz[2] = 0;
+    } else {
+      GH->spxyz[2]   = 1 - GH->lb[GH->myproc][2];
+      GH->sp2dxyz[2] = 1 - GH->lb[GH->myproc][2];
+    }
+  }
+
+  /* Special case: in quadrant mode x and y are like full, but
+     z is like octant. Thinking about this gives you a headache,
+     but it works. */
+  if (Contains("grid","quadrant")) {
+    if (GH->nz == 1) {
+      GH->spxyz[2]   = 0;
+      GH->sp2dxyz[2] = 0;
+    } else {
+      GH->spxyz[2]   = GH->nz/2 - GH->lb[GH->myproc][2];
+      GH->sp2dxyz[2] = GH->nz/2 - GH->lb[GH->myproc][2];
+    }
+  }
+
+  /* Another special case: bitant! With full and octant already
+     implemented, bitant is a breeze on the neurons. */
+  if (Contains("grid","bitant")) {
+    if (GH->nx == 1) {
+      GH->spxyz[0]   = 0;
+      GH->sp2dxyz[0] = 0;
+    } else {
+      GH->spxyz[0]   = GH->nx/2 - GH->lb[GH->myproc][0];
+      GH->sp2dxyz[0] = GH->nx/2 - GH->lb[GH->myproc][0];
+    }
+    if (GH->ny == 1) {
+      GH->spxyz[1]   = 0;
+      GH->sp2dxyz[1] = 0;
+    } else {
+      GH->spxyz[1]   = GH->ny/2 - GH->lb[GH->myproc][1];
+      GH->sp2dxyz[1] = GH->ny/2 - GH->lb[GH->myproc][1];
+    }
+  }
+
+#ifdef THORN_CARTOON_2D
+  /* For cartoon_2d, treat x as in octant, y and z as in full. BB */
+  if (Contains("cartoon_active","yes")) {
+    if (GH->nx == 1) {
+      GH->spxyz[0]   = 0;
+      GH->sp2dxyz[0] = 0;
+    } else {
+      GH->spxyz[0]   = 1 - GH->lb[GH->myproc][0];
+      GH->sp2dxyz[0] = 1 - GH->lb[GH->myproc][0];
+    }
+  }
+#endif
+}
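The spxyz arithmetic converts a global center index into a processor-local one by subtracting the processor's lower bound, and the result can legitimately fall outside the local patch. A worked sketch with made-up numbers:

    /* Sketch: where the slice center lands for nx = 16 over two
       processors in full-grid mode (illustrative decomposition:
       proc 0 owns global x-indices 0..7, proc 1 owns 8..15). */
    void slice_center_sketch(void)
    {
      int nx  = 16;
      int lb0 = 0, lb1 = 8;       /* lower bounds of procs 0 and 1   */
      int sp0 = nx/2 - lb0;       /* = 8 on proc 0: outside its patch */
      int sp1 = nx/2 - lb1;       /* = 0 on proc 1: the point lives here */
      (void)sp0; (void)sp1;       /* output code must range-check these */
    }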
diff --git a/src/pugh/SyncGroupGF.c b/src/pugh/SyncGroupGF.c
new file mode 100644
index 0000000..2d21ae2
--- /dev/null
+++ b/src/pugh/SyncGroupGF.c
@@ -0,0 +1,271 @@
+/*@@
+  @file    SyncGroupGF.c
+  @author  Paul Walker
+  @date    March 1997
+  @desc
+  Synchronizes all the Grid Functions (which have their communications
+  on) in a given group which has previously been registered to
+  the GF database with StoreGFGroupData. The MPI section was
+  formerly called SyncGH.c.
+  @enddesc
+  @history
+  @hdate Sep 27 1998 @hauthor Gabrielle Allen, Tom Goodale
+  @hdesc Added the functionality to synchronise indices from a group
+  rather than the whole GH. Changed the routine/file names.
+  @endhistory
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+/*#define VERBOSE*/
+#undef COMM_TIMING
+
+int GetGFGroupData(const char *name, int *nGFs, int **GFIndices);
+
+/*@@
+  @routine SyncGroupGF
+  @author  Paul Walker
+  @date    March 1997
+  @desc
+  Synchronizes all the Grid Functions (which have their communications
+  on) in a given group which has previously been registered to
+  the GF database with StoreGFGroupData. The MPI section was
+  formerly called SyncGH.c.
+  @enddesc
+  @history
+  @hdate Sep 27 1998 @hauthor Gabrielle Allen, Tom Goodale
+  @hdesc Added the functionality to synchronise indices from a group
+  rather than the whole GH. Changed the routine/file names.
+  @endhistory
+ @@*/
+
+/* This routine works in one of two ways:
+ *
+ * 1. If using derived data types, it posts receives, posts sends,
+ *    waits for all sends and receives, and finishes the receives.
+ *
+ * 2. If using buffered communication, it posts receives, posts
+ *    sends, waits for receives only, and finishes them. We don't
+ *    need to wait for sends until the next time through. This
+ *    is why the wait for the send buffer is immediately before
+ *    the post send. (Since the send requests are initially set
+ *    to MPI_REQUEST_NULL, the first Wait and any waits in
+ *    unused directions are a wash.)
+ *
+ * This routine is intertwined with the contents of routines
+ * pGF_PostRecv, pGF_PostSend, and pGF_FinishRecv.
+ *
+ * Without MPI, this does nothing, of course.
+ *
+ * Note this routine is also responsible for updating the
+ * commt global so we can get a measure of how much time we
+ * spend in communications overhead.
+ *
+ * Also note that it is more efficient to sync a number of
+ * GFs together (as a group or a hierarchy) rather than
+ * singly. This is because syncing single functions waits for the
+ * receives on a single function before posting the sends on
+ * all the other functions.
+ */
+
+void SyncGroupGF(pGH *GH, const char *groupname) {
+#ifdef MPI
+  int Dir;
+  double t1,t2;
+  MPI_Request *sr = NULL;
+  MPI_Status mss;
+#endif
+  int i;
+  double st[PUGH_NTIMERS],et[PUGH_NTIMERS];
+  int l;
+  int *GFindices;
+  int nGFs;
+
+  pughTimers(st);
+
+  /* Find which grid functions make up the group */
+  GetGFGroupData(groupname,&nGFs,&GFindices);
+
+  /* Say which grid functions will actually be synchronised */
+  if (Contains("cactus_sync_verbose","yes")) {
+    printf("Syncing %d grid functions in group %s\n",nGFs,groupname);
+    printf("   Syncing GFs ...");
+    for (i=0;i<nGFs;i++) {
+      if (GH->gridFuncs[GFindices[i]]->storage
+          && (GH->gridFuncs[GFindices[i]]->docomm[1] ||
+              GH->gridFuncs[GFindices[i]]->docomm[2] ||
+              GH->gridFuncs[GFindices[i]]->docomm[3] ||
+              GH->forceSync))
+        printf(" %s",GH->gridFuncs[GFindices[i]]->name);
+    }
+    printf("\n");
+  }
+
+#ifdef MPI
+  if (GH->commmodel == PUGH_DERIVEDTYPES) {
+    /* 2 faces per direction, send and receive: hence the 2 * 2 */
+    sr = (MPI_Request *)malloc(nGFs * 2 * 2 * sizeof(MPI_Request));
+    for (i=0;i<nGFs * 2 * 2; i++)
+      sr[i] = MPI_REQUEST_NULL;
+  }
+
+  for (Dir = 0; Dir < 3; Dir ++) {
+#ifdef COMM_TIMING
+    t1 = MPI_Wtime();
+#endif
+    for (i=0;i<nGFs;i++) {
+      pGF_PostRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+      pGF_PostRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+    }
+
+#ifdef COMM_TIMING
+    t2 = MPI_Wtime();
+    printf ("PR : %lf\n",t2-t1);
+#endif
+    if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) {
+      for (i=0;i<nGFs;i++) {
+        /* Wait for the last send. Since these will be null if they
+           are not used, this is always safe. */
+        MPI_Wait(&(GH->gridFuncs[GFindices[i]]->sreq[2*Dir]),&mss);
+        pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+        MPI_Wait(&(GH->gridFuncs[GFindices[i]]->sreq[2*Dir+1]),&mss);
+        pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+      }
+    } else {
+      for (i=0;i<nGFs;i++) {
+        pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+        pGF_PostSend(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+      }
+    }
+
+#ifdef COMM_TIMING
+    t1 = MPI_Wtime();
+    printf ("PS : %lf\n",t1-t2);
+#endif
+    /* Now comes the big difference between derived types and
+       allocated buffers. With derived types, we now have to
+       wait on all our receive AND SEND requests so we can
+       keep on using the send buffers (as communications are
+       in-place). With allocated buffers we have to wait on each
+       receive, but not on the send, since we don't need the
+       send buffer until we pack a send again (above). */
+
+    if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) {
+      /* Wait on the receives one by one */
+      for (i=0;i<nGFs;i++) {
+        MPI_Wait(&(GH->gridFuncs[GFindices[i]]->rreq[2*Dir]),&mss);
+        pGF_FinishRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir);
+        MPI_Wait(&(GH->gridFuncs[GFindices[i]]->rreq[2*Dir+1]),&mss);
+        pGF_FinishRecv(GH,GH->gridFuncs[GFindices[i]],2*Dir+1);
+      }
+    }
+
+    if (GH->commmodel == PUGH_DERIVEDTYPES) {
+      /* Load up the requests for the waitall */
+      for (i=0;i<nGFs;i++) {
+        int id = i*4;
+        if (GH->gridFuncs[GFindices[i]]->docomm[2*Dir] &&
+            GH->gridFuncs[GFindices[i]]->storage) {
+          sr[id]   = GH->gridFuncs[GFindices[i]]->sreq[2*Dir];
+          sr[id+1] = GH->gridFuncs[GFindices[i]]->rreq[2*Dir];
+        } else {
+          sr[id]   = MPI_REQUEST_NULL;
+          sr[id+1] = MPI_REQUEST_NULL;
+        }
+
+        if (GH->gridFuncs[GFindices[i]]->docomm[2*Dir+1] &&
+            GH->gridFuncs[GFindices[i]]->storage) {
+          sr[id+2] = GH->gridFuncs[GFindices[i]]->sreq[2*Dir+1];
+          sr[id+3] = GH->gridFuncs[GFindices[i]]->rreq[2*Dir+1];
+        } else {
+          sr[id+2] = MPI_REQUEST_NULL;
+          sr[id+3] = MPI_REQUEST_NULL;
+        }
+      }
+      /* Now do a waitall; the statuses are not needed */
+      MPI_Waitall(4*nGFs,sr,MPI_STATUSES_IGNORE);
+    }
+#ifdef COMM_TIMING
+    t2 = MPI_Wtime();
+    printf ("FR : %lf\n",t2-t1);
+#endif
+  }
+
+  /* Free the derived-types request array once all directions are done */
+  if (sr) free(sr);
+#endif
+  pughTimers(et);
+  for (l=0;l<PUGH_NTIMERS;l++)
+    commt[l] += et[l]-st[l];
+}
+
+
+/*@@
+  @routine SyncGH
+  @author  Gabrielle Allen
+  @date    Sep 27 1998
+  @desc
+  Wrapper to support the SyncGH command, which is now implemented through
+  a group.
+  @enddesc
+ @@*/
+
+void SyncGH(pGH *GH) {
+
+  SyncGroupGF(GH,"$GH");
+
+}
+
+
+/*@@
+  @routine SyncGF
+  @author  Gabrielle Allen
+  @date    Sep 27 1998
+  @desc
+  Wrapper to support the SyncGF command, which is now implemented through
+  a group. Note that it is more efficient to sync all required GFs at
+  the same time, using @seeroutine SyncGroupGF with a given group.
+  @enddesc
+ @@*/
+
+void SyncGF(pGH *GH, pGF *GF) {
+
+  SyncGroupGF(GH,GF->name);
+
+}
+
+
+/* The old code for SyncGF is kept here (commented out), since it
+ * illustrates simply the use of pGF_PostRecv, pGF_PostSend and
+ * pGF_FinishRecv.
+ */
+
+/* void SyncGF(pGH *GH, pGF *GF) { */
+
+/* #ifdef MPI */
+/*   int Dir; */
+/*   MPI_Status mss; */
+/*   for (Dir=0;Dir<3;Dir++) { */
+/*     pGF_PostRecv(GH, GF, 2*Dir); */
+/*     pGF_PostRecv(GH, GF, 2*Dir+1); */
+
+/*     CACTUS_MPI_ERROR(MPI_Wait(&(GF->sreq[2*Dir]),&mss)); */
+/*     pGF_PostSend(GH, GF, 2*Dir); */
+/*     CACTUS_MPI_ERROR(MPI_Wait(&(GF->sreq[2*Dir+1]),&mss)); */
+/*     pGF_PostSend(GH, GF, 2*Dir+1); */
+
+/*     CACTUS_MPI_ERROR(MPI_Wait(&(GF->rreq[2*Dir]),&mss)); */
+/*     pGF_FinishRecv(GH, GF, 2*Dir); */
+/*     CACTUS_MPI_ERROR(MPI_Wait(&(GF->rreq[2*Dir+1]),&mss)); */
+/*     pGF_FinishRecv(GH, GF, 2*Dir+1); */
+/*   } */
+/* #endif */
+/* } */
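Stripped of timing and verbosity, the buffered path of SyncGroupGF is a classic post-receive / pack-and-send / wait-and-unpack exchange, one direction pair at a time. A condensed sketch using this file's own routines; gfs and ngfs stand in for the group's function list:

    #ifdef MPI
    /* Sketch: SyncGroupGF's buffered-communication skeleton. */
    static void sync_sketch(pGH *GH, pGF **gfs, int ngfs)
    {
      int Dir, i;
      MPI_Status mss;

      for (Dir = 0; Dir < 3; Dir++) {
        for (i = 0; i < ngfs; i++) {              /* 1. post receives   */
          pGF_PostRecv(GH, gfs[i], 2*Dir);
          pGF_PostRecv(GH, gfs[i], 2*Dir+1);
        }
        for (i = 0; i < ngfs; i++) {              /* 2. pack and send   */
          MPI_Wait(&gfs[i]->sreq[2*Dir], &mss);   /* previous send done */
          pGF_PostSend(GH, gfs[i], 2*Dir);
          MPI_Wait(&gfs[i]->sreq[2*Dir+1], &mss);
          pGF_PostSend(GH, gfs[i], 2*Dir+1);
        }
        for (i = 0; i < ngfs; i++) {              /* 3. wait and unpack */
          MPI_Wait(&gfs[i]->rreq[2*Dir], &mss);
          pGF_FinishRecv(GH, gfs[i], 2*Dir);
          MPI_Wait(&gfs[i]->rreq[2*Dir+1], &mss);
          pGF_FinishRecv(GH, gfs[i], 2*Dir+1);
        }
      }
    }
    #endif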
diff --git a/src/pugh/pGF_FinishRecv.c b/src/pugh/pGF_FinishRecv.c
new file mode 100644
index 0000000..316dfc1
--- /dev/null
+++ b/src/pugh/pGF_FinishRecv.c
@@ -0,0 +1,81 @@
+ /*@@
+   @file    pGF_FinishRecv.c
+   @date    Thu Apr 3 11:37:28 1997
+   @author  Paul Walker
+   @desc
+   The routine which finalizes the MPI receives for a grid
+   function. Critically linked with @seefile pGF_PostRecv.c
+   and @seefile pGF_PostSend.c.
+   @enddesc
+   @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+   @routine pGF_FinishRecv
+   @date    Thu Apr 3 11:38:07 1997
+   @author  Paul Walker
+   @desc
+   This routine finalizes the MPI communication through a face.
+   It is crucially linked with @seeroutine pGF_PostRecv and
+   @seeroutine pGF_PostSend.
+   <p>
+   <b>Important!</b>
+   This routine does <b>not</b> wait on the receives.
+   Before it is called, you must do the MPI_Wait or
+   else you will not get the right answer. For an
+   example of this, see @seeroutine SyncGroupGF.
+   @enddesc
+   @calledby SyncGroupGF
+   @history
+   @hdate Nov 4 1998 @hauthor Gabrielle Allen
+   @hdesc Allow for forced synchronization of all GFs with storage
+   @endhistory
+@@*/
+
+void pGF_FinishRecv(pGH *GH, pGF *GF, int dir) {
+#ifdef MPI
+  int istart, iend, jstart, jend, kstart, kend;
+  int ii,jj,kk,xx;
+
+  /* Return if GF has no storage */
+  if (!(GF->storage)) return;
+
+  /* Return if communication not required and no forced synchronisation */
+  if (!(GH->forceSync || GF->docomm[dir])) return;
+
+  if (GH->neighbors[GH->myproc][dir] >= 0) {
+    /* Here we wait one at a time, so the others can arrive while
+       we copy the data back in... */
+
+#ifdef DEBUG_PRINT
+    printf ("FINISHRECV: GF %s Side %d Proc %d request %d\n",
+            GF->name,dir,GH->myproc,GF->rreq[dir]);
+#endif
+
+    /* Copy the buffer into the GF storage */
+    istart = GH->ghosts[GF->stagger][dir][0][0];
+    iend   = GH->ghosts[GF->stagger][dir][0][1];
+    jstart = GH->ghosts[GF->stagger][dir][1][0];
+    jend   = GH->ghosts[GF->stagger][dir][1][1];
+    kstart = GH->ghosts[GF->stagger][dir][2][0];
+    kend   = GH->ghosts[GF->stagger][dir][2][1];
+
+    /* Now copy. Note the ordering is just like in PostSend. */
+    xx = 0;
+    for (kk=kstart; kk<kend; kk++)
+      for (jj=jstart; jj<jend; jj++)
+        for (ii=istart; ii<iend; ii++)
+          GF->data[DATINDEX(GH,ii,jj,kk)] = GF->recv_buffer[dir][xx++];
+  }
+
+#endif
+}
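The unpack loop relies on DATINDEX to linearize (i,j,k). Assuming it expands to i + lnx*(j + lny*k) (Fortran ordering; that is an assumption, check pugh.h), pack and unpack traverse a face in the same nest order, so buffer element xx always returns to the matching point. A sketch under that assumption:

    /* Sketch: the unpack order above, written against the assumed
       DATINDEX expansion i + lnx*(j + lny*k). */
    static void unpack_sketch(Double *data, const Double *buf,
                              int lnx, int lny,
                              int istart, int iend, int jstart, int jend,
                              int kstart, int kend)
    {
      int ii, jj, kk, xx = 0;
      for (kk = kstart; kk < kend; kk++)
        for (jj = jstart; jj < jend; jj++)
          for (ii = istart; ii < iend; ii++)
            data[ii + lnx*(jj + lny*kk)] = buf[xx++];
      /* pGF_PostSend packs with the identical loop nest, so element
         xx always lands back at the matching (ii,jj,kk). */
    }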
diff --git a/src/pugh/pGF_PostRecv.c b/src/pugh/pGF_PostRecv.c
new file mode 100644
index 0000000..81c4a30
--- /dev/null
+++ b/src/pugh/pGF_PostRecv.c
@@ -0,0 +1,69 @@
+ /*@@
+   @file    pGF_PostRecv.c
+   @date    Fri Feb 21 11:28:06 1997
+   @author
+   @desc
+   Routines which post all the IRecv commands for a GF. These
+   allow the asynchronous model proposed in, for example,
+   @seeroutine SyncGF to go!
+   @enddesc
+   @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+   @routine pGF_PostRecv
+   @date    Thu Apr 3 12:10:46 1997
+   @author  Paul Walker
+   @desc
+   This routine posts a receive (MPI_Irecv) of the buffer
+   we want to get the send from another processor, which
+   will be sent by @seeroutine pGF_PostSend and then
+   finalized by @seeroutine pGF_FinishRecv.
+   <p>
+   Aside from a silly calculation to get a unique tag
+   based on neighbors and processors, this is a very
+   straightforward routine.
+   @enddesc
+   @history
+   @hdate Nov 4 1998 @hauthor Gabrielle Allen
+   @hdesc Allow for forced synchronization of all GFs with storage
+   @endhistory
+ @@*/
+
+void pGF_PostRecv(pGH *GH, pGF *GF, int dir) {
+#ifdef MPI
+  int rtag;
+
+  /* Return if GF has no storage */
+  if (!(GF->storage)) return;
+
+  /* Return if communication not required and no forced synchronisation */
+  if (!(GH->forceSync || GF->docomm[dir])) return;
+
+  if (GH->neighbors[GH->myproc][dir] >= 0) {
+    rtag = 1000 + dir + 2 * (GH->myproc + GH->nprocs * GF->gfno);
+    /* mod the tag to force MPI compliance */
+    rtag = rtag % 32768;
+#ifdef DEBUG_PRINT
+    printf ("RECV GF %s Side %d Proc %d rtag %d size %d from proc %d\n",
+            GF->name,dir,GH->myproc,rtag,
+            GF->buffer_sz[dir],GH->neighbors[GH->myproc][dir]);
+#endif
+    if (GH->commmodel == PUGH_ALLOCATEDBUFFERS)
+      CACTUS_MPI_ERROR(MPI_Irecv((void*)(GF->recv_buffer[dir]),
+                                 GF->buffer_sz[dir],PUGH_MPI_TYPE,
+                                 GH->neighbors[GH->myproc][dir],rtag,
+                                 GH->PUGH_COMM_WORLD,&(GF->rreq[dir])));
+    if (GH->commmodel == PUGH_DERIVEDTYPES)
+      CACTUS_MPI_ERROR(MPI_Irecv((void *)(GF->data),
+                                 1,GH->recv_dt[GF->stagger][dir],
+                                 GH->neighbors[GH->myproc][dir],rtag,
+                                 GH->PUGH_COMM_WORLD,&(GF->rreq[dir])));
+  }
+#endif
+}
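The matching send tag is built in pGF_PostSend (the next file) from the receiver's processor number and the receiver's face, i.e. the complement of the sender's own face, so it lands on exactly this rtag. A worked sketch with made-up numbers; the assert is only illustrative:

    #include <assert.h>

    /* Sketch: tag matching between pGF_PostRecv and pGF_PostSend.
       Illustrative numbers: nprocs = 4, gfno = 7; proc 1 sends through
       its x+ face (dir 1) to proc 2, which receives on x- (dir 0). */
    void tag_sketch(void)
    {
      int nprocs = 4, gfno = 7;
      int rtag = (1000 + 0 + 2 * (2 + nprocs * gfno)) % 32768; /* on proc 2 */
      int dircomp = 0;            /* complement of the sender's dir 1 */
      int stag = (1000 + dircomp + 2 * (2 + nprocs * gfno)) % 32768;
      assert(rtag == stag && rtag == 1060);
    }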
diff --git a/src/pugh/pGF_PostSend.c b/src/pugh/pGF_PostSend.c
new file mode 100644
index 0000000..d8ec630
--- /dev/null
+++ b/src/pugh/pGF_PostSend.c
@@ -0,0 +1,116 @@
+ /*@@
+   @file    pGF_PostSend.c
+   @date    Thu Apr 3 11:58:14 1997
+   @author  Paul Walker
+   @desc
+   The send arm of the three-way pugh communications branch,
+   which is @seefile pGF_PostRecv.c, @seefile pGF_PostSend.c
+   and @seefile pGF_FinishRecv.c. See the documentation for
+   @seeroutine pGF_PostSend for details on this routine.
+   @enddesc
+   @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+   @routine pGF_PostSend
+   @date    Thu Apr 3 11:58:59 1997
+   @author  Paul Walker
+   @desc
+   This routine posts an asynchronous MPI send of a buffer
+   for a face (MPI_Isend). It does this by first packing up
+   a send buffer (allocated in @seeroutine SetupPGF), then
+   sending it out on the pipe.
+   <p>
+   Since this is an asynchronous communications model we
+   assume that a receive has been posted for the send;
+   thus the correct calling order for this is as in
+   @seeroutine SyncGF, e.g., after a Recv.
+   <p>
+   Note this does <b>not</b> wait on the send buffer from previous
+   communications. It is the user's responsibility to wait on
+   that buffer.
+   @enddesc
+   @calledby SyncGroupGF
+   @history
+   @hdate Nov 4 1998 @hauthor Gabrielle Allen
+   @hdesc Allow for forced synchronization of all GFs with storage
+   @endhistory
+@@*/
+
+void pGF_PostSend(pGH *GH, pGF *GF, int dir) {
+#ifdef MPI
+  int stag, dircomp;
+  int ii,jj,kk,xx;
+  int istart, iend;
+  int jstart, jend;
+  int kstart, kend;
+  Double ts, tw, tp;
+
+  /* Return if GF has no storage */
+  if (!(GF->storage)) return;
+
+  /* Return if communication not required and no forced synchronisation */
+  if (!(GH->forceSync || GF->docomm[dir])) return;
+
+  if (GH->neighbors[GH->myproc][dir] >= 0) {
+    ts = MPI_Wtime();
+    dircomp = dir+1;                  /* Complementary direction */
+    if (dircomp % 2 == 0) dircomp = dir-1;
+    /* Note this is the complement of the rtag set in PostRecv */
+    stag = 1000 + dircomp + 2 * (GH->neighbors[GH->myproc][dir]
+                                 + GH->nprocs * GF->gfno);
+    /* mod the tag to force MPI compliance */
+    stag = stag % 32768;
+
+#ifdef DEBUG_PRINT
+    printf ("SEND: GF %s Side %d Proc %d stag %d Size %d to %d\n",
+            GF->name,dir,GH->myproc,stag,
+            GF->buffer_sz[dir],GH->neighbors[GH->myproc][dir]);
+#endif
+
+    if (GH->commmodel == PUGH_DERIVEDTYPES)
+      CACTUS_MPI_ERROR(MPI_Isend((void *)(GF->data),
+                                 1,GH->send_dt[GF->stagger][dir],
+                                 GH->neighbors[GH->myproc][dir],stag,
+                                 GH->PUGH_COMM_WORLD,&(GF->sreq[dir])));
+
+    if (GH->commmodel == PUGH_ALLOCATEDBUFFERS) {
+      /* Copy my information into the send buffer */
+      istart = GH->overlap[GF->stagger][dir][0][0];
+      iend   = GH->overlap[GF->stagger][dir][0][1];
+      jstart = GH->overlap[GF->stagger][dir][1][0];
+      jend   = GH->overlap[GF->stagger][dir][1][1];
+      kstart = GH->overlap[GF->stagger][dir][2][0];
+      kend   = GH->overlap[GF->stagger][dir][2][1];
+
+      /* Now copy */
+      xx = 0;
+      for (kk=kstart; kk<kend; kk++)
+        for (jj=jstart; jj<jend; jj++)
+          for (ii=istart; ii<iend; ii++)
+            GF->send_buffer[dir][xx++] = GF->data[DATINDEX(GH,ii,jj,kk)];
+
+      tp = MPI_Wtime();
+
+      /* Post the send */
+      CACTUS_MPI_ERROR(MPI_Isend((void*)(GF->send_buffer[dir]),
+                                 GF->buffer_sz[dir],PUGH_MPI_TYPE,
+                                 GH->neighbors[GH->myproc][dir],stag,
+                                 GH->PUGH_COMM_WORLD,&(GF->sreq[dir])));
+      tw = MPI_Wtime();
+#ifdef COMM_TIMING
+      printf ("GF %s Dir %d Time %lf\n",
+              GF->name, dir, tw-ts);
+#endif
+    }
+  }
+#endif
+}
diff --git a/src/pugh/pGF_Reduction.c b/src/pugh/pGF_Reduction.c
new file mode 100644
index 0000000..bc8e211
--- /dev/null
+++ b/src/pugh/pGF_Reduction.c
@@ -0,0 +1,198 @@
+ /*@@
+   @file    pGF_Reduction.c
+   @date    Thu Apr 3 11:54:53 1997
+   @author  Paul Walker
+   @desc
+   Various MPI reduction operators for maxima, minima, norms
+   and the like on grid functions. These are all pretty
+   straightforward thanks to the miracle of MPI_Allreduce.
+   @enddesc
+   @version $Id$
+ @@*/
+
+#include "pugh.h"
+
+static char *rcsid = "$Id$";
+
+ /*@@
+   @routine pGF_MaxVal
+   @date    Thu Apr 3 11:55:25 1997
+   @author  Paul Walker
+   @desc
+   Returns the maximum value of a distributed grid function.
+   @enddesc
+@@*/
+
+
+Double pGF_MaxVal(pGH *GH, pGF *GF) {
+  Double res, lres;
+  int i;
+
+  if (!GF->storage) return GF->maxval;
+
+  res = GF->data[0];
+  for (i=1;i<GH->npoints;i++)
+    res = (GF->data[i] > res ? GF->data[i] : res);
+
+  /* So now res is the local max */
+#ifdef MPI
+  lres = res;
+  CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_MAX,GH->PUGH_COMM_WORLD));
+#endif
+
+  return res;
+}
+
+
+ /*@@
+   @routine pGF_MinVal
+   @date    Thu Apr 3 11:55:48 1997
+   @author  Paul Walker
+   @desc
+   Returns the minimum value of a distributed grid function.
+   @enddesc
+@@*/
+
+Double pGF_MinVal(pGH *GH, pGF *GF) {
+  Double res, lres;
+  int i;
+
+  if (!GF->storage) return GF->minval;
+
+  res = GF->data[0];
+  for (i=1;i<GH->npoints;i++)
+    res = (GF->data[i] < res ? GF->data[i] : res);
+
+  /* So now res is the local min */
+#ifdef MPI
+  lres = res;
+  CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_MIN,GH->PUGH_COMM_WORLD));
+#endif
+
+  return res;
+}
+
+
+ /*@@
+   @routine pGF_Norm1
+   @date    Thu Apr 3 11:56:05 1997
+   @author  Paul Walker
+   @desc
+   Returns the average of the absolute value of a distributed
+   grid function.
+   @enddesc
+@@*/
+
+Double pGF_Norm1(pGH *GH, pGF *GF) {
+  Double res, lres;
+  int i, ii, jj, kk;
+  int np, tnp;
+
+  if (!GF->storage) return GF->norm1;
+
+#define ABS(x) ((x)<0?-(x):(x))
+
+#ifndef MPI
+  res = 0.0;
+  for (i=0;i<GH->npoints;i++) res += ABS(GF->data[i]);
+  tnp = GH->npoints;
+
+#else
+  /* With MPI, sum only over the points this processor owns */
+  res = 0.0;
+  np = 0;
+  for (kk=GH->ownership[GF->stagger][2][0];
+       kk < GH->ownership[GF->stagger][2][1];
+       kk ++)
+    for (jj = GH->ownership[GF->stagger][1][0];
+         jj < GH->ownership[GF->stagger][1][1];
+         jj ++)
+      for (ii = GH->ownership[GF->stagger][0][0];
+           ii < GH->ownership[GF->stagger][0][1];
+           ii ++) {
+        res += ABS(GF->data[DATINDEX(GH,ii,jj,kk)]);
+        np++;
+      }
+  lres = res;
+  CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_SUM,GH->PUGH_COMM_WORLD));
+  CACTUS_MPI_ERROR(MPI_Allreduce(&np,&tnp,1,MPI_INT,MPI_SUM,GH->PUGH_COMM_WORLD));
+#endif
+
+  res = res/tnp;
+  return res;
+}
+
+
+ /*@@
+   @routine pGF_Norm2
+   @date    Thu Apr 3 11:56:36 1997
+   @author  Paul Walker
+   @desc
+   Returns the "norm2" of a distributed grid function. That
+   is, returns $\sqrt{\Sigma (a_i * a_i) / np}$.
+   @enddesc
+@@*/
+
+
+Double pGF_Norm2(pGH *GH, pGF *GF) {
+  Double res, lres;
+  int i, ii, jj, kk;
+  int np, tnp;
+
+  if (!GF->storage) return GF->norm2;
+
+#ifndef MPI
+  res = 0.0;
+  for (i=0;i<GH->npoints;i++) res += GF->data[i]*GF->data[i];
+  tnp = GH->npoints;
+
+#else
+  /* The MPI version depends heavily on the ownership array */
+  res = 0.0;
+  np = 0;
+
+  for (kk=GH->ownership[GF->stagger][2][0];
+       kk < GH->ownership[GF->stagger][2][1];
+       kk ++)
+    for (jj = GH->ownership[GF->stagger][1][0];
+         jj < GH->ownership[GF->stagger][1][1];
+         jj ++)
+      for (ii = GH->ownership[GF->stagger][0][0];
+           ii < GH->ownership[GF->stagger][0][1];
+           ii ++) {
+        res += GF->data[DATINDEX(GH,ii,jj,kk)] *
+               GF->data[DATINDEX(GH,ii,jj,kk)];
+        np ++;
+      }
+  lres = res;
+  CACTUS_MPI_ERROR(MPI_Allreduce(&lres,&res,1,PUGH_MPI_TYPE,MPI_SUM,GH->PUGH_COMM_WORLD));
+  CACTUS_MPI_ERROR(MPI_Allreduce(&np,&tnp,1,MPI_INT,MPI_SUM,GH->PUGH_COMM_WORLD));
+#endif
+
+  res = sqrt(res/tnp);
+  return res;
+}
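All the reductions above share one shape: a local reduction over this processor's points followed by MPI_Allreduce, after which every processor holds the global result. A sketch of an infinity norm built the same way (not a pugh routine; ABS as defined above; including ghost points is harmless in a max):

    /* Sketch: the local-reduce-then-Allreduce pattern applied to an
       infinity norm (illustrative only). */
    Double pGF_NormInf_sketch(pGH *GH, pGF *GF)
    {
      Double res, lres = 0.0;
      int i;

      for (i = 0; i < GH->npoints; i++)        /* local reduction */
        if (ABS(GF->data[i]) > lres) lres = ABS(GF->data[i]);
      res = lres;
    #ifdef MPI
      CACTUS_MPI_ERROR(MPI_Allreduce(&lres, &res, 1, PUGH_MPI_TYPE,
                                     MPI_MAX, GH->PUGH_COMM_WORLD));
    #endif
      return res;                              /* same on every processor */
    }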
+ /*@@
+   @routine pGF_AllNorms
+   @date    Mon Jun 30 10:44:57 1997
+   @author  Paul Walker
+   @desc
+   Returns all 4 norms into a Double pointer in the order
+   max, min, norm1, norm2.
+   <p>
+   This is purely a convenience function.
+   @enddesc
+   @calls pGF_MaxVal, pGF_MinVal, pGF_Norm1, pGF_Norm2
+@@*/
+
+void pGF_AllNorms(pGH *GH, pGF *GF, Double *res) {
+  res[0] = pGF_MaxVal(GH,GF);
+  res[1] = pGF_MinVal(GH,GF);
+  res[2] = pGF_Norm1(GH,GF);
+  res[3] = pGF_Norm2(GH,GF);
+}
diff --git a/src/pugh_Comm.h b/src/pugh_Comm.h
new file mode 100644
index 0000000..0108c7b
--- /dev/null
+++ b/src/pugh_Comm.h
@@ -0,0 +1,52 @@
+ /*@@
+   @header  pugh_Comm.h
+   @date    Thu Feb 4 11:42:50 1999
+   @author  Tom Goodale
+   @desc
+   Prototypes for the overloadable pugh communication functions.
+   @enddesc
+   @version $Header$
+ @@*/
+
+#ifndef _PUGH_COMM_H_
+#define _PUGH_COMM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Overloaded functions. */
+int pugh_SyncGroup(cGH *GH, const char *group);
+int pugh_EnableGroupStorage(cGH *GH, const char *group);
+int pugh_DisableGroupStorage(cGH *GH, const char *group);
+
+int pugh_EnableGroupComm(cGH *GH, const char *group);
+int pugh_DisableGroupComm(cGH *GH, const char *group);
+
+int pugh_Barrier(cGH *GH);
+int pugh_Reduce(cGH *GH,
+                const char *operation,
+                int n_infields,
+                int n_outfields,
+                int out_type,
+                void **outarray,
+                ...);
+
+int pugh_Interp(cGH *GH,
+                const char *operation,
+                int n_coords,
+                int n_infields,
+                int n_outfields,
+                int n_points,
+                int type,
+                ...);
+
+int pugh_ParallelInit(cGH *GH);
+int pugh_Exit(cGH *GH);
+int pugh_Abort(cGH *GH);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/pugh_extension.h b/src/pugh_extension.h
new file mode 100644
index 0000000..4307ccd
--- /dev/null
+++ b/src/pugh_extension.h
@@ -0,0 +1,30 @@
+ /*@@
+   @header  pugh_extension.h
+   @date    Thu Feb 4 10:51:27 1999
+   @author  Tom Goodale
+   @desc
+   The header file for the PUGH GH extension stuff.
+   @enddesc
+   @version $Header$
+ @@*/
+
+#ifndef _PUGH_EXTENSION_H_
+#define _PUGH_EXTENSION_H_
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+void *pugh_SetupGH(tFleshConfig *config,
+                   int convergence_level,
+                   cGH *GH);
+
+int pugh_InitGH(cGH *GH);
+
+int pugh_rfrTraverseGH(cGH *GH, int rfrpoint);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif