diff options
-rw-r--r-- | src/Comm.c | 213 | ||||
-rw-r--r-- | src/GHExtension.c | 8 | ||||
-rw-r--r-- | src/SetupGroup.c | 12 | ||||
-rw-r--r-- | src/SetupPGV.c | 28 |
4 files changed, 235 insertions, 26 deletions
@@ -24,16 +24,16 @@ extern int PUGH_GHExtension; /* local function prototypes */ -int pugh_SyncGroupArray (cGH *GH, - int group, - int vtype, - int n_vars, - int timelevel); int pugh_SyncGroupGF (cGH *GH, int group, int vtype, int n_vars, int timelevel); +int pugh_SyncGroupGA (cGH *GH, + int group, + int vtype, + int n_vars, + int timelevel); int pugh_SyncGFs(pGH *pughGH, int first_var, int n_vars, int timelevel); void DisableGFDataStorage(pGH *GH, pGF *GF); void EnableGFDataStorage(pGH *GH, pGF *GF); @@ -64,6 +64,9 @@ int pugh_EnableGFGroupStorage(cGH *GH, void pGF_FinishRecv(pGH *GH, pGF *GF, int dir); void pGF_PostRecv(pGH *GH, pGF *GF, int dir); void pGF_PostSend(pGH *GH, pGF *GF, int dir); +void FinishReceiveGA(pGH *GH, pGA *GA, int dir); +void PostReceiveGA(pGH *GH, pGA *GA, int dir); +void PostSendGA(pGH *GH, pGA *GA, int dir); void SetGFComm(pGH *GH, pGF *res, int docomm); int pugh_GAComm(pGA *GA, int docomm); @@ -111,18 +114,20 @@ int pugh_SyncGroup (cGH *GH, const char *groupname) timelevel = pgroup.numtimelevels - 1; /* branch to synchronization function according to group type */ - switch (pgroup.grouptype) { + switch (pgroup.grouptype) + { + case GROUP_SCALAR : rc = 0; + CCTK_WARN(4,"Synchronising a scalar in PUGH"); break; case GROUP_ARRAY : - /* rc = pugh_SyncGA (GH, - group, - pgroup.vartype, - pgroup.numvars, - timelevel);*/ - rc = 1; + rc = pugh_SyncGroupGA (GH, + group, + pgroup.vartype, + pgroup.numvars, + timelevel); break; case GROUP_GF : @@ -722,12 +727,192 @@ int pugh_Abort(cGH *GH) /* local functions */ /*****************************************************************************/ -int pugh_SyncGroupArray (cGH *GH, int group, int vtype, int n_vars, int timelevel) + + +int pugh_SyncGroupGA (cGH *GH, + int group, + int vtype, + int n_vars, + int timelevel) +{ + + pGH *pughGH = (pGH *) GH->extensions [PUGH_GHExtension]; + pGA *GA; + int i; + int first_var = CCTK_FirstVarIndexI (group); + + /* Say which grid functions will actually be synchronised */ + if (pughGH->sync_verbose && pughGH->myproc == 0) + { + char *gname = CCTK_GroupName (group); + + printf("Syncing %d arrays in group %s\n", n_vars, gname); + printf(" Syncing arrays ..."); + + for (i = first_var; i < first_var + n_vars; i++) + { + GA = (pGA *) (pughGH->variables [i][timelevel]); + if (GA->storage && (GA->commflag != PUGH_NOCOMM || pughGH->forceSync)) + printf (" %s", GA->name); + } + printf ("\n"); + free (gname); + } + + /* synchronize */ + return pugh_SyncGAs(pughGH, + CCTK_GroupDimI(group), + first_var, + n_vars, + timelevel); +} + + +int pugh_SyncGAs(pGH *pughGH, + int dim, + int first_var, + int n_vars, + int timelevel) { - CCTK_WARN (1, "Syncing of GROUP_ARRAY type not yet supported"); +#ifdef MPI + int Dir; + pGA *GA; +#ifdef COMM_TIMING + double t1,t2; +#endif + MPI_Request *sr = NULL; + MPI_Status mss; +#endif + + /* start the timer for communication time */ +#if 0 + CactusStartTimer (&pughGH->comm_time); +#endif + +#ifdef MPI + if (pughGH->commmodel == PUGH_DERIVEDTYPES) + { + int i; + /* 2 faces, send and receive is the 2 * 2 */ + sr = (MPI_Request *)malloc(n_vars * 2 * 2 * sizeof(MPI_Request)); + for (i=0;i<n_vars * 2 * 2; i++) + sr[i] = MPI_REQUEST_NULL; + } + + for (Dir = 0; Dir < dim; Dir ++) + { + int i; +#ifdef COMM_TIMING + t1 = MPI_Wtime(); +#endif + for (i = first_var; i < first_var + n_vars; i++) + { + GA = (pGA *) (pughGH->variables [i][timelevel]); + PostReceiveGA(pughGH, GA, 2*Dir); + PostReceiveGA(pughGH, GA, 2*Dir+1); + } + +#ifdef COMM_TIMING + t2 = MPI_Wtime(); + printf ("PR : %lf\n",t2-t1); +#endif + if (pughGH->commmodel == PUGH_ALLOCATEDBUFFERS) + { + for (i = first_var; i < first_var + n_vars; i++) + { + GA = (pGA *) (pughGH->variables [i][timelevel]); + /* Wait for the last send. Since these will be null if they + are not used, this is always safe. + */ + MPI_Wait(&(GA->sreq[2*Dir]),&mss); + PostSendGA(pughGH, GA, 2*Dir); + MPI_Wait(&(GA->sreq[2*Dir+1]),&mss); + PostSendGA(pughGH, GA, 2*Dir+1); + } + } else { + for (i = first_var; i < first_var + n_vars; i++) + { + GA = (pGA *) (pughGH->variables [i][timelevel]); + PostSendGA(pughGH, GA, 2*Dir); + PostSendGA(pughGH, GA, 2*Dir+1); + } + } + +#ifdef COMM_TIMING + t1 = MPI_Wtime(); + printf ("PS : %lf\n",t1-t2); +#endif + + /* Now comes the big difference between derived types and + allocated buffers. With derived types, we now have to + wait on all our recieve AND SEND buffers so we can + keep on using the send buffers ( as communications are + in-place). With the allocated we have to wait on each + recieve, but not on the send, since we don't need the + send buffer until we pack a send again (above) + */ + + if (pughGH->commmodel == PUGH_ALLOCATEDBUFFERS) + { + /* Do a wait any on the receives */ + for (i = first_var; i < first_var + n_vars; i++) + { + GA = (pGA *) (pughGH->variables [i][timelevel]); + MPI_Wait(&(GA->rreq[2*Dir]),&mss); + FinishReceiveGA(pughGH, GA, 2*Dir); + MPI_Wait(&(GA->rreq[2*Dir+1]),&mss); + FinishReceiveGA(pughGH, GA, 2*Dir+1); + } + } + + if (pughGH->commmodel == PUGH_DERIVEDTYPES) + { + /* Load up the thing for the waitall */ + for (i=0;i<n_vars;i++) { + int id = i*4; + + if (GA->docomm[2*Dir] && + GA->storage) { + sr[id] = GA->sreq[2*Dir]; + sr[id+1] = GA->rreq[2*Dir]; + } else { + sr[id] = MPI_REQUEST_NULL; + sr[id+1] = MPI_REQUEST_NULL; + } + + if (GA->docomm[2*Dir+1] && + GA->storage) { + sr[id+2] = GA->sreq[2*Dir+1]; + sr[id+3] = GA->rreq[2*Dir+1]; + } else { + sr[id+2] = MPI_REQUEST_NULL; + sr[id+3] = MPI_REQUEST_NULL; + } + } + /* Now do a waitall */ + MPI_Waitall(4*n_vars,sr,&mss); + + } + + if (sr) + free(sr); +#ifdef COMM_TIMING + t2 = MPI_Wtime(); + printf ("FR : %lf\n",t2-t1); +#endif + } +#endif /* MPI */ + + /* get the time spent in communication */ + +#if 0 + CactusStopTimer (&pughGH->comm_time); +#endif + return (1); } + /*@@ @routine pugh_SyncGroup @author Paul Walker diff --git a/src/GHExtension.c b/src/GHExtension.c index 264ae86..fe16c5a 100644 --- a/src/GHExtension.c +++ b/src/GHExtension.c @@ -120,6 +120,14 @@ void *pugh_SetupGH(tFleshConfig *config, groupghostsize[i]=*size[i]; } } + else if (pgroup.grouptype == CCTK_ARRAY) + { + groupghostsize = (int *)malloc(pgroup.dim*sizeof(int)); + for (i=0;i<pgroup.dim;i++) + { + groupghostsize[i]=0; + } + } else { groupghostsize = NULL; diff --git a/src/SetupGroup.c b/src/SetupGroup.c index de5d508..072a2da 100644 --- a/src/SetupGroup.c +++ b/src/SetupGroup.c @@ -165,7 +165,6 @@ int pugh_SetupArrayGroup int periodic=0; int *nprocs; - int *nghostzones; /* FIXME: Arrays can't have manual set up yet GAB */ nprocs = (int *) malloc(dim*sizeof(int)); @@ -175,13 +174,6 @@ int pugh_SetupArrayGroup } connectivity = pugh_SetupConnectivity(dim,newGH->nprocs,nprocs,periodic); - - /* FIXME: For now default ghostzones 0 for array GAB */ - nghostzones = (int *)malloc(dim*sizeof(int)); - for (i=0;i<dim;i++) - { - nghostzones[i]=0; - } extras = pugh_SetupPGExtras ( @@ -189,14 +181,14 @@ int pugh_SetupArrayGroup periodic, staggercode, nsize, - nghostzones, + ghostsize, newGH->nprocs, connectivity->nprocs, newGH->myproc ); free(nprocs); - free(nghostzones); + /* free(nghostzones);*/ temp = (void ***)realloc (newGH->variables,(newGH->nvariables+n_variables)*sizeof(void **)); diff --git a/src/SetupPGV.c b/src/SetupPGV.c index 3b1d334..c797afa 100644 --- a/src/SetupPGV.c +++ b/src/SetupPGV.c @@ -376,6 +376,10 @@ int pugh_GenerateNeighbours(int dim, int total_procs, int *nprocs, int **neighbo neighbours[i][idim*2] = pugh_ComposeIJK(dim, nprocs, pos); pos[idim] = temp; } + else + { + neighbours[i][idim*2] = -1; + } pos[idim]++; @@ -393,7 +397,11 @@ int pugh_GenerateNeighbours(int dim, int total_procs, int *nprocs, int **neighbo neighbours[i][idim*2+1] = pugh_ComposeIJK(dim, nprocs, pos); pos[idim] = temp; } - + else + { + neighbours[i][idim*2+1] = -1; + } + pos[idim]--; } } @@ -407,6 +415,20 @@ int pugh_GenerateNeighbours(int dim, int total_procs, int *nprocs, int **neighbo free(pos); +#ifdef DEBUG_PUGH + /* Print neighbours */ + printf("Neighbours (dim: %d)\n",dim); + for (i=0;i<total_procs;i++) + { + printf("Proc %d: ",i); + for (idim=0;idim<2*dim;idim++) + { + printf(" %d",neighbours[i][idim]); + } + printf("\n"); + } +#endif + return retval; } @@ -1274,7 +1296,7 @@ int pugh_EnablePGAStorage(pGA *GA, if(GA->storage == PUGH_NOSTORAGE) { /* Allocate memory for communication buffers */ - for(i = 0; i < GA->extras->dim; i++) + for(i = 0; i < 2*GA->extras->dim; i++) { dir = i/2; @@ -1478,7 +1500,9 @@ void pugh_GAComm(pGA *GA, int docomm) else if (docomm == PUGH_ALLCOMM) { for (idir=0;idir<2*dim;idir++) + { GA->docomm[idir] = 1; + } } else if (docomm == PUGH_PLUSFACESCOMM) { |