From 2c7d036fabfabd0ca83b6be58a15757306a827ee Mon Sep 17 00:00:00 2001 From: tradke Date: Mon, 25 Mar 2002 10:09:45 +0000 Subject: Just syncing my local changes for the new hyperslabbing API with CVS. git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGH/PUGHSlab/trunk@76 10716dce-81a3-4424-a2c8-48026a0d3035 --- src/GetHyperslab.c | 314 ++++++++++++++++++++++++++++++++--------------------- src/Mapping.c | 100 ++++++++++------- src/PUGHSlabi.h | 1 + 3 files changed, 254 insertions(+), 161 deletions(-) (limited to 'src') diff --git a/src/GetHyperslab.c b/src/GetHyperslab.c index ad7cbd5..479f2e9 100644 --- a/src/GetHyperslab.c +++ b/src/GetHyperslab.c @@ -80,42 +80,216 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, void *const *hdata /* num_arrays */, CCTK_INT *retvals /* num_arrays */) { - int i, retval; + int i, nprocs, myproc, proc, timelevel, hdatatype, retval; int result, *result_ptr; hslab_mapping_t *mapping; + void *local_hdata; +#ifdef CCTK_MPI + CCTK_INT totals_global, *chunk_hsize, *local_hsize; + MPI_Comm comm; + int *displs, *recvcnts; + void *chunked_hdata; + MPI_Datatype mpi_vtype; +#endif + retval = 0; mapping = PUGHSlabi_GetMapping (mapping_handle); + /*** FIXME: how to deal with local mapping ***/ if (mapping == NULL) { - return (-2); + retval = -1; + } + else + { + /* check mapping consistency */ + /*** FIXME ***/ + /* all vindices[] must match the given mapping */ + +#if 0 + NOTE mapping->totals denotes the number of LOCAL data points !!! + /* check if there's any data to extract */ + if (mapping->totals <= 0) + { + retval = num_arrays; + } +#endif } - /* check mapping consistency */ - /*** FIXME ***/ + if (retval) + { + /* set the retvals[] according to the return value */ + if (retvals) + { + for (i = 0; i < num_arrays; i++) + { + retvals[i] = retval > 0 ? 0 : retval; + } + } + + return (retval); + } + +#ifdef CCTK_MPI + myproc = CCTK_MyProc (GH); + nprocs = CCTK_nProcs (GH); + + /* now build the CCTK_INT buffer to communicate the sizes and offsets */ + /*** FIXME: could also do this already when creating the mapping ***/ + /*** could also cache the chunk_size in the mapping structure to + save following MPI_Allgather() calls but this makes + Hyperslab_DefineGlobalMapping() a collective call ***/ + local_hsize = (CCTK_INT *) malloc ((1 + nprocs) * (2*mapping->hdim + 1) + * sizeof (CCTK_INT)); + chunk_hsize = local_hsize + 2*mapping->hdim + 1; + for (i = 0; i < mapping->hdim; i++) + { + local_hsize[0*mapping->hdim + i] = mapping->local_hsize[i]; + local_hsize[1*mapping->hdim + i] = mapping->global_hoffset[i]; + } + local_hsize[2*mapping->hdim + 0] = mapping->totals; + + comm = PUGH_pGH (GH)->PUGH_COMM_WORLD; + /* FIXME: do an Allgather here so that all procs know how many data points + we have. This is useful to decide whether we have to gather + something at all or not and thus can return immediately */ + CACTUS_MPI_ERROR (MPI_Allgather (local_hsize, 2*mapping->hdim + 1, + PUGH_MPI_INT, chunk_hsize, + 2*mapping->hdim + 1, PUGH_MPI_INT, comm)); + + /* sum up the total number of hyperslab data points */ + totals_global = 0; + for (i = 0; i < nprocs; i++) + { + totals_global += chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim]; + } /* check if there's any data to extract */ - if (mapping->totals == 0) + if (totals_global <= 0) { + if (retvals) + { + memset (retvals, 0, num_arrays * sizeof (CCTK_INT)); + } + free (local_hsize); return (0); } - retval = 0; + /* set the displacements/receive counts for the gather operation */ + displs = (int *) malloc (2 * nprocs * sizeof (int)); + recvcnts = displs + nprocs; + + /* for hdim == 1 we let MPI_Gatherv() do the sorting directly into the + hyperslab buffer using the given hyperslab offsets and sizes */ + if (mapping->hdim == 1) + { + for (i = 0; i < nprocs; i++) + { + recvcnts[i] = chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim]; + displs[i] = mapping->is_diagonal_in_3D ? + i == 0 ? 0 : displs[i-1] + recvcnts[i-1] : + chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim - 1]; + } + } + else + { + displs[0] = 0; + recvcnts[0] = chunk_hsize[2*mapping->hdim]; + for (i = 1; i < nprocs; i++) + { + displs[i] = displs[i-1] + chunk_hsize[(i-1)*(2*mapping->hdim+1) + + 2*mapping->hdim]; + recvcnts[i] = chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim]; + } + } +#endif + + /* now loop over all hyperslabs */ + /*** FIXME: should optimize this loop away ***/ for (i = 0; i < num_arrays; i++) { - /* get the processor-local hyperslab */ + proc = procs ? procs[i] : DEFAULT_PROCESSOR; + timelevel = timelevels ? timelevels[i] : DEFAULT_TIMELEVEL; + hdatatype = hdatatypes ? hdatatypes[i] : CCTK_VarTypeI (vindices[i]); result_ptr = retvals ? &retvals[i] : &result; - *result_ptr = GetLocalHyperslab (GH, mapping, - procs ? procs[i] : DEFAULT_PROCESSOR, - vindices[i], - timelevels ? timelevels[i] : DEFAULT_TIMELEVEL, - hdatatypes ? hdatatypes[i] : -1, hdata[i]); + + if (mapping->totals <= 0) + { + local_hdata = NULL; + *result_ptr = 0; + } + else + { + /* allocate temporary array to keep the local hyperslab data */ + local_hdata = nprocs == 1 ? + hdata[i] : malloc (mapping->totals * CCTK_VarTypeSize (hdatatype)); + /* get the processor-local hyperslab */ + *result_ptr = GetLocalHyperslab (GH, mapping, proc, vindices[i], + timelevel, hdatatype, local_hdata); + } + if (*result_ptr == 0) { retval++; } + + if (nprocs == 1) + { + continue; + } + +#ifdef CCTK_MPI + if (proc < 0 || proc == myproc) + { + /* allocate temporary array to receive the chunks from all processors */ + /* for the case of hdim == 1, the sorting is done by MPI */ + chunked_hdata = mapping->hdim > 1 ? + malloc (totals_global * CCTK_VarTypeSize (hdatatype)) : + hdata[i]; + } + else + { + chunked_hdata = NULL; + } + + /* detect the MPI datatype to use */ + mpi_vtype = PUGH_MPIDataType (PUGH_pGH (GH), hdatatype); + + /* collect the hyperslab chunks from all processors */ + if (proc < 0) + { + CACTUS_MPI_ERROR (MPI_Allgatherv (local_hdata, mapping->totals, + mpi_vtype, chunked_hdata, recvcnts, + displs, mpi_vtype, comm)); + } + else + { + CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata==0?&proc:local_hdata, mapping->totals, mpi_vtype, + chunked_hdata==0?&proc:chunked_hdata, recvcnts, displs, + mpi_vtype, proc, comm)); + } + + /* free processor-local chunk */ + if (local_hdata) + { + free (local_hdata); + } + + /* Now we got the global hyperslab data in a chunked array. + The user wants it unchunked, so let's sort it... */ + if (mapping->hdim > 1 && (proc < 0 || proc == myproc)) + { +// SortIntoUnchunkedHyperslab (chunked_hdata, hdata[i]); + free (chunked_hdata); + } +#endif } +#ifdef CCTK_MPI + free (displs); + free (local_hsize); +#endif + return (retval); } @@ -296,10 +470,6 @@ static int GetLocalHyperslab (const cGH *GH, /* if datatype conversion was requested get the appropriate predefined datatype conversion routine in case the user didn't supply one by her own */ - if (hdatatype < 0) - { - hdatatype = vinfo.vartype; - } conversion_fn = mapping->conversion_fn; if (vinfo.vartype != hdatatype) { @@ -488,49 +658,25 @@ static int GetDiagonalFromFrom3D (const cGH *GH, int hdatatype, void *hdata) { - int i, j, k, myproc, nprocs, linear_idx; - CCTK_INT local_npoints, npoints; + int i, j, k, npoints, myproc, linear_idx; int vdatatype, htypesize, vtypesize; const char *vdata; - char *local_hdata; - const pGH *pughGH; const pGExtras *extras; -#ifdef CCTK_MPI - CCTK_INT *global_npoints; - int *recvcnts, *displs; - MPI_Datatype mpidatatype; -#endif - - /* get the pGH pointer and the variable's GA structure */ - pughGH = PUGH_pGH (GH); - extras = ((const pGA *) pughGH->variables[vindex][timelevel])->extras; + /* get the variable's GA structure */ + extras = ((const pGA *) PUGH_pGH (GH)->variables[vindex][timelevel])->extras; vdatatype = CCTK_VarTypeI (vindex); - if (hdatatype < 0) - { - hdatatype = vdatatype; - } htypesize = CCTK_VarTypeSize (hdatatype); vtypesize = CCTK_VarTypeSize (vdatatype); vdata = (const char *) CCTK_VarDataPtrI (GH, timelevel, vindex); myproc = CCTK_MyProc (GH); - nprocs = CCTK_nProcs (GH); - if (nprocs == 1) - { - local_hdata = (char *) hdata; - } - else - { - local_hdata = (char *) malloc (mapping->global_hsize[0] * htypesize); - } i = mapping->global_startpoint[0] - extras->lb[myproc][0]; j = mapping->global_startpoint[1] - extras->lb[myproc][1]; k = mapping->global_startpoint[2] - extras->lb[myproc][2]; - local_npoints = 0; for (npoints = 0; npoints < mapping->global_hsize[0]; npoints++) { if (i >= extras->ownership[0][0][0] && i < extras->ownership[0][1][0] && @@ -540,94 +686,18 @@ static int GetDiagonalFromFrom3D (const cGH *GH, linear_idx = i + j*extras->hyper_volume[1] + k*extras->hyper_volume[2]; if (vdatatype != hdatatype) { - mapping->conversion_fn (vdata + linear_idx*vtypesize, local_hdata, - 1, 1, 1); + mapping->conversion_fn (vdata + linear_idx*vtypesize, hdata, 1, 1, 1); } else { - memcpy (local_hdata, vdata + linear_idx*vtypesize, htypesize); + memcpy (hdata, vdata + linear_idx*vtypesize, htypesize); } - local_hdata += htypesize; - local_npoints++; + hdata = (char *) hdata + htypesize; } i += mapping->downsample[0]; j += mapping->downsample[1]; k += mapping->downsample[2]; } - local_hdata -= local_npoints * htypesize; - -#ifdef CCTK_MPI - if (nprocs > 1) - { - /* allocate communication buffers */ - myproc = CCTK_MyProc (GH); - if (proc < 0 || proc == myproc) - { - global_npoints = (CCTK_INT *) malloc (nprocs * sizeof (CCTK_INT)); - recvcnts = (int *) malloc (2 * nprocs * sizeof (int)); - displs = recvcnts + nprocs; - } - else - { - global_npoints = NULL; recvcnts = displs = NULL; hdata = NULL; - } - - /* gather the number of local points on each processor */ - if (proc < 0) - { - CACTUS_MPI_ERROR (MPI_Allgather (&local_npoints, 1, PUGH_MPI_INT, - global_npoints, 1, PUGH_MPI_INT, - pughGH->PUGH_COMM_WORLD)); - } - else - { - CACTUS_MPI_ERROR (MPI_Gather (&local_npoints, 1, PUGH_MPI_INT, - global_npoints, 1, PUGH_MPI_INT, - proc, pughGH->PUGH_COMM_WORLD)); - } - - /* compute the receive count and displacement vectors */ - if (proc < 0 || proc == myproc) - { - for (i = 0; i < nprocs; i++) - { - recvcnts[i] = (int) global_npoints[i]; - displs[i] = i == 0 ? 0 : displs[i-1] + recvcnts[i-1]; - } - } - - /* get the MPI datatype for the hyperslab data */ - mpidatatype = PUGH_MPIDataType (pughGH, hdatatype); - - /* gather the local hyperslab data from each processor */ - if (proc < 0) - { - CACTUS_MPI_ERROR (MPI_Allgatherv (local_hdata, local_npoints, - mpidatatype, hdata, recvcnts, displs, - mpidatatype, pughGH->PUGH_COMM_WORLD)); - } - else - { - CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata, local_npoints, - mpidatatype, hdata, recvcnts, displs, - mpidatatype, proc, - pughGH->PUGH_COMM_WORLD)); - } - - /* free communication buffers */ - if (proc < 0 || proc == myproc) - { - free (global_npoints); - free (recvcnts); - } - } -#endif /* CCTK_MPI */ - - /* free allocated resources */ - if (nprocs > 1) - { - free (local_hdata); - } return (0); } diff --git a/src/Mapping.c b/src/Mapping.c index 0a701c1..87c8f7d 100644 --- a/src/Mapping.c +++ b/src/Mapping.c @@ -76,11 +76,12 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( int retval; int stagger_index; int myproc; - int npoints; + int i, j, k, npoints; hslab_mapping_t *mapping; const char *error_msg; const pGH *pughGH; /* pointer to the current pGH */ const pGA *GA; /* the variable's GA structure from PUGH */ + const pGExtras *extras; cGroup vinfo; @@ -146,7 +147,7 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( mapping = (hslab_mapping_t *) malloc (sizeof (hslab_mapping_t)); if (mapping) { - mapping->vectors = (int *) malloc ((6*vinfo.dim + 2*dim) * sizeof (int)); + mapping->vectors = (int *) malloc ((6*vinfo.dim + 3*dim) * sizeof (int)); } if (mapping == NULL || mapping->vectors == NULL) { @@ -180,6 +181,7 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( mapping->downsample = mapping->vectors + 5*vinfo.dim; mapping->local_hsize = mapping->vectors + 6*vinfo.dim + 0*dim; mapping->global_hsize = mapping->vectors + 6*vinfo.dim + 1*dim; + mapping->global_hoffset = mapping->vectors + 6*vinfo.dim + 2*dim; /* check direction vectors */ for (hdim = 0; hdim < mapping->hdim; hdim++) @@ -243,6 +245,7 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( /* get the pGH pointer and the variable's GA structure */ GA = (const pGA *) pughGH->variables[vindex][0]; + extras = GA->extras; myproc = CCTK_MyProc (GH); /* check extent */ @@ -250,7 +253,7 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( { if (mapping->do_dir[vdim] && hdim < mapping->hdim) { - if (origin[vdim] + extent[hdim] > GA->extras->nsize[vdim]) + if (origin[vdim] + extent[hdim] > extras->nsize[vdim]) { free (mapping->vectors); free (mapping); CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, @@ -261,7 +264,7 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( hdim++; } else if (mapping->is_diagonal_in_3D && - origin[vdim] + extent[0] > GA->extras->nsize[vdim]) + origin[vdim] + extent[0] > extras->nsize[vdim]) { free (mapping->vectors); free (mapping); CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, @@ -290,7 +293,7 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( /* subtract ghostzones for periodic BC */ if (GA->connectivity->perme[vdim]) { - mapping->global_hsize[hdim] -= 2 * GA->extras->nghostzones[vdim]; + mapping->global_hsize[hdim] -= 2 * extras->nghostzones[vdim]; } hdim++; } @@ -304,7 +307,7 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( /* subtract ghostzones for periodic BC */ if (GA->connectivity->perme[vdim]) { - mapping->totals -= 2 * GA->extras->nghostzones[vdim]; + mapping->totals -= 2 * extras->nghostzones[vdim]; } if ((unsigned int) mapping->global_hsize[0] > mapping->totals) { @@ -318,8 +321,8 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( if (mapping->is_full_hyperslab) { memset (mapping->local_startpoint, 0, vinfo.dim * sizeof (int)); - memcpy (mapping->local_endpoint, GA->extras->lnsize, vinfo.dim*sizeof(int)); - mapping->totals = GA->extras->npoints; + memcpy (mapping->local_endpoint, extras->lnsize, vinfo.dim*sizeof(int)); + mapping->totals = extras->npoints; } else if (mapping->is_diagonal_in_3D) { @@ -329,6 +332,24 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( mapping->downsample[vdim] = mapping->downsample[0]; mapping->global_startpoint[vdim] = origin[vdim]; } + + i = mapping->global_startpoint[0] - extras->lb[myproc][0]; + j = mapping->global_startpoint[1] - extras->lb[myproc][1]; + k = mapping->global_startpoint[2] - extras->lb[myproc][2]; + mapping->totals = 0; + for (npoints = 0; npoints < mapping->global_hsize[0]; npoints++) + { + if (i >= extras->ownership[0][0][0] && i < extras->ownership[0][1][0] && + j >= extras->ownership[0][0][1] && j < extras->ownership[0][1][1] && + k >= extras->ownership[0][0][2] && k < extras->ownership[0][1][2]) + { + mapping->totals++; + } + i += mapping->downsample[0]; + j += mapping->downsample[1]; + k += mapping->downsample[2]; + } + mapping->local_hsize[0] = mapping->totals; } else { @@ -344,14 +365,14 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( { stagger_index = CCTK_StaggerDirIndex (vdim, vinfo.stagtype); - if (origin[vdim] < MY_GLOBAL_EP (GA->extras, myproc, stagger_index, vdim)) + if (origin[vdim] < MY_GLOBAL_EP (extras, myproc, stagger_index, vdim)) { mapping->global_startpoint[vdim] = origin[vdim]; - if (origin[vdim] < MY_GLOBAL_SP (GA->extras, myproc,stagger_index,vdim)) + if (origin[vdim] < MY_GLOBAL_SP (extras, myproc,stagger_index,vdim)) { - npoints = (MY_GLOBAL_SP (GA->extras, myproc, stagger_index, vdim) + npoints = (MY_GLOBAL_SP (extras, myproc, stagger_index, vdim) - origin[vdim]) / mapping->downsample[vdim]; - if ((MY_GLOBAL_SP (GA->extras, myproc, stagger_index, vdim) + if ((MY_GLOBAL_SP (extras, myproc, stagger_index, vdim) - origin[vdim]) % mapping->downsample[vdim]) { npoints++; @@ -372,24 +393,24 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( stagger_index = CCTK_StaggerDirIndex (vdim, vinfo.stagtype); if (mapping->global_startpoint[vdim] >= 0 && - mapping->global_startpoint[vdim] < MY_GLOBAL_EP (GA->extras, myproc, + mapping->global_startpoint[vdim] < MY_GLOBAL_EP (extras, myproc, stagger_index,vdim)) { mapping->local_startpoint[vdim] = mapping->global_startpoint[vdim] - - GA->extras->lb[myproc][vdim]; + extras->lb[myproc][vdim]; } else { mapping->local_startpoint[vdim] = -1; } - if (mapping->global_endpoint[vdim] > MY_GLOBAL_SP (GA->extras, myproc, + if (mapping->global_endpoint[vdim] > MY_GLOBAL_SP (extras, myproc, stagger_index, vdim)) { mapping->local_endpoint[vdim] = - MIN (MY_LOCAL_EP (GA->extras, stagger_index, vdim), + MIN (MY_LOCAL_EP (extras, stagger_index, vdim), mapping->global_endpoint[vdim] - - GA->extras->lb[myproc][vdim]); + extras->lb[myproc][vdim]); } else { @@ -427,43 +448,44 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( } } /* end of else branch for 'if (mapping->is_full_hyperslab)' */ + fprintf (stderr, "proc %d: total number of local hyperslab data points: %d\n", CCTK_MyProc (GH), mapping->totals); +CCTK_Barrier (GH); #ifdef DEBUG printf ("total number of hyperslab data points: %d\n", mapping->totals); #endif -#if 0 - if (mapping->totals > 0) + /* calculate the offset of the local chunk into the global hyperslab */ + /* NOTE: the global offsets for diagonals are calculated later after + gathering all the local sizes */ + if (! mapping->is_diagonal_in_3D && mapping->totals > 0) { - /* if requested, compute the offsets into the global hyperslab */ - if (hoffset_global) + for (vdim = hdim = 0; vdim < (unsigned int) vinfo.dim; vdim++) { - for (i = hdim = 0; i < vinfo.dim; i++) + if (mapping->do_dir[vdim]) { - if (mapping->do_dir[i]) + if (mapping->is_full_hyperslab) { - if (mapping->is_full_hyperslab) - { - hoffset_global[hdim] = GA->extras->lb[myproc][i]; - } - else + mapping->global_hoffset[hdim] = extras->lb[myproc][vdim]; + } + else + { + mapping->global_hoffset[hdim] = (mapping->global_startpoint[vdim] - + origin[vdim]) / mapping->downsample[vdim]; + if (GA->connectivity->perme[vdim]) { - hoffset_global[hdim] = (mapping->global_startpoint[i] - - origin[i]) / mapping->downsample[i]; - if (GA->connectivity->perme[i]) - { - hoffset_global[hdim] -= GA->extras->nghostzones[i]; - } + mapping->global_hoffset[hdim] -= extras->nghostzones[vdim]; } + } +#define DEBUG 1 #ifdef DEBUG - printf ("hoffset_global, hsize in direction %d: %d, %d\n", - hdim, hoffset_global[hdim], mapping->local_hsize[hdim]); + fprintf (stderr, "proc %d: global_hoffset, local_hsize in direction " + "%d: %d, %d\n", CCTK_MyProc (GH), hdim, + mapping->global_hoffset[hdim], mapping->local_hsize[hdim]); #endif - hdim++; - } + hdim++; } } } -#endif /* add this mapping to the mapping list */ if (mapping_list) diff --git a/src/PUGHSlabi.h b/src/PUGHSlabi.h index 4a3c61c..6b46a36 100644 --- a/src/PUGHSlabi.h +++ b/src/PUGHSlabi.h @@ -34,6 +34,7 @@ typedef struct hslab_mapping_t int *do_dir; /* vdim */ int *downsample; /* vdim */ int *global_hsize; /* hdim */ + int *global_hoffset; /* hdim */ int *local_hsize; /* hdim */ unsigned int totals; int is_full_hyperslab; -- cgit v1.2.3