diff options
Diffstat (limited to 'src/GetHyperslab.c')
-rw-r--r-- | src/GetHyperslab.c | 314 |
1 files changed, 192 insertions, 122 deletions
diff --git a/src/GetHyperslab.c b/src/GetHyperslab.c index ad7cbd5..479f2e9 100644 --- a/src/GetHyperslab.c +++ b/src/GetHyperslab.c @@ -80,42 +80,216 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, void *const *hdata /* num_arrays */, CCTK_INT *retvals /* num_arrays */) { - int i, retval; + int i, nprocs, myproc, proc, timelevel, hdatatype, retval; int result, *result_ptr; hslab_mapping_t *mapping; + void *local_hdata; +#ifdef CCTK_MPI + CCTK_INT totals_global, *chunk_hsize, *local_hsize; + MPI_Comm comm; + int *displs, *recvcnts; + void *chunked_hdata; + MPI_Datatype mpi_vtype; +#endif + retval = 0; mapping = PUGHSlabi_GetMapping (mapping_handle); + /*** FIXME: how to deal with local mapping ***/ if (mapping == NULL) { - return (-2); + retval = -1; + } + else + { + /* check mapping consistency */ + /*** FIXME ***/ + /* all vindices[] must match the given mapping */ + +#if 0 + NOTE mapping->totals denotes the number of LOCAL data points !!! + /* check if there's any data to extract */ + if (mapping->totals <= 0) + { + retval = num_arrays; + } +#endif } - /* check mapping consistency */ - /*** FIXME ***/ + if (retval) + { + /* set the retvals[] according to the return value */ + if (retvals) + { + for (i = 0; i < num_arrays; i++) + { + retvals[i] = retval > 0 ? 0 : retval; + } + } + + return (retval); + } + +#ifdef CCTK_MPI + myproc = CCTK_MyProc (GH); + nprocs = CCTK_nProcs (GH); + + /* now build the CCTK_INT buffer to communicate the sizes and offsets */ + /*** FIXME: could also do this already when creating the mapping ***/ + /*** could also cache the chunk_size in the mapping structure to + save following MPI_Allgather() calls but this makes + Hyperslab_DefineGlobalMapping() a collective call ***/ + local_hsize = (CCTK_INT *) malloc ((1 + nprocs) * (2*mapping->hdim + 1) + * sizeof (CCTK_INT)); + chunk_hsize = local_hsize + 2*mapping->hdim + 1; + for (i = 0; i < mapping->hdim; i++) + { + local_hsize[0*mapping->hdim + i] = mapping->local_hsize[i]; + local_hsize[1*mapping->hdim + i] = mapping->global_hoffset[i]; + } + local_hsize[2*mapping->hdim + 0] = mapping->totals; + + comm = PUGH_pGH (GH)->PUGH_COMM_WORLD; + /* FIXME: do an Allgather here so that all procs know how many data points + we have. This is useful to decide whether we have to gather + something at all or not and thus can return immediately */ + CACTUS_MPI_ERROR (MPI_Allgather (local_hsize, 2*mapping->hdim + 1, + PUGH_MPI_INT, chunk_hsize, + 2*mapping->hdim + 1, PUGH_MPI_INT, comm)); + + /* sum up the total number of hyperslab data points */ + totals_global = 0; + for (i = 0; i < nprocs; i++) + { + totals_global += chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim]; + } /* check if there's any data to extract */ - if (mapping->totals == 0) + if (totals_global <= 0) { + if (retvals) + { + memset (retvals, 0, num_arrays * sizeof (CCTK_INT)); + } + free (local_hsize); return (0); } - retval = 0; + /* set the displacements/receive counts for the gather operation */ + displs = (int *) malloc (2 * nprocs * sizeof (int)); + recvcnts = displs + nprocs; + + /* for hdim == 1 we let MPI_Gatherv() do the sorting directly into the + hyperslab buffer using the given hyperslab offsets and sizes */ + if (mapping->hdim == 1) + { + for (i = 0; i < nprocs; i++) + { + recvcnts[i] = chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim]; + displs[i] = mapping->is_diagonal_in_3D ? + i == 0 ? 0 : displs[i-1] + recvcnts[i-1] : + chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim - 1]; + } + } + else + { + displs[0] = 0; + recvcnts[0] = chunk_hsize[2*mapping->hdim]; + for (i = 1; i < nprocs; i++) + { + displs[i] = displs[i-1] + chunk_hsize[(i-1)*(2*mapping->hdim+1) + + 2*mapping->hdim]; + recvcnts[i] = chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim]; + } + } +#endif + + /* now loop over all hyperslabs */ + /*** FIXME: should optimize this loop away ***/ for (i = 0; i < num_arrays; i++) { - /* get the processor-local hyperslab */ + proc = procs ? procs[i] : DEFAULT_PROCESSOR; + timelevel = timelevels ? timelevels[i] : DEFAULT_TIMELEVEL; + hdatatype = hdatatypes ? hdatatypes[i] : CCTK_VarTypeI (vindices[i]); result_ptr = retvals ? &retvals[i] : &result; - *result_ptr = GetLocalHyperslab (GH, mapping, - procs ? procs[i] : DEFAULT_PROCESSOR, - vindices[i], - timelevels ? timelevels[i] : DEFAULT_TIMELEVEL, - hdatatypes ? hdatatypes[i] : -1, hdata[i]); + + if (mapping->totals <= 0) + { + local_hdata = NULL; + *result_ptr = 0; + } + else + { + /* allocate temporary array to keep the local hyperslab data */ + local_hdata = nprocs == 1 ? + hdata[i] : malloc (mapping->totals * CCTK_VarTypeSize (hdatatype)); + /* get the processor-local hyperslab */ + *result_ptr = GetLocalHyperslab (GH, mapping, proc, vindices[i], + timelevel, hdatatype, local_hdata); + } + if (*result_ptr == 0) { retval++; } + + if (nprocs == 1) + { + continue; + } + +#ifdef CCTK_MPI + if (proc < 0 || proc == myproc) + { + /* allocate temporary array to receive the chunks from all processors */ + /* for the case of hdim == 1, the sorting is done by MPI */ + chunked_hdata = mapping->hdim > 1 ? + malloc (totals_global * CCTK_VarTypeSize (hdatatype)) : + hdata[i]; + } + else + { + chunked_hdata = NULL; + } + + /* detect the MPI datatype to use */ + mpi_vtype = PUGH_MPIDataType (PUGH_pGH (GH), hdatatype); + + /* collect the hyperslab chunks from all processors */ + if (proc < 0) + { + CACTUS_MPI_ERROR (MPI_Allgatherv (local_hdata, mapping->totals, + mpi_vtype, chunked_hdata, recvcnts, + displs, mpi_vtype, comm)); + } + else + { + CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata==0?&proc:local_hdata, mapping->totals, mpi_vtype, + chunked_hdata==0?&proc:chunked_hdata, recvcnts, displs, + mpi_vtype, proc, comm)); + } + + /* free processor-local chunk */ + if (local_hdata) + { + free (local_hdata); + } + + /* Now we got the global hyperslab data in a chunked array. + The user wants it unchunked, so let's sort it... */ + if (mapping->hdim > 1 && (proc < 0 || proc == myproc)) + { +// SortIntoUnchunkedHyperslab (chunked_hdata, hdata[i]); + free (chunked_hdata); + } +#endif } +#ifdef CCTK_MPI + free (displs); + free (local_hsize); +#endif + return (retval); } @@ -296,10 +470,6 @@ static int GetLocalHyperslab (const cGH *GH, /* if datatype conversion was requested get the appropriate predefined datatype conversion routine in case the user didn't supply one by her own */ - if (hdatatype < 0) - { - hdatatype = vinfo.vartype; - } conversion_fn = mapping->conversion_fn; if (vinfo.vartype != hdatatype) { @@ -488,49 +658,25 @@ static int GetDiagonalFromFrom3D (const cGH *GH, int hdatatype, void *hdata) { - int i, j, k, myproc, nprocs, linear_idx; - CCTK_INT local_npoints, npoints; + int i, j, k, npoints, myproc, linear_idx; int vdatatype, htypesize, vtypesize; const char *vdata; - char *local_hdata; - const pGH *pughGH; const pGExtras *extras; -#ifdef CCTK_MPI - CCTK_INT *global_npoints; - int *recvcnts, *displs; - MPI_Datatype mpidatatype; -#endif - - /* get the pGH pointer and the variable's GA structure */ - pughGH = PUGH_pGH (GH); - extras = ((const pGA *) pughGH->variables[vindex][timelevel])->extras; + /* get the variable's GA structure */ + extras = ((const pGA *) PUGH_pGH (GH)->variables[vindex][timelevel])->extras; vdatatype = CCTK_VarTypeI (vindex); - if (hdatatype < 0) - { - hdatatype = vdatatype; - } htypesize = CCTK_VarTypeSize (hdatatype); vtypesize = CCTK_VarTypeSize (vdatatype); vdata = (const char *) CCTK_VarDataPtrI (GH, timelevel, vindex); myproc = CCTK_MyProc (GH); - nprocs = CCTK_nProcs (GH); - if (nprocs == 1) - { - local_hdata = (char *) hdata; - } - else - { - local_hdata = (char *) malloc (mapping->global_hsize[0] * htypesize); - } i = mapping->global_startpoint[0] - extras->lb[myproc][0]; j = mapping->global_startpoint[1] - extras->lb[myproc][1]; k = mapping->global_startpoint[2] - extras->lb[myproc][2]; - local_npoints = 0; for (npoints = 0; npoints < mapping->global_hsize[0]; npoints++) { if (i >= extras->ownership[0][0][0] && i < extras->ownership[0][1][0] && @@ -540,94 +686,18 @@ static int GetDiagonalFromFrom3D (const cGH *GH, linear_idx = i + j*extras->hyper_volume[1] + k*extras->hyper_volume[2]; if (vdatatype != hdatatype) { - mapping->conversion_fn (vdata + linear_idx*vtypesize, local_hdata, - 1, 1, 1); + mapping->conversion_fn (vdata + linear_idx*vtypesize, hdata, 1, 1, 1); } else { - memcpy (local_hdata, vdata + linear_idx*vtypesize, htypesize); + memcpy (hdata, vdata + linear_idx*vtypesize, htypesize); } - local_hdata += htypesize; - local_npoints++; + hdata = (char *) hdata + htypesize; } i += mapping->downsample[0]; j += mapping->downsample[1]; k += mapping->downsample[2]; } - local_hdata -= local_npoints * htypesize; - -#ifdef CCTK_MPI - if (nprocs > 1) - { - /* allocate communication buffers */ - myproc = CCTK_MyProc (GH); - if (proc < 0 || proc == myproc) - { - global_npoints = (CCTK_INT *) malloc (nprocs * sizeof (CCTK_INT)); - recvcnts = (int *) malloc (2 * nprocs * sizeof (int)); - displs = recvcnts + nprocs; - } - else - { - global_npoints = NULL; recvcnts = displs = NULL; hdata = NULL; - } - - /* gather the number of local points on each processor */ - if (proc < 0) - { - CACTUS_MPI_ERROR (MPI_Allgather (&local_npoints, 1, PUGH_MPI_INT, - global_npoints, 1, PUGH_MPI_INT, - pughGH->PUGH_COMM_WORLD)); - } - else - { - CACTUS_MPI_ERROR (MPI_Gather (&local_npoints, 1, PUGH_MPI_INT, - global_npoints, 1, PUGH_MPI_INT, - proc, pughGH->PUGH_COMM_WORLD)); - } - - /* compute the receive count and displacement vectors */ - if (proc < 0 || proc == myproc) - { - for (i = 0; i < nprocs; i++) - { - recvcnts[i] = (int) global_npoints[i]; - displs[i] = i == 0 ? 0 : displs[i-1] + recvcnts[i-1]; - } - } - - /* get the MPI datatype for the hyperslab data */ - mpidatatype = PUGH_MPIDataType (pughGH, hdatatype); - - /* gather the local hyperslab data from each processor */ - if (proc < 0) - { - CACTUS_MPI_ERROR (MPI_Allgatherv (local_hdata, local_npoints, - mpidatatype, hdata, recvcnts, displs, - mpidatatype, pughGH->PUGH_COMM_WORLD)); - } - else - { - CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata, local_npoints, - mpidatatype, hdata, recvcnts, displs, - mpidatatype, proc, - pughGH->PUGH_COMM_WORLD)); - } - - /* free communication buffers */ - if (proc < 0 || proc == myproc) - { - free (global_npoints); - free (recvcnts); - } - } -#endif /* CCTK_MPI */ - - /* free allocated resources */ - if (nprocs > 1) - { - free (local_hdata); - } return (0); } |