Diffstat (limited to 'src/GetHyperslab.c')
-rw-r--r--   src/GetHyperslab.c   314
1 file changed, 192 insertions, 122 deletions
diff --git a/src/GetHyperslab.c b/src/GetHyperslab.c
index ad7cbd5..479f2e9 100644
--- a/src/GetHyperslab.c
+++ b/src/GetHyperslab.c
@@ -80,42 +80,216 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
void *const *hdata /* num_arrays */,
CCTK_INT *retvals /* num_arrays */)
{
- int i, retval;
+ int i, nprocs, myproc, proc, timelevel, hdatatype, retval;
int result, *result_ptr;
hslab_mapping_t *mapping;
+ void *local_hdata;
+#ifdef CCTK_MPI
+ CCTK_INT totals_global, *chunk_hsize, *local_hsize;
+ MPI_Comm comm;
+ int *displs, *recvcnts;
+ void *chunked_hdata;
+ MPI_Datatype mpi_vtype;
+#endif
+ retval = 0;
mapping = PUGHSlabi_GetMapping (mapping_handle);
+ /*** FIXME: how to deal with local mapping ***/
if (mapping == NULL)
{
- return (-2);
+ retval = -1;
+ }
+ else
+ {
+ /* check mapping consistency */
+ /*** FIXME ***/
+ /* all vindices[] must match the given mapping */
+
+#if 0
+ NOTE mapping->totals denotes the number of LOCAL data points !!!
+ /* check if there's any data to extract */
+ if (mapping->totals <= 0)
+ {
+ retval = num_arrays;
+ }
+#endif
}
- /* check mapping consistency */
- /*** FIXME ***/
+ if (retval)
+ {
+ /* set the retvals[] according to the return value */
+ if (retvals)
+ {
+ for (i = 0; i < num_arrays; i++)
+ {
+ retvals[i] = retval > 0 ? 0 : retval;
+ }
+ }
+
+ return (retval);
+ }
+
+#ifdef CCTK_MPI
+ myproc = CCTK_MyProc (GH);
+ nprocs = CCTK_nProcs (GH);
+
+ /* now build the CCTK_INT buffer to communicate the sizes and offsets */
+ /*** FIXME: could also do this already when creating the mapping ***/
+ /*** could also cache the chunk_size in the mapping structure to
+ save following MPI_Allgather() calls but this makes
+ Hyperslab_DefineGlobalMapping() a collective call ***/
+ local_hsize = (CCTK_INT *) malloc ((1 + nprocs) * (2*mapping->hdim + 1)
+ * sizeof (CCTK_INT));
+ chunk_hsize = local_hsize + 2*mapping->hdim + 1;
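+  /* layout per processor: hdim local hyperslab sizes, hdim global hyperslab
+     offsets, and the total number of local hyperslab points,
+     i.e. 2*hdim + 1 integers altogether */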
+ for (i = 0; i < mapping->hdim; i++)
+ {
+ local_hsize[0*mapping->hdim + i] = mapping->local_hsize[i];
+ local_hsize[1*mapping->hdim + i] = mapping->global_hoffset[i];
+ }
+ local_hsize[2*mapping->hdim + 0] = mapping->totals;
+
+ comm = PUGH_pGH (GH)->PUGH_COMM_WORLD;
+ /* FIXME: do an Allgather here so that all procs know how many data points
+ we have. This is useful to decide whether we have to gather
+ something at all or not and thus can return immediately */
+ CACTUS_MPI_ERROR (MPI_Allgather (local_hsize, 2*mapping->hdim + 1,
+ PUGH_MPI_INT, chunk_hsize,
+ 2*mapping->hdim + 1, PUGH_MPI_INT, comm));
+
+ /* sum up the total number of hyperslab data points */
+ totals_global = 0;
+ for (i = 0; i < nprocs; i++)
+ {
+ totals_global += chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim];
+ }
/* check if there's any data to extract */
- if (mapping->totals == 0)
+ if (totals_global <= 0)
{
+ if (retvals)
+ {
+ memset (retvals, 0, num_arrays * sizeof (CCTK_INT));
+ }
+ free (local_hsize);
return (0);
}
- retval = 0;
+ /* set the displacements/receive counts for the gather operation */
+ displs = (int *) malloc (2 * nprocs * sizeof (int));
+ recvcnts = displs + nprocs;
+
+ /* for hdim == 1 we let MPI_Gatherv() do the sorting directly into the
+ hyperslab buffer using the given hyperslab offsets and sizes */
+ if (mapping->hdim == 1)
+ {
+ for (i = 0; i < nprocs; i++)
+ {
+ recvcnts[i] = chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim];
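+      /* a 3D diagonal carries no per-chunk offset information, so its
+         chunks are simply concatenated in processor order; for ordinary
+         1D hyperslabs each chunk is placed directly at its global
+         hyperslab offset */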
+ displs[i] = mapping->is_diagonal_in_3D ?
+ i == 0 ? 0 : displs[i-1] + recvcnts[i-1] :
+ chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim - 1];
+ }
+ }
+ else
+ {
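+    /* for higher-dimensional hyperslabs the chunks are received back to
+       back in processor order and have to be sorted into place afterwards */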
+ displs[0] = 0;
+ recvcnts[0] = chunk_hsize[2*mapping->hdim];
+ for (i = 1; i < nprocs; i++)
+ {
+ displs[i] = displs[i-1] + chunk_hsize[(i-1)*(2*mapping->hdim+1) +
+ 2*mapping->hdim];
+ recvcnts[i] = chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim];
+ }
+ }
+#endif
+
+ /* now loop over all hyperslabs */
+ /*** FIXME: should optimize this loop away ***/
for (i = 0; i < num_arrays; i++)
{
- /* get the processor-local hyperslab */
+ proc = procs ? procs[i] : DEFAULT_PROCESSOR;
+ timelevel = timelevels ? timelevels[i] : DEFAULT_TIMELEVEL;
+ hdatatype = hdatatypes ? hdatatypes[i] : CCTK_VarTypeI (vindices[i]);
result_ptr = retvals ? &retvals[i] : &result;
- *result_ptr = GetLocalHyperslab (GH, mapping,
- procs ? procs[i] : DEFAULT_PROCESSOR,
- vindices[i],
- timelevels ? timelevels[i] : DEFAULT_TIMELEVEL,
- hdatatypes ? hdatatypes[i] : -1, hdata[i]);
+
+ if (mapping->totals <= 0)
+ {
+ local_hdata = NULL;
+ *result_ptr = 0;
+ }
+ else
+ {
+ /* allocate temporary array to keep the local hyperslab data */
+ local_hdata = nprocs == 1 ?
+ hdata[i] : malloc (mapping->totals * CCTK_VarTypeSize (hdatatype));
+ /* get the processor-local hyperslab */
+ *result_ptr = GetLocalHyperslab (GH, mapping, proc, vindices[i],
+ timelevel, hdatatype, local_hdata);
+ }
+
if (*result_ptr == 0)
{
retval++;
}
+
+ if (nprocs == 1)
+ {
+ continue;
+ }
+
+#ifdef CCTK_MPI
+ if (proc < 0 || proc == myproc)
+ {
+ /* allocate temporary array to receive the chunks from all processors */
+ /* for the case of hdim == 1, the sorting is done by MPI */
+ chunked_hdata = mapping->hdim > 1 ?
+ malloc (totals_global * CCTK_VarTypeSize (hdatatype)) :
+ hdata[i];
+ }
+ else
+ {
+ chunked_hdata = NULL;
+ }
+
+    /* determine the MPI datatype to use */
+ mpi_vtype = PUGH_MPIDataType (PUGH_pGH (GH), hdatatype);
+
+ /* collect the hyperslab chunks from all processors */
+ if (proc < 0)
+ {
+ CACTUS_MPI_ERROR (MPI_Allgatherv (local_hdata, mapping->totals,
+ mpi_vtype, chunked_hdata, recvcnts,
+ displs, mpi_vtype, comm));
+ }
+ else
+ {
+      CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata == 0 ? &proc : local_hdata,
+                                     mapping->totals, mpi_vtype,
+                                     chunked_hdata == 0 ? &proc : chunked_hdata,
+                                     recvcnts, displs, mpi_vtype, proc, comm));
+ }
+
+ /* free processor-local chunk */
+ if (local_hdata)
+ {
+ free (local_hdata);
+ }
+
+    /* Now we have the global hyperslab data in a chunked array.
+       The user wants it unchunked, so let's sort it... */
+ if (mapping->hdim > 1 && (proc < 0 || proc == myproc))
+ {
+      /* FIXME: SortIntoUnchunkedHyperslab (chunked_hdata, hdata[i]); */
+ free (chunked_hdata);
+ }
+#endif
}
+#ifdef CCTK_MPI
+ free (displs);
+ free (local_hsize);
+#endif
+
return (retval);
}
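For reference, here is a minimal standalone sketch of the receive-count/displacement bookkeeping that MPI_Gatherv needs when every processor contributes a different number of points, which is the pattern the rewritten Hyperslab_GetList relies on above. The sketch is not part of this commit; the function and variable names are made up, and it gathers plain doubles instead of Cactus variables.

  #include <mpi.h>
  #include <stdio.h>
  #include <stdlib.h>

  /* gather variable-length chunks of doubles onto 'root'; returns the
     assembled buffer on the root rank and NULL everywhere else */
  static double *gather_chunks (MPI_Comm comm, int root,
                                double *chunk, int nlocal, int *ntotal)
  {
    int nprocs, myproc, i;
    int *recvcnts = NULL, *displs = NULL;
    double *buffer = NULL;

    MPI_Comm_size (comm, &nprocs);
    MPI_Comm_rank (comm, &myproc);
    *ntotal = 0;

    if (myproc == root)
    {
      recvcnts = (int *) malloc (2 * nprocs * sizeof (int));
      displs   = recvcnts + nprocs;
    }

    /* every rank reports how many points it contributes */
    MPI_Gather (&nlocal, 1, MPI_INT, recvcnts, 1, MPI_INT, root, comm);

    if (myproc == root)
    {
      /* chunk i starts right behind chunk i-1 */
      for (i = 0; i < nprocs; i++)
      {
        displs[i] = *ntotal;
        *ntotal  += recvcnts[i];
      }
      buffer = (double *) malloc (*ntotal * sizeof (double));
    }

    /* collect the chunks back to back on the root rank */
    MPI_Gatherv (chunk, nlocal, MPI_DOUBLE,
                 buffer, recvcnts, displs, MPI_DOUBLE, root, comm);

    if (myproc == root)
    {
      free (recvcnts);
    }
    return (buffer);
  }

  int main (int argc, char **argv)
  {
    double chunk[8], *all;
    int myproc, nlocal, ntotal, i;

    MPI_Init (&argc, &argv);
    MPI_Comm_rank (MPI_COMM_WORLD, &myproc);

    /* give each rank a different number of points so the sizes vary */
    nlocal = myproc % 8 + 1;
    for (i = 0; i < nlocal; i++)
    {
      chunk[i] = myproc + 0.1 * i;
    }

    all = gather_chunks (MPI_COMM_WORLD, 0, chunk, nlocal, &ntotal);
    if (all)
    {
      printf ("gathered %d points\n", ntotal);
      free (all);
    }
    MPI_Finalize ();
    return (0);
  }

The new Hyperslab_GetList above differs from this sketch in that it exchanges all chunk sizes and hyperslab offsets with a single MPI_Allgather before the per-array loop, uses MPI_Allgatherv when every processor is to receive the result (proc < 0), and for hdim == 1 passes the global hyperslab offsets as displacements so that MPI_Gatherv already drops each chunk at its final position.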
@@ -296,10 +470,6 @@ static int GetLocalHyperslab (const cGH *GH,
/* if datatype conversion was requested
get the appropriate predefined datatype conversion routine
in case the user didn't supply one by her own */
- if (hdatatype < 0)
- {
- hdatatype = vinfo.vartype;
- }
conversion_fn = mapping->conversion_fn;
if (vinfo.vartype != hdatatype)
{
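The hdatatype < 0 fallback removed here has moved to the caller: the rewritten Hyperslab_GetList above resolves the per-array datatype as hdatatype = hdatatypes ? hdatatypes[i] : CCTK_VarTypeI (vindices[i]), so GetLocalHyperslab can assume it is handed a valid datatype.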
@@ -488,49 +658,25 @@ static int GetDiagonalFromFrom3D (const cGH *GH,
int hdatatype,
void *hdata)
{
- int i, j, k, myproc, nprocs, linear_idx;
- CCTK_INT local_npoints, npoints;
+ int i, j, k, npoints, myproc, linear_idx;
int vdatatype, htypesize, vtypesize;
const char *vdata;
- char *local_hdata;
- const pGH *pughGH;
const pGExtras *extras;
-#ifdef CCTK_MPI
- CCTK_INT *global_npoints;
- int *recvcnts, *displs;
- MPI_Datatype mpidatatype;
-#endif
-
- /* get the pGH pointer and the variable's GA structure */
- pughGH = PUGH_pGH (GH);
- extras = ((const pGA *) pughGH->variables[vindex][timelevel])->extras;
+ /* get the variable's GA structure */
+ extras = ((const pGA *) PUGH_pGH (GH)->variables[vindex][timelevel])->extras;
vdatatype = CCTK_VarTypeI (vindex);
- if (hdatatype < 0)
- {
- hdatatype = vdatatype;
- }
htypesize = CCTK_VarTypeSize (hdatatype);
vtypesize = CCTK_VarTypeSize (vdatatype);
vdata = (const char *) CCTK_VarDataPtrI (GH, timelevel, vindex);
myproc = CCTK_MyProc (GH);
- nprocs = CCTK_nProcs (GH);
- if (nprocs == 1)
- {
- local_hdata = (char *) hdata;
- }
- else
- {
- local_hdata = (char *) malloc (mapping->global_hsize[0] * htypesize);
- }
i = mapping->global_startpoint[0] - extras->lb[myproc][0];
j = mapping->global_startpoint[1] - extras->lb[myproc][1];
k = mapping->global_startpoint[2] - extras->lb[myproc][2];
- local_npoints = 0;
for (npoints = 0; npoints < mapping->global_hsize[0]; npoints++)
{
if (i >= extras->ownership[0][0][0] && i < extras->ownership[0][1][0] &&
@@ -540,94 +686,18 @@ static int GetDiagonalFromFrom3D (const cGH *GH,
linear_idx = i + j*extras->hyper_volume[1] + k*extras->hyper_volume[2];
if (vdatatype != hdatatype)
{
- mapping->conversion_fn (vdata + linear_idx*vtypesize, local_hdata,
- 1, 1, 1);
+ mapping->conversion_fn (vdata + linear_idx*vtypesize, hdata, 1, 1, 1);
}
else
{
- memcpy (local_hdata, vdata + linear_idx*vtypesize, htypesize);
+ memcpy (hdata, vdata + linear_idx*vtypesize, htypesize);
}
- local_hdata += htypesize;
- local_npoints++;
+ hdata = (char *) hdata + htypesize;
}
i += mapping->downsample[0];
j += mapping->downsample[1];
k += mapping->downsample[2];
}
- local_hdata -= local_npoints * htypesize;
-
-#ifdef CCTK_MPI
- if (nprocs > 1)
- {
- /* allocate communication buffers */
- myproc = CCTK_MyProc (GH);
- if (proc < 0 || proc == myproc)
- {
- global_npoints = (CCTK_INT *) malloc (nprocs * sizeof (CCTK_INT));
- recvcnts = (int *) malloc (2 * nprocs * sizeof (int));
- displs = recvcnts + nprocs;
- }
- else
- {
- global_npoints = NULL; recvcnts = displs = NULL; hdata = NULL;
- }
-
- /* gather the number of local points on each processor */
- if (proc < 0)
- {
- CACTUS_MPI_ERROR (MPI_Allgather (&local_npoints, 1, PUGH_MPI_INT,
- global_npoints, 1, PUGH_MPI_INT,
- pughGH->PUGH_COMM_WORLD));
- }
- else
- {
- CACTUS_MPI_ERROR (MPI_Gather (&local_npoints, 1, PUGH_MPI_INT,
- global_npoints, 1, PUGH_MPI_INT,
- proc, pughGH->PUGH_COMM_WORLD));
- }
-
- /* compute the receive count and displacement vectors */
- if (proc < 0 || proc == myproc)
- {
- for (i = 0; i < nprocs; i++)
- {
- recvcnts[i] = (int) global_npoints[i];
- displs[i] = i == 0 ? 0 : displs[i-1] + recvcnts[i-1];
- }
- }
-
- /* get the MPI datatype for the hyperslab data */
- mpidatatype = PUGH_MPIDataType (pughGH, hdatatype);
-
- /* gather the local hyperslab data from each processor */
- if (proc < 0)
- {
- CACTUS_MPI_ERROR (MPI_Allgatherv (local_hdata, local_npoints,
- mpidatatype, hdata, recvcnts, displs,
- mpidatatype, pughGH->PUGH_COMM_WORLD));
- }
- else
- {
- CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata, local_npoints,
- mpidatatype, hdata, recvcnts, displs,
- mpidatatype, proc,
- pughGH->PUGH_COMM_WORLD));
- }
-
- /* free communication buffers */
- if (proc < 0 || proc == myproc)
- {
- free (global_npoints);
- free (recvcnts);
- }
- }
-#endif /* CCTK_MPI */
-
- /* free allocated resources */
- if (nprocs > 1)
- {
- free (local_hdata);
- }
return (0);
}
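For illustration, here is a simplified self-contained sketch of the diagonal traversal that the slimmed-down GetDiagonalFromFrom3D now performs directly into the caller's buffer. The flat data layout and all names below are assumptions made for the example, not PUGH's pGExtras machinery.

  #include <stdio.h>

  /* copy every point of a (possibly downsampled) 3D diagonal that falls
     into this process's owned index range into a contiguous buffer;
     returns the number of points copied */
  static int copy_local_diagonal (const double *vdata,     /* local 3D block    */
                                  int nx, int ny,          /* local x/y extents */
                                  const int own_lo[3],     /* first owned index */
                                  const int own_hi[3],     /* one past the last */
                                  const int start[3],      /* diagonal start in
                                                              local coordinates */
                                  const int downsample[3], /* stride per axis   */
                                  int global_hsize,        /* full diagonal length */
                                  double *hdata)
  {
    int i = start[0], j = start[1], k = start[2];
    int n, copied = 0;

    for (n = 0; n < global_hsize; n++)
    {
      /* keep the point only if it lies in the owned (non-ghost) region */
      if (i >= own_lo[0] && i < own_hi[0] &&
          j >= own_lo[1] && j < own_hi[1] &&
          k >= own_lo[2] && k < own_hi[2])
      {
        /* linearise (i,j,k) with x running fastest */
        hdata[copied++] = vdata[i + j*nx + k*nx*ny];
      }
      i += downsample[0];
      j += downsample[1];
      k += downsample[2];
    }
    return (copied);
  }

  int main (void)
  {
    double block[4*4*4], diag[4];
    const int own_lo[3] = {0, 0, 0}, own_hi[3] = {4, 4, 4};
    const int start[3]  = {0, 0, 0}, downsample[3] = {1, 1, 1};
    int i, n;

    for (i = 0; i < 4*4*4; i++)
    {
      block[i] = i;
    }
    n = copy_local_diagonal (block, 4, 4, own_lo, own_hi, start, downsample,
                             4, diag);
    printf ("copied %d of 4 diagonal points\n", n);
    return (0);
  }

In the real routine the start point is shifted into processor-local coordinates via extras->lb[myproc], the ownership bounds come from extras->ownership, the linearisation uses extras->hyper_volume, and each point is copied with memcpy or the mapping's datatype conversion routine. The cross-processor gathering that used to live here is now handled centrally by Hyperslab_GetList.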