aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortradke <tradke@10716dce-81a3-4424-a2c8-48026a0d3035>2002-04-13 21:06:58 +0000
committertradke <tradke@10716dce-81a3-4424-a2c8-48026a0d3035>2002-04-13 21:06:58 +0000
commit0c70c0bf78a59733589be194ac4971400c581651 (patch)
tree2881acbe9ac9567d136b9b06ff782bc22b23c8ef
parentd5d7f1405dc8676013568eef37b9ba4acd96c352 (diff)
Sort processor-local chunks into an unchunked hyperslab.
git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGH/PUGHSlab/trunk@81 10716dce-81a3-4424-a2c8-48026a0d3035
-rw-r--r--src/GetHyperslab.c138
1 files changed, 119 insertions, 19 deletions
diff --git a/src/GetHyperslab.c b/src/GetHyperslab.c
index 1ad9a9c..71ff33d 100644
--- a/src/GetHyperslab.c
+++ b/src/GetHyperslab.c
@@ -49,6 +49,12 @@ static int GetDiagonalFromFrom3D (const cGH *GH,
int timelevel,
int hdatatype,
void *hdata);
+static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping,
+ const void *chunked_hdata,
+ void *unchunked_hdata,
+ int nprocs,
+ const CCTK_INT *chunk_hsize,
+ int hdata_size);
CCTK_INT Hyperslab_Get (const cGH *GH,
@@ -79,7 +85,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
void *const *hdata /* num_arrays */,
CCTK_INT *retvals /* num_arrays */)
{
- int i, nprocs, proc, timelevel, hdatatype, retval;
+ int i, nprocs, proc, timelevel, hdatatype, hdata_size, retval;
CCTK_INT result, *result_ptr;
hslab_mapping_t *mapping;
void *local_hdata;
@@ -143,7 +149,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
local_hsize = (CCTK_INT *) malloc ((1 + nprocs) * (2*mapping->hdim + 1)
* sizeof (CCTK_INT));
chunk_hsize = local_hsize + 2*mapping->hdim + 1;
- for (i = 0; i < mapping->hdim; i++)
+ for (i = 0; i < (int) mapping->hdim; i++)
{
local_hsize[0*mapping->hdim + i] = mapping->local_hsize[i];
local_hsize[1*mapping->hdim + i] = mapping->global_hoffset[i];
@@ -196,11 +202,11 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
{
displs[0] = 0;
recvcnts[0] = chunk_hsize[2*mapping->hdim];
- for (i = 1; i < nprocs; i++)
+ for (proc = 1; proc < nprocs; proc++)
{
- displs[i] = displs[i-1] + chunk_hsize[(i-1)*(2*mapping->hdim+1) +
- 2*mapping->hdim];
- recvcnts[i] = chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim];
+ displs[proc] = displs[proc-1] + chunk_hsize[(proc-1)*(2*mapping->hdim+1) +
+ 2*mapping->hdim];
+ recvcnts[proc] = chunk_hsize[proc*(2*mapping->hdim+1) + 2*mapping->hdim];
}
}
#endif
@@ -212,6 +218,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
proc = procs ? procs[i] : DEFAULT_PROCESSOR;
timelevel = timelevels ? timelevels[i] : DEFAULT_TIMELEVEL;
hdatatype = hdatatypes ? hdatatypes[i] : CCTK_VarTypeI (vindices[i]);
+ hdata_size = CCTK_VarTypeSize (hdatatype);
result_ptr = retvals ? &retvals[i] : &result;
if (mapping->totals <= 0)
@@ -223,7 +230,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
{
/* allocate temporary array to keep the local hyperslab data */
local_hdata = nprocs == 1 ?
- hdata[i] : malloc (mapping->totals * CCTK_VarTypeSize (hdatatype));
+ hdata[i] : malloc (mapping->totals * hdata_size);
/* get the processor-local hyperslab */
*result_ptr = GetLocalHyperslab (GH, mapping, proc, vindices[i],
timelevel, hdatatype, local_hdata);
@@ -245,8 +252,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
/* allocate temporary array to receive the chunks from all processors */
/* for the case of hdim == 1, the sorting is done by MPI */
chunked_hdata = mapping->hdim > 1 ?
- malloc (totals_global * CCTK_VarTypeSize (hdatatype)) :
- hdata[i];
+ malloc (totals_global * hdata_size) : hdata[i];
}
else
{
@@ -265,9 +271,9 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
}
else
{
- CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata==0?&proc:local_hdata, mapping->totals, mpi_vtype,
- chunked_hdata==0?&proc:chunked_hdata, recvcnts, displs,
- mpi_vtype, proc, comm));
+ CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata, mapping->totals,
+ mpi_vtype, chunked_hdata, recvcnts,
+ displs, mpi_vtype, proc, comm));
}
/* free processor-local chunk */
@@ -280,7 +286,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
The user wants it unchunked, so let's sort it... */
if (mapping->hdim > 1 && (proc < 0 || proc == myproc))
{
-/* SortIntoUnchunkedHyperslab (chunked_hdata, hdata[i]); */
+ SortIntoUnchunkedHyperslab (mapping, chunked_hdata, hdata[i], nprocs, chunk_hsize, hdata_size);
free (chunked_hdata);
}
#endif
@@ -659,7 +665,7 @@ static int GetDiagonalFromFrom3D (const cGH *GH,
void *hdata)
{
int i, j, k, npoints, myproc, linear_idx;
- int vdatatype, htypesize, vtypesize;
+ int vdatatype, hdata_size, vdata_size;
const char *vdata;
const pGExtras *extras;
@@ -668,8 +674,8 @@ static int GetDiagonalFromFrom3D (const cGH *GH,
extras = ((const pGA *) PUGH_pGH (GH)->variables[vindex][timelevel])->extras;
vdatatype = CCTK_VarTypeI (vindex);
- htypesize = CCTK_VarTypeSize (hdatatype);
- vtypesize = CCTK_VarTypeSize (vdatatype);
+ hdata_size = CCTK_VarTypeSize (hdatatype);
+ vdata_size = CCTK_VarTypeSize (vdatatype);
vdata = (const char *) CCTK_VarDataPtrI (GH, timelevel, vindex);
myproc = CCTK_MyProc (GH);
@@ -686,13 +692,13 @@ static int GetDiagonalFromFrom3D (const cGH *GH,
linear_idx = i + j*extras->hyper_volume[1] + k*extras->hyper_volume[2];
if (vdatatype != hdatatype)
{
- mapping->conversion_fn (vdata + linear_idx*vtypesize, hdata, 1, 1, 1);
+ mapping->conversion_fn (vdata + linear_idx*vdata_size, hdata, 1, 1, 1);
}
else
{
- memcpy (hdata, vdata + linear_idx*vtypesize, htypesize);
+ memcpy (hdata, vdata + linear_idx*vdata_size, hdata_size);
}
- hdata = (char *) hdata + htypesize;
+ hdata = (char *) hdata + hdata_size;
}
i += mapping->downsample[0];
j += mapping->downsample[1];
@@ -701,3 +707,97 @@ static int GetDiagonalFromFrom3D (const cGH *GH,
return (0);
}
+
+/*
+   SortIntoUnchunkedHyperslab
+
+   Copies the hyperslab chunks of all processors, which are stored one after
+   another in 'chunked_hdata', line by line to their places in the
+   'unchunked_hdata' buffer.
+
+   mapping          - supplies the hyperslab dimensionality (hdim) and the
+                      global hyperslab sizes (global_hsize[])
+   chunked_hdata    - source buffer; it is read strictly sequentially, one
+                      line of hsize_chunk[0] elements at a time, chunk after
+                      chunk in processor order
+   unchunked_hdata  - destination buffer; each line is written at the linear
+                      offset computed from the chunk offset, the current
+                      point within the chunk, and the global hyperslab sizes
+   nprocs           - number of per-processor records in 'chunk_hsize'
+   chunk_hsize      - nprocs records of (2*hdim + 1) entries each: hdim chunk
+                      sizes, followed by hdim chunk offsets, followed by one
+                      entry which is only tested for being <= 0 here
+                      (presumably the chunk's total number of elements -
+                      confirm against the code which fills this array)
+   hdata_size       - size in bytes of a single hyperslab data element
+
+   NOTE(review): the line loop below starts with the counter for dimension 1,
+   so it is only meaningful for hdim >= 2; the caller visible in this file
+   invokes the routine only if mapping->hdim > 1.
+ */
+static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping,
+ const void *chunked_hdata,
+ void *unchunked_hdata,
+ int nprocs,
+ const CCTK_INT *chunk_hsize,
+ int hdata_size)
+{
+ int i, j, proc, linear_hoffset;
+ int *point, *points_per_hdim;
+ char *copy_to;
+ const char *copy_from;
+ const CCTK_INT *hsize_chunk, *hoffset_chunk;
+
+
+ /* allocate temporary vectors: a single buffer of 2*hdim ints of which the
+    first hdim entries are the loop counters 'point' and the second hdim
+    entries are 'points_per_hdim'
+    NOTE(review): the malloc() result is used without a NULL check */
+ point = (int *) malloc (2 * mapping->hdim * sizeof (int));
+ points_per_hdim = point + mapping->hdim;
+ /* points_per_hdim[i] is the product of global_hsize[0] ... global_hsize[i-1],
+    i.e. the number of elements between two neighbouring points in
+    dimension i of the unchunked hyperslab */
+ points_per_hdim[0] = 1;
+ for (i = 1; i < (int) mapping->hdim; i++)
+ {
+ points_per_hdim[i] = points_per_hdim[i-1] * mapping->global_hsize[i-1];
+ }
+
+ /* use char pointers for easy incrementing when copying:
+    copy_from advances through the chunked buffer as lines are consumed,
+    copy_to always stays at the start of the unchunked buffer */
+ copy_from = (const char *) chunked_hdata;
+ copy_to = (char *) unchunked_hdata;
+
+ /* now copy the chunks from each processor into the global hyperslab */
+ for (proc = 0; proc < nprocs; proc++)
+ {
+ /* skip processors which didn't contribute any data
+    (the last entry of their record in chunk_hsize is <= 0) */
+ if (chunk_hsize[proc * (2*mapping->hdim + 1) + 2*mapping->hdim] <= 0)
+ {
+ continue;
+ }
+ /* locate this processor's record: hdim chunk sizes, then hdim chunk offsets */
+ hsize_chunk = chunk_hsize + proc * (2*mapping->hdim+1);
+ hoffset_chunk = hsize_chunk + mapping->hdim;
+
+ /* set startpoint to zero */
+ memset (point, 0, mapping->hdim * sizeof (int));
+ /* emulate (hdim-1) nested loops over dimensions 1 ... hdim-1 of the chunk;
+    dimension 0 is not looped over but copied as one whole line per step.
+    'i' is the index of the counter currently being incremented
+    (it is always 1 at the time a line gets copied). */
+ i = 1;
+ while (1)
+ {
+ /* check for end of current loop */
+ if (point[i] >= hsize_chunk[i])
+ {
+ /* increment outermost loopers: carry into the next higher dimensions
+    until a counter is found which is still within its chunk size */
+ for (i++; i < (int) mapping->hdim; i++)
+ {
+ if (++point[i] < hsize_chunk[i])
+ {
+ break;
+ }
+ }
+
+ /* done if beyond outermost loop */
+ if (i >= (int) mapping->hdim)
+ {
+ break;
+ }
+
+ /* reset innermost loopers (all counters below the one just incremented;
+    point[0] is never modified and stays zero) */
+ for (i--; i > 0; i--)
+ {
+ point[i] = 0;
+ }
+ i = 1;
+ }
+
+ /* get the linear offset (in elements) of this line's first point in the
+    unchunked hyperslab: the chunk's offset in dimension 0 plus, for every
+    higher dimension, (chunk offset + current point) times the number of
+    elements per step in that dimension */
+ linear_hoffset = hoffset_chunk[0];
+ for (j = 1; j < (int) mapping->hdim; j++)
+ {
+ linear_hoffset += (hoffset_chunk[j] + point[j]) * points_per_hdim[j];
+ }
+ /* copy the line of hsize_chunk[0] elements
+    and advance the source pointer past it */
+ memcpy (copy_to + linear_hoffset * hdata_size, copy_from,
+ hsize_chunk[0] * hdata_size);
+ copy_from += hsize_chunk[0] * hdata_size;
+
+ /* increment current looper */
+ point[i]++;
+
+ } /* end of nested loops over all dimensions */
+ } /* end of loop over all processors */
+
+ /* free allocated temporary vectors
+    (points_per_hdim lives in the same allocation as point) */
+ free (point);
+}