diff options
author | tradke <tradke@10716dce-81a3-4424-a2c8-48026a0d3035> | 2002-04-13 21:06:58 +0000 |
---|---|---|
committer | tradke <tradke@10716dce-81a3-4424-a2c8-48026a0d3035> | 2002-04-13 21:06:58 +0000 |
commit | 0c70c0bf78a59733589be194ac4971400c581651 (patch) | |
tree | 2881acbe9ac9567d136b9b06ff782bc22b23c8ef | |
parent | d5d7f1405dc8676013568eef37b9ba4acd96c352 (diff) |
Sort processor-local chunks into an unchunked hyperslab.
git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGH/PUGHSlab/trunk@81 10716dce-81a3-4424-a2c8-48026a0d3035
-rw-r--r-- | src/GetHyperslab.c | 138 |
1 files changed, 119 insertions, 19 deletions
diff --git a/src/GetHyperslab.c b/src/GetHyperslab.c index 1ad9a9c..71ff33d 100644 --- a/src/GetHyperslab.c +++ b/src/GetHyperslab.c @@ -49,6 +49,12 @@ static int GetDiagonalFromFrom3D (const cGH *GH, int timelevel, int hdatatype, void *hdata); +static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping, + const void *chunked_hdata, + void *unchunked_hdata, + int nprocs, + const CCTK_INT *chunk_hsize, + int hdata_size); CCTK_INT Hyperslab_Get (const cGH *GH, @@ -79,7 +85,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, void *const *hdata /* num_arrays */, CCTK_INT *retvals /* num_arrays */) { - int i, nprocs, proc, timelevel, hdatatype, retval; + int i, nprocs, proc, timelevel, hdatatype, hdata_size, retval; CCTK_INT result, *result_ptr; hslab_mapping_t *mapping; void *local_hdata; @@ -143,7 +149,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, local_hsize = (CCTK_INT *) malloc ((1 + nprocs) * (2*mapping->hdim + 1) * sizeof (CCTK_INT)); chunk_hsize = local_hsize + 2*mapping->hdim + 1; - for (i = 0; i < mapping->hdim; i++) + for (i = 0; i < (int) mapping->hdim; i++) { local_hsize[0*mapping->hdim + i] = mapping->local_hsize[i]; local_hsize[1*mapping->hdim + i] = mapping->global_hoffset[i]; @@ -196,11 +202,11 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, { displs[0] = 0; recvcnts[0] = chunk_hsize[2*mapping->hdim]; - for (i = 1; i < nprocs; i++) + for (proc = 1; proc < nprocs; proc++) { - displs[i] = displs[i-1] + chunk_hsize[(i-1)*(2*mapping->hdim+1) + - 2*mapping->hdim]; - recvcnts[i] = chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim]; + displs[proc] = displs[proc-1] + chunk_hsize[(proc-1)*(2*mapping->hdim+1) + + 2*mapping->hdim]; + recvcnts[proc] = chunk_hsize[proc*(2*mapping->hdim+1) + 2*mapping->hdim]; } } #endif @@ -212,6 +218,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, proc = procs ? procs[i] : DEFAULT_PROCESSOR; timelevel = timelevels ? timelevels[i] : DEFAULT_TIMELEVEL; hdatatype = hdatatypes ? 
hdatatypes[i] : CCTK_VarTypeI (vindices[i]); + hdata_size = CCTK_VarTypeSize (hdatatype); result_ptr = retvals ? &retvals[i] : &result; if (mapping->totals <= 0) @@ -223,7 +230,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, { /* allocate temporary array to keep the local hyperslab data */ local_hdata = nprocs == 1 ? - hdata[i] : malloc (mapping->totals * CCTK_VarTypeSize (hdatatype)); + hdata[i] : malloc (mapping->totals * hdata_size); /* get the processor-local hyperslab */ *result_ptr = GetLocalHyperslab (GH, mapping, proc, vindices[i], timelevel, hdatatype, local_hdata); @@ -245,8 +252,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, /* allocate temporary array to receive the chunks from all processors */ /* for the case of hdim == 1, the sorting is done by MPI */ chunked_hdata = mapping->hdim > 1 ? - malloc (totals_global * CCTK_VarTypeSize (hdatatype)) : - hdata[i]; + malloc (totals_global * hdata_size) : hdata[i]; } else { @@ -265,9 +271,9 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, } else { - CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata==0?&proc:local_hdata, mapping->totals, mpi_vtype, - chunked_hdata==0?&proc:chunked_hdata, recvcnts, displs, - mpi_vtype, proc, comm)); + CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata, mapping->totals, + mpi_vtype, chunked_hdata, recvcnts, + displs, mpi_vtype, proc, comm)); } /* free processor-local chunk */ @@ -280,7 +286,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, The user wants it unchunked, so let's sort it... 
*/ if (mapping->hdim > 1 && (proc < 0 || proc == myproc)) { -/* SortIntoUnchunkedHyperslab (chunked_hdata, hdata[i]); */ + SortIntoUnchunkedHyperslab (mapping, chunked_hdata, hdata[i], nprocs, chunk_hsize, hdata_size); free (chunked_hdata); } #endif @@ -659,7 +665,7 @@ static int GetDiagonalFromFrom3D (const cGH *GH, void *hdata) { int i, j, k, npoints, myproc, linear_idx; - int vdatatype, htypesize, vtypesize; + int vdatatype, hdata_size, vdata_size; const char *vdata; const pGExtras *extras; @@ -668,8 +674,8 @@ static int GetDiagonalFromFrom3D (const cGH *GH, extras = ((const pGA *) PUGH_pGH (GH)->variables[vindex][timelevel])->extras; vdatatype = CCTK_VarTypeI (vindex); - htypesize = CCTK_VarTypeSize (hdatatype); - vtypesize = CCTK_VarTypeSize (vdatatype); + hdata_size = CCTK_VarTypeSize (hdatatype); + vdata_size = CCTK_VarTypeSize (vdatatype); vdata = (const char *) CCTK_VarDataPtrI (GH, timelevel, vindex); myproc = CCTK_MyProc (GH); @@ -686,13 +692,13 @@ static int GetDiagonalFromFrom3D (const cGH *GH, linear_idx = i + j*extras->hyper_volume[1] + k*extras->hyper_volume[2]; if (vdatatype != hdatatype) { - mapping->conversion_fn (vdata + linear_idx*vtypesize, hdata, 1, 1, 1); + mapping->conversion_fn (vdata + linear_idx*vdata_size, hdata, 1, 1, 1); } else { - memcpy (hdata, vdata + linear_idx*vtypesize, htypesize); + memcpy (hdata, vdata + linear_idx*vdata_size, hdata_size); } - hdata = (char *) hdata + htypesize; + hdata = (char *) hdata + hdata_size; } i += mapping->downsample[0]; j += mapping->downsample[1]; @@ -701,3 +707,97 @@ static int GetDiagonalFromFrom3D (const cGH *GH, return (0); } + + +static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping, + const void *chunked_hdata, + void *unchunked_hdata, + int nprocs, + const CCTK_INT *chunk_hsize, + int hdata_size) +{ + int i, j, proc, linear_hoffset; + int *point, *points_per_hdim; + char *copy_to; + const char *copy_from; + const CCTK_INT *hsize_chunk, *hoffset_chunk; + + + /* allocate 
temporary vectors */ + point = (int *) malloc (2 * mapping->hdim * sizeof (int)); + points_per_hdim = point + mapping->hdim; + + points_per_hdim[0] = 1; + for (i = 1; i < (int) mapping->hdim; i++) + { + points_per_hdim[i] = points_per_hdim[i-1] * mapping->global_hsize[i-1]; + } + + /* use char pointers for easy incrementing when copying */ + copy_from = (const char *) chunked_hdata; + copy_to = (char *) unchunked_hdata; + + /* now copy the chunks from each processor into the global hyperslab */ + for (proc = 0; proc < nprocs; proc++) + { + /* skip processors which didn't contribute any data */ + if (chunk_hsize[proc * (2*mapping->hdim + 1) + 2*mapping->hdim] <= 0) + { + continue; + } + + hsize_chunk = chunk_hsize + proc * (2*mapping->hdim+1); + hoffset_chunk = hsize_chunk + mapping->hdim; + + /* set startpoint to zero */ + memset (point, 0, mapping->hdim * sizeof (int)); + + i = 1; + while (1) + { + /* check for end of current loop */ + if (point[i] >= hsize_chunk[i]) + { + /* increment outermost loopers */ + for (i++; i < (int) mapping->hdim; i++) + { + if (++point[i] < hsize_chunk[i]) + { + break; + } + } + + /* done if beyond outermost loop */ + if (i >= (int) mapping->hdim) + { + break; + } + + /* reset innermost loopers */ + for (i--; i > 0; i--) + { + point[i] = 0; + } + i = 1; + } + + /* get the linear offset */ + linear_hoffset = hoffset_chunk[0]; + for (j = 1; j < (int) mapping->hdim; j++) + { + linear_hoffset += (hoffset_chunk[j] + point[j]) * points_per_hdim[j]; + } + /* copy the line */ + memcpy (copy_to + linear_hoffset * hdata_size, copy_from, + hsize_chunk[0] * hdata_size); + copy_from += hsize_chunk[0] * hdata_size; + + /* increment current looper */ + point[i]++; + + } /* end of nested loops over all dimensions */ + } /* end of loop over all processors */ + + /* free allocated temporary vectors */ + free (point); +} |