From e53db17ff31f4a0abebe0eb65aaa63c030155f01 Mon Sep 17 00:00:00 2001 From: tradke Date: Thu, 25 Apr 2002 16:31:34 +0000 Subject: Implemented routines to define and extract local hyperslabs. This finally closes a couple feature requests. git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGH/PUGHSlab/trunk@82 10716dce-81a3-4424-a2c8-48026a0d3035 --- src/GetHyperslab.c | 174 ++++++++++---------------------- src/Mapping.c | 285 +++++++++++++++++++++++++++++++++++++---------------- src/PUGHSlab.h | 36 ++++--- src/PUGHSlabi.h | 22 +++-- 4 files changed, 288 insertions(+), 229 deletions(-) diff --git a/src/GetHyperslab.c b/src/GetHyperslab.c index 71ff33d..6ad4f86 100644 --- a/src/GetHyperslab.c +++ b/src/GetHyperslab.c @@ -38,7 +38,6 @@ CCTK_FILEVERSION(CactusPUGH_PUGHSlab_GetHyperslab_c) ********************************************************************/ static int GetLocalHyperslab (const cGH *GH, const hslab_mapping_t *mapping, - int proc, int vindex, int timelevel, int hdatatype, @@ -49,12 +48,12 @@ static int GetDiagonalFromFrom3D (const cGH *GH, int timelevel, int hdatatype, void *hdata); +#ifdef CCTK_MPI static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping, + int nprocs, int hdata_size, const void *chunked_hdata, - void *unchunked_hdata, - int nprocs, - const CCTK_INT *chunk_hsize, - int hdata_size); + void *unchunked_hdata); +#endif CCTK_INT Hyperslab_Get (const cGH *GH, @@ -85,43 +84,43 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, void *const *hdata /* num_arrays */, CCTK_INT *retvals /* num_arrays */) { - int i, nprocs, proc, timelevel, hdatatype, hdata_size, retval; + int i, timelevel, hdatatype, hdata_size, retval; CCTK_INT result, *result_ptr; hslab_mapping_t *mapping; void *local_hdata; #ifdef CCTK_MPI - int myproc; - CCTK_INT totals_global, *chunk_hsize, *local_hsize; + int myproc, nprocs, proc; MPI_Comm comm; - int *displs, *recvcnts; void *chunked_hdata; MPI_Datatype mpi_vtype; #endif retval = 0; - mapping = PUGHSlabi_GetMapping (mapping_handle); - /*** FIXME: how to deal with local mapping ***/ - if (mapping == NULL) + if (num_arrays <= 0) { - retval = -1; + return (0); } - else + + mapping = PUGHSlabi_GetMapping (mapping_handle); + if (mapping) { /* check mapping consistency */ /*** FIXME ***/ /* all vindices[] must match the given mapping */ -#if 0 - NOTE mapping->totals denotes the number of LOCAL data points !!! /* check if there's any data to extract */ - if (mapping->totals <= 0) + if (mapping->totals_global <= 0) { retval = num_arrays; } -#endif + } + else + { + retval = -1; } + /* immediately return in case of errors or if there's nothing to do */ if (retval) { /* set the retvals[] according to the return value */ @@ -136,86 +135,19 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, return (retval); } - nprocs = CCTK_nProcs (GH); - #ifdef CCTK_MPI myproc = CCTK_MyProc (GH); - - /* now build the CCTK_INT buffer to communicate the sizes and offsets */ - /*** FIXME: could also do this already when creating the mapping ***/ - /*** could also cache the chunk_size in the mapping structure to - save following MPI_Allgather() calls but this makes - Hyperslab_DefineGlobalMapping() a collective call ***/ - local_hsize = (CCTK_INT *) malloc ((1 + nprocs) * (2*mapping->hdim + 1) - * sizeof (CCTK_INT)); - chunk_hsize = local_hsize + 2*mapping->hdim + 1; - for (i = 0; i < (int) mapping->hdim; i++) - { - local_hsize[0*mapping->hdim + i] = mapping->local_hsize[i]; - local_hsize[1*mapping->hdim + i] = mapping->global_hoffset[i]; - } - local_hsize[2*mapping->hdim + 0] = mapping->totals; - + nprocs = CCTK_nProcs (GH); comm = PUGH_pGH (GH)->PUGH_COMM_WORLD; - /* FIXME: do an Allgather here so that all procs know how many data points - we have. This is useful to decide whether we have to gather - something at all or not and thus can return immediately */ - CACTUS_MPI_ERROR (MPI_Allgather (local_hsize, 2*mapping->hdim + 1, - PUGH_MPI_INT, chunk_hsize, - 2*mapping->hdim + 1, PUGH_MPI_INT, comm)); - - /* sum up the total number of hyperslab data points */ - totals_global = 0; - for (i = 0; i < nprocs; i++) - { - totals_global += chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim]; - } - - /* check if there's any data to extract */ - if (totals_global <= 0) - { - if (retvals) - { - memset (retvals, 0, num_arrays * sizeof (CCTK_INT)); - } - free (local_hsize); - return (0); - } - - /* set the displacements/receive counts for the gather operation */ - displs = (int *) malloc (2 * nprocs * sizeof (int)); - recvcnts = displs + nprocs; - - /* for hdim == 1 we let MPI_Gatherv() do the sorting directly into the - hyperslab buffer using the given hyperslab offsets and sizes */ - if (mapping->hdim == 1) - { - for (i = 0; i < nprocs; i++) - { - recvcnts[i] = chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim]; - displs[i] = mapping->is_diagonal_in_3D ? - i == 0 ? 0 : displs[i-1] + recvcnts[i-1] : - chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim - 1]; - } - } - else - { - displs[0] = 0; - recvcnts[0] = chunk_hsize[2*mapping->hdim]; - for (proc = 1; proc < nprocs; proc++) - { - displs[proc] = displs[proc-1] + chunk_hsize[(proc-1)*(2*mapping->hdim+1) + - 2*mapping->hdim]; - recvcnts[proc] = chunk_hsize[proc*(2*mapping->hdim+1) + 2*mapping->hdim]; - } - } +#else + /* suppress compiler warnings about unused parameters */ + (void) (procs + 0); #endif /* now loop over all hyperslabs */ /*** FIXME: should optimize this loop away ***/ for (i = 0; i < num_arrays; i++) { - proc = procs ? procs[i] : DEFAULT_PROCESSOR; timelevel = timelevels ? timelevels[i] : DEFAULT_TIMELEVEL; hdatatype = hdatatypes ? hdatatypes[i] : CCTK_VarTypeI (vindices[i]); hdata_size = CCTK_VarTypeSize (hdatatype); @@ -229,11 +161,12 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, else { /* allocate temporary array to keep the local hyperslab data */ - local_hdata = nprocs == 1 ? - hdata[i] : malloc (mapping->totals * hdata_size); + local_hdata = mapping->is_global_hyperslab ? + malloc (mapping->totals * hdata_size) : hdata[i]; + /* get the processor-local hyperslab */ - *result_ptr = GetLocalHyperslab (GH, mapping, proc, vindices[i], - timelevel, hdatatype, local_hdata); + *result_ptr = GetLocalHyperslab (GH, mapping, vindices[i], timelevel, + hdatatype, local_hdata); } if (*result_ptr == 0) @@ -241,18 +174,19 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, retval++; } - if (nprocs == 1) + if (! mapping->is_global_hyperslab) { continue; } #ifdef CCTK_MPI + proc = procs ? procs[i] : DEFAULT_PROCESSOR; if (proc < 0 || proc == myproc) { /* allocate temporary array to receive the chunks from all processors */ /* for the case of hdim == 1, the sorting is done by MPI */ chunked_hdata = mapping->hdim > 1 ? - malloc (totals_global * hdata_size) : hdata[i]; + malloc (mapping->totals_global * hdata_size) : hdata[i]; } else { @@ -265,15 +199,15 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, /* collect the hyperslab chunks from all processors */ if (proc < 0) { - CACTUS_MPI_ERROR (MPI_Allgatherv (local_hdata, mapping->totals, - mpi_vtype, chunked_hdata, recvcnts, - displs, mpi_vtype, comm)); + CACTUS_MPI_ERROR (MPI_Allgatherv (local_hdata, mapping->totals, mpi_vtype, + chunked_hdata, mapping->recvcnts, + mapping->displs, mpi_vtype, comm)); } else { - CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata, mapping->totals, - mpi_vtype, chunked_hdata, recvcnts, - displs, mpi_vtype, proc, comm)); + CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata, mapping->totals, mpi_vtype, + chunked_hdata, mapping->recvcnts, + mapping->displs, mpi_vtype, proc, comm)); } /* free processor-local chunk */ @@ -286,17 +220,13 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, The user wants it unchunked, so let's sort it... */ if (mapping->hdim > 1 && (proc < 0 || proc == myproc)) { - SortIntoUnchunkedHyperslab (mapping, chunked_hdata, hdata[i], nprocs, chunk_hsize, hdata_size); + SortIntoUnchunkedHyperslab (mapping, nprocs, hdata_size, chunked_hdata, + hdata[i]); free (chunked_hdata); } #endif } -#ifdef CCTK_MPI - free (displs); - free (local_hsize); -#endif - return (retval); } @@ -402,7 +332,6 @@ CCTK_INT Hyperslab_GetList (const cGH *GH, @@*/ static int GetLocalHyperslab (const cGH *GH, const hslab_mapping_t *mapping, - int proc, int vindex, int timelevel, int hdatatype, @@ -437,10 +366,6 @@ static int GetLocalHyperslab (const cGH *GH, { errormsg = "NULL pointer(s) passed for GH/mapping/hdata arguments"; } - else if (proc >= CCTK_nProcs (GH)) - { - errormsg = "Invalid processor ID passed for proc argument"; - } else if (CCTK_GroupData (CCTK_GroupIndexFromVarI (vindex), &vinfo) < 0) { errormsg = "Invalid variable index given"; @@ -709,12 +634,11 @@ static int GetDiagonalFromFrom3D (const cGH *GH, } +#ifdef CCTK_MPI static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping, + int nprocs, int hdata_size, const void *chunked_hdata, - void *unchunked_hdata, - int nprocs, - const CCTK_INT *chunk_hsize, - int hdata_size) + void *unchunked_hdata) { int i, j, proc, linear_hoffset; int *point, *points_per_hdim; @@ -728,7 +652,7 @@ static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping, points_per_hdim = point + mapping->hdim; points_per_hdim[0] = 1; - for (i = 1; i < (int) mapping->hdim; i++) + for (i = 1; i < mapping->hdim; i++) { points_per_hdim[i] = points_per_hdim[i-1] * mapping->global_hsize[i-1]; } @@ -740,15 +664,16 @@ static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping, /* now copy the chunks from each processor into the global hyperslab */ for (proc = 0; proc < nprocs; proc++) { + /* get the pointers to processor i's chunk offset and size */ + hoffset_chunk = mapping->chunk + proc * (2*mapping->hdim+1); + hsize_chunk = hoffset_chunk + mapping->hdim; + /* skip processors which didn't contribute any data */ - if (chunk_hsize[proc * (2*mapping->hdim + 1) + 2*mapping->hdim] <= 0) + if (hsize_chunk[mapping->hdim] <= 0) { continue; } - hsize_chunk = chunk_hsize + proc * (2*mapping->hdim+1); - hoffset_chunk = hsize_chunk + mapping->hdim; - /* set startpoint to zero */ memset (point, 0, mapping->hdim * sizeof (int)); @@ -759,7 +684,7 @@ static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping, if (point[i] >= hsize_chunk[i]) { /* increment outermost loopers */ - for (i++; i < (int) mapping->hdim; i++) + for (i++; i < mapping->hdim; i++) { if (++point[i] < hsize_chunk[i]) { @@ -768,7 +693,7 @@ static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping, } /* done if beyond outermost loop */ - if (i >= (int) mapping->hdim) + if (i >= mapping->hdim) { break; } @@ -783,7 +708,7 @@ static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping, /* get the linear offset */ linear_hoffset = hoffset_chunk[0]; - for (j = 1; j < (int) mapping->hdim; j++) + for (j = 1; j < mapping->hdim; j++) { linear_hoffset += (hoffset_chunk[j] + point[j]) * points_per_hdim[j]; } @@ -801,3 +726,4 @@ static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping, /* free allocated temporary vectors */ free (point); } +#endif diff --git a/src/Mapping.c b/src/Mapping.c index b600ce7..f9e3803 100644 --- a/src/Mapping.c +++ b/src/Mapping.c @@ -49,33 +49,96 @@ CCTK_FILEVERSION(CactusPUGH_PUGHSlab_Mapping_c) /******************************************************************** - ******************** Internal Routines ************************ + ******************** Static Variables ************************ ********************************************************************/ static int nmapping_list = 0; static hslab_mapping_t *mapping_list = NULL; + +/******************************************************************** + ******************** Internal Routines ************************ + ********************************************************************/ +static CCTK_INT DefineMapping (const cGH *GH, + CCTK_INT vindex, + CCTK_INT hdim, + const CCTK_INT *direction, + const CCTK_INT *origin, + const CCTK_INT *extent, + const CCTK_INT *downsample, + CCTK_INT table_handle, + t_hslabConversionFn function, + int is_global_hyperslab, + CCTK_INT *hsize_local, + CCTK_INT *hsize_global, + CCTK_INT *hoffset_global); static int IsFullHyperslab (const pGA *GA, const CCTK_INT *origin, const CCTK_INT *extent, const hslab_mapping_t *mapping); -CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( - const cGH *GH, - CCTK_INT vindex, - CCTK_INT dim, - const CCTK_INT *direction /* vdim*dim */, - const CCTK_INT *origin /* vdim */, - const CCTK_INT *extent /* dim */, - const CCTK_INT *downsample /* dim */, - CCTK_INT table_handle, - t_hslabConversionFn conversion_fn, - CCTK_INT *hsize /* dim */) +CCTK_INT Hyperslab_DefineLocalMappingByIndex (const cGH *GH, + CCTK_INT vindex, + CCTK_INT hdim, + const CCTK_INT *direction, + const CCTK_INT *origin, + const CCTK_INT *extent, + const CCTK_INT *downsample, + CCTK_INT table_handle, + t_hslabConversionFn function, + CCTK_INT *hsize_local, + CCTK_INT *hsize_global, + CCTK_INT *hoffset_global) +{ + CCTK_INT retval; + + + retval = DefineMapping (GH, vindex, hdim, direction, origin, extent, + downsample, table_handle, function, 0, + hsize_local, hsize_global, hoffset_global); + return (retval); +} + + +CCTK_INT Hyperslab_DefineGlobalMappingByIndex (const cGH *GH, + CCTK_INT vindex, + CCTK_INT hdim, + const CCTK_INT *direction, + const CCTK_INT *origin, + const CCTK_INT *extent, + const CCTK_INT *downsample, + CCTK_INT table_handle, + t_hslabConversionFn function, + CCTK_INT *hsize) +{ + CCTK_INT retval; + + + retval = DefineMapping (GH, vindex, hdim, direction, origin, extent, + downsample, table_handle, function, 1, + NULL, hsize, NULL); + return (retval); +} + + +static CCTK_INT DefineMapping (const cGH *GH, + CCTK_INT vindex, + CCTK_INT hdim, + const CCTK_INT *direction, /* vdim*hdim */ + const CCTK_INT *origin, /* vdim */ + const CCTK_INT *extent, /* hdim */ + const CCTK_INT *downsample, /* hdim */ + CCTK_INT table_handle, + t_hslabConversionFn function, + int is_global_hyperslab, + CCTK_INT *hsize_local, /* hdim */ + CCTK_INT *hsize_global, /* hdim */ + CCTK_INT *hoffset_global /* hdim */) { - unsigned int vdim, hdim, num_dirs; + int vdim, dim, num_dirs; int retval; int stagger_index; - int myproc; + int myproc, nprocs; int i, j, k, npoints; hslab_mapping_t *mapping; const char *error_msg; @@ -85,6 +148,10 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( cGroup vinfo; + /* identify myself */ + myproc = CCTK_MyProc (GH); + nprocs = CCTK_nProcs (GH); + /* PUGHSlab doesn't use table information */ if (table_handle >= 0) { @@ -106,14 +173,14 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( "(not a CCTK_GF or CCTK_ARRAY type)"; retval = -2; } - else if (dim < 0 || dim > vinfo.dim) + else if (hdim < 0 || hdim > vinfo.dim) { error_msg = "invalid hyperslab dimension given"; retval = -2; } - else if (! direction || ! origin || ! extent || ! hsize) + else if (! direction || ! origin || ! extent) { - error_msg = "NULL pointer(s) passed for direction/origin/extent/hsize " + error_msg = "NULL pointer(s) passed for direction/origin/extent " "parameters"; retval = -3; } @@ -124,10 +191,10 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( } else { - for (vdim = 0; vdim < (unsigned int) vinfo.dim; vdim++) + for (vdim = 0; vdim < vinfo.dim; vdim++) { retval |= origin[vdim] < 0; - if (vdim < (unsigned int) dim) + if (vdim < hdim) { retval |= extent[vdim] <= 0; if (downsample) @@ -147,10 +214,20 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( mapping = (hslab_mapping_t *) malloc (sizeof (hslab_mapping_t)); if (mapping) { - mapping->vectors = (int *) malloc ((6*vinfo.dim + 3*dim) * sizeof (int)); + mapping->do_dir = (int *) malloc ((6*vinfo.dim + 2*nprocs) * sizeof(int)); + mapping->global_hsize = (CCTK_INT *) malloc (((2*hdim + 1) * (nprocs+1) + + hdim) * sizeof (CCTK_INT)); } - if (mapping == NULL || mapping->vectors == NULL) + if (! mapping || ! mapping->do_dir || ! mapping->global_hsize) { + if (mapping->do_dir) + { + free (mapping->do_dir); + } + if (mapping->global_hsize) + { + free (mapping->global_hsize); + } if (mapping) { free (mapping); @@ -168,48 +245,50 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( return (retval); } - mapping->hdim = (unsigned int) dim; + mapping->hdim = hdim; mapping->vinfo = vinfo; - mapping->conversion_fn = conversion_fn; + mapping->conversion_fn = function; /* assign memory for the other vectors */ - mapping->local_startpoint = mapping->vectors + 0*vinfo.dim; - mapping->local_endpoint = mapping->vectors + 1*vinfo.dim; - mapping->global_startpoint = mapping->vectors + 2*vinfo.dim; - mapping->global_endpoint = mapping->vectors + 3*vinfo.dim; - mapping->do_dir = mapping->vectors + 4*vinfo.dim; - mapping->downsample = mapping->vectors + 5*vinfo.dim; - mapping->local_hsize = mapping->vectors + 6*vinfo.dim + 0*dim; - mapping->global_hsize = mapping->vectors + 6*vinfo.dim + 1*dim; - mapping->global_hoffset = mapping->vectors + 6*vinfo.dim + 2*dim; + mapping->local_startpoint = mapping->do_dir + 1*vinfo.dim; + mapping->local_endpoint = mapping->do_dir + 2*vinfo.dim; + mapping->global_startpoint = mapping->do_dir + 3*vinfo.dim; + mapping->global_endpoint = mapping->do_dir + 4*vinfo.dim; + mapping->downsample = mapping->do_dir + 5*vinfo.dim; + mapping->displs = mapping->do_dir + 6*vinfo.dim; + mapping->recvcnts = mapping->do_dir + 6*vinfo.dim + nprocs; + + mapping->global_hoffset = mapping->global_hsize + 1*hdim; + mapping->local_hsize = mapping->global_hsize + 2*hdim; + mapping->chunk = mapping->global_hsize + 3*hdim + 1; /* check direction vectors */ - for (hdim = 0; hdim < mapping->hdim; hdim++) + for (dim = 0; dim < mapping->hdim; dim++) { num_dirs = 0; - for (vdim = 0; vdim < (unsigned int) vinfo.dim; vdim++) + for (vdim = 0; vdim < vinfo.dim; vdim++) { - if (direction[hdim*vinfo.dim + vdim]) + if (direction[dim*vinfo.dim + vdim]) { num_dirs++; } } if (num_dirs == 0) { - free (mapping->vectors); free (mapping); + free (mapping->do_dir); free (mapping->global_hsize); free (mapping); CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Hyperslab_DefineGlobalMappingByIndex: %d-direction vector " - "is a null vector", hdim); + "is a null vector", dim); return (-7); } mapping->is_diagonal_in_3D = num_dirs == 3 && mapping->hdim == 1; if (num_dirs != 1 && ! mapping->is_diagonal_in_3D) { - free (mapping->vectors); free (mapping); + free (mapping->do_dir); free (mapping->global_hsize); free (mapping); CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Hyperslab_DefineGlobalMappingByIndex: %d-direction vector " - "isn't axis-orthogonal", hdim); + "isn't axis-orthogonal", dim); return (-7); } } @@ -217,26 +296,26 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( /* diagonals can be extracted from non-staggered 3D variables only */ if (mapping->is_diagonal_in_3D && vinfo.stagtype != 0) { - free (mapping->vectors); free (mapping); + free (mapping->do_dir); free (mapping->global_hsize); free (mapping); CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Hyperslab_DefineGlobalMappingByIndex: diagonals can be " "extracted from non-staggered 3D variables only"); return (-7); } - for (vdim = 0; vdim < (unsigned int) vinfo.dim; vdim++) + for (vdim = 0; vdim < vinfo.dim; vdim++) { mapping->do_dir[vdim] = 0; - for (hdim = 0; hdim < mapping->hdim; hdim++) + for (dim = 0; dim < mapping->hdim; dim++) { - if (direction[hdim*vinfo.dim + vdim]) + if (direction[dim*vinfo.dim + vdim]) { mapping->do_dir[vdim]++; } } if (mapping->do_dir[vdim] > 1) { - free (mapping->vectors); free (mapping); + free (mapping->do_dir); free (mapping->global_hsize); free (mapping); CCTK_WARN (1, "Hyperslab_DefineGlobalMappingByIndex: duplicate direction " "vectors given"); return (-8); @@ -246,27 +325,26 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( /* get the pGH pointer and the variable's GA structure */ GA = (const pGA *) pughGH->variables[vindex][0]; extras = GA->extras; - myproc = CCTK_MyProc (GH); /* check extent */ - for (vdim = hdim = 0; vdim < (unsigned int) vinfo.dim; vdim++) + for (vdim = dim = 0; vdim < vinfo.dim; vdim++) { - if (mapping->do_dir[vdim] && hdim < mapping->hdim) + if (mapping->do_dir[vdim] && dim < mapping->hdim) { - if (origin[vdim] + extent[hdim] > extras->nsize[vdim]) + if (origin[vdim] + extent[dim] > extras->nsize[vdim]) { - free (mapping->vectors); free (mapping); + free (mapping->do_dir); free (mapping->global_hsize); free (mapping); CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Hyperslab_DefineGlobalMappingByIndex: extent in " - "%d-direction exceeds grid size", hdim); + "%d-direction exceeds grid size", dim); return (-8); } - hdim++; + dim++; } else if (mapping->is_diagonal_in_3D && origin[vdim] + extent[0] > extras->nsize[vdim]) { - free (mapping->vectors); free (mapping); + free (mapping->do_dir); free (mapping->global_hsize); free (mapping); CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Hyperslab_DefineGlobalMappingByIndex: extent in " "%d-direction exceeds grid size", vdim); @@ -275,27 +353,27 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( } /* now fill out the hyperslab mapping structure */ - for (vdim = hdim = 0; vdim < (unsigned int) vinfo.dim; vdim++) + for (vdim = dim = 0; vdim < vinfo.dim; vdim++) { mapping->downsample[vdim] = 1; - if (mapping->do_dir[vdim] && hdim < mapping->hdim) + if (mapping->do_dir[vdim] && dim < mapping->hdim) { if (downsample) { - mapping->downsample[vdim] = downsample[hdim]; + mapping->downsample[vdim] = downsample[dim]; } - mapping->global_hsize[hdim] = extent[hdim] / mapping->downsample[vdim]; - if (extent[hdim] % mapping->downsample[vdim]) + mapping->global_hsize[dim] = extent[dim] / mapping->downsample[vdim]; + if (extent[dim] % mapping->downsample[vdim]) { - mapping->global_hsize[hdim]++; + mapping->global_hsize[dim]++; } /* subtract ghostzones for periodic BC */ if (GA->connectivity->perme[vdim]) { - mapping->global_hsize[hdim] -= 2 * extras->nghostzones[vdim]; + mapping->global_hsize[dim] -= 2 * extras->nghostzones[vdim]; } - hdim++; + dim++; } else if (mapping->is_diagonal_in_3D) { @@ -309,7 +387,7 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( { mapping->totals -= 2 * extras->nghostzones[vdim]; } - if ((unsigned int) mapping->global_hsize[0] > mapping->totals) + if (mapping->global_hsize[0] > mapping->totals) { mapping->global_hsize[0] = mapping->totals; } @@ -321,13 +399,13 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( if (mapping->is_full_hyperslab) { memset (mapping->local_startpoint, 0, vinfo.dim * sizeof (int)); - memcpy (mapping->local_endpoint, extras->lnsize, vinfo.dim*sizeof(int)); + memcpy (mapping->local_endpoint, extras->lnsize, vinfo.dim * sizeof (int)); mapping->totals = extras->npoints; } else if (mapping->is_diagonal_in_3D) { /* just initialize the downsample and global_startpoint vectors */ - for (vdim = 0; vdim < (unsigned int) vinfo.dim; vdim++) + for (vdim = 0; vdim < vinfo.dim; vdim++) { mapping->downsample[vdim] = mapping->downsample[0]; mapping->global_startpoint[vdim] = origin[vdim]; @@ -354,14 +432,14 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( else { /* compute the global endpoint */ - for (vdim = hdim = 0; vdim < (unsigned int) vinfo.dim; vdim++) + for (vdim = dim = 0; vdim < vinfo.dim; vdim++) { mapping->global_endpoint[vdim] = origin[vdim] + - (mapping->do_dir[vdim] ? extent[hdim++] : 1); + (mapping->do_dir[vdim] ? extent[dim++] : 1); } /* compute this processor's global startpoint from the global ranges */ - for (vdim = 0; vdim < (unsigned int) vinfo.dim; vdim++) + for (vdim = 0; vdim < vinfo.dim; vdim++) { stagger_index = CCTK_StaggerDirIndex (vdim, vinfo.stagtype); @@ -388,7 +466,7 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( /* compute the local start- and endpoint from the global ranges */ mapping->totals = 1; - for (vdim = hdim = 0; vdim < (unsigned int) vinfo.dim; vdim++) + for (vdim = dim = 0; vdim < vinfo.dim; vdim++) { stagger_index = CCTK_StaggerDirIndex (vdim, vinfo.stagtype); @@ -434,16 +512,16 @@ CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( if (mapping->do_dir[vdim]) { /* compute the local size in each hyperslab dimension */ - mapping->local_hsize[hdim] = (mapping->local_endpoint[vdim] - + mapping->local_hsize[dim] = (mapping->local_endpoint[vdim] - mapping->local_startpoint[vdim]) / mapping->downsample[vdim]; if ((mapping->local_endpoint[vdim] - mapping->local_startpoint[vdim]) % mapping->downsample[vdim]) { - mapping->local_hsize[hdim]++; + mapping->local_hsize[dim]++; } - mapping->totals *= mapping->local_hsize[hdim]; - hdim++; + mapping->totals *= mapping->local_hsize[dim]; + dim++; } } } /* end of else branch for 'if (mapping->is_full_hyperslab)' */ @@ -461,33 +539,63 @@ CCTK_Barrier (GH); gathering all the local sizes */ if (! mapping->is_diagonal_in_3D && mapping->totals > 0) { - for (vdim = hdim = 0; vdim < (unsigned int) vinfo.dim; vdim++) + for (vdim = dim = 0; vdim < vinfo.dim; vdim++) { if (mapping->do_dir[vdim]) { if (mapping->is_full_hyperslab) { - mapping->global_hoffset[hdim] = extras->lb[myproc][vdim]; + mapping->global_hoffset[dim] = extras->lb[myproc][vdim]; } else { - mapping->global_hoffset[hdim] = (mapping->global_startpoint[vdim] - + mapping->global_hoffset[dim] = (mapping->global_startpoint[vdim] - origin[vdim]) / mapping->downsample[vdim]; if (GA->connectivity->perme[vdim]) { - mapping->global_hoffset[hdim] -= extras->nghostzones[vdim]; + mapping->global_hoffset[dim] -= extras->nghostzones[vdim]; } } #ifdef DEBUG fprintf (stderr, "proc %d: global_hoffset, local_hsize in direction " - "%d: %d, %d\n", CCTK_MyProc (GH), hdim, - mapping->global_hoffset[hdim], mapping->local_hsize[hdim]); + "%d: %d, %d\n", CCTK_MyProc (GH), dim, + mapping->global_hoffset[dim], mapping->local_hsize[dim]); #endif - hdim++; + dim++; } } } + mapping->totals_global = mapping->totals; + mapping->is_global_hyperslab = is_global_hyperslab && nprocs > 1; + +#ifdef CCTK_MPI + /* for global hyperslabs: build the communication buffers to gather + the sizes and offsets of processor chunks */ + if (mapping->is_global_hyperslab) + { + /* append the total size of the chunk to the local_hsize[] array */ + mapping->local_hsize[mapping->hdim] = mapping->totals; + + /* gather sizes and offsets of processor chunks */ + CACTUS_MPI_ERROR (MPI_Allgather (mapping->global_hoffset, 2*mapping->hdim+1, + PUGH_MPI_INT, mapping->chunk, + 2*mapping->hdim+1, PUGH_MPI_INT, + pughGH->PUGH_COMM_WORLD)); + + /* set the displacements/receive counts for the data gather operation + in the following Hyperslab_Get() call; + also sum up the total number of data points of the global hyperslab */ + mapping->totals_global = 0; + for (i = 0; i < nprocs; i++) + { + mapping->displs[i] = i ? mapping->displs[i-1]+mapping->recvcnts[i-1] : 0; + mapping->recvcnts[i] = mapping->chunk[(i+1)*2*mapping->hdim + i]; + mapping->totals_global += mapping->recvcnts[i]; + } + } +#endif + /* add this mapping to the mapping list */ if (mapping_list) { @@ -499,12 +607,20 @@ CCTK_Barrier (GH); mapping->handle = nmapping_list++; - /* set the global hsize in the return arguments */ - if (hsize) + /* set the return vectors if given */ + for (dim = 0; dim < mapping->hdim; dim++) { - for (hdim = 0; hdim < mapping->hdim; hdim++) + if (hoffset_global) + { + hoffset_global[dim] = mapping->global_hoffset[dim]; + } + if (hsize_global) + { + hsize_global[dim] = mapping->global_hsize[dim]; + } + if (hsize_local) { - hsize[hdim] = mapping->global_hsize[hdim]; + hsize_local[dim] = mapping->local_hsize[dim]; } } @@ -537,8 +653,7 @@ CCTK_INT Hyperslab_FreeMapping (CCTK_INT mapping_handle) mapping->prev->next = mapping->next; } } - free (mapping->vectors); - free (mapping); + free (mapping->do_dir); free (mapping->global_hsize); free (mapping); return (0); } mapping = mapping->next; @@ -571,7 +686,7 @@ static int IsFullHyperslab (const pGA *GA, int i, retval; - retval = mapping->hdim == (unsigned int) GA->extras->dim; + retval = mapping->hdim == GA->extras->dim; if (retval) { for (i = 0; i < GA->extras->dim; i++) diff --git a/src/PUGHSlab.h b/src/PUGHSlab.h index 6020e47..edc582f 100644 --- a/src/PUGHSlab.h +++ b/src/PUGHSlab.h @@ -2,7 +2,7 @@ @header PUGHSlab.h @date Sun 28 May 2000 @author Thomas Radke - @desc + @desc Global function declarations of thorn PUGHSlab @enddesc @version $Header$ @@ -10,7 +10,7 @@ #ifndef _PUGHSLAB_PUGHSLAB_H_ -#define _PUGHSLAB_PUGHSLAB_H_ +#define _PUGHSLAB_PUGHSLAB_H_ 1 #ifdef __cplusplus extern "C" @@ -35,20 +35,34 @@ CCTK_INT Hyperslab_Get (const cGH *GH, CCTK_INT Hyperslab_GetList (const cGH *GH, CCTK_INT mapping_handle, CCTK_INT num_arrays, - const CCTK_INT *procs /* num_arrays */, - const CCTK_INT *vindices /* num_arrays */, - const CCTK_INT *timelevels /* num_arrays */, - const CCTK_INT *hdatatypes /* num_arrays */, - void *const *hdata /* num_arrays */, + const CCTK_INT *procs, /* num_arrays */ + const CCTK_INT *vindices, /* num_arrays */ + const CCTK_INT *timelevels, /* num_arrays */ + const CCTK_INT *hdatatypes, /* num_arrays */ + void *const *hdata, /* num_arrays */ CCTK_INT *retvals /* num_arrays */); +CCTK_INT Hyperslab_DefineLocalMappingByIndex ( + const cGH *GH, + CCTK_INT vindex, + CCTK_INT hdim, + const CCTK_INT *direction, /* vdim*hdim */ + const CCTK_INT *origin, /* vdim */ + const CCTK_INT *extent, /* hdim */ + const CCTK_INT *downsample,/* hdim */ + CCTK_INT table_handle, + t_hslabConversionFn conversion_fn, + CCTK_INT *hsize_local, /* hdim */ + CCTK_INT *hsize_global, /* hdim */ + CCTK_INT *hoffset_global /* hdim */); + CCTK_INT Hyperslab_DefineGlobalMappingByIndex ( const cGH *GH, CCTK_INT vindex, CCTK_INT hdim, - const CCTK_INT *direction /* vdim*hdim */, - const CCTK_INT *origin /* vdim */, - const CCTK_INT *extent /* hdim */, - const CCTK_INT *downsample /* hdim */, + const CCTK_INT *direction, /* vdim*hdim */ + const CCTK_INT *origin, /* vdim */ + const CCTK_INT *extent, /* hdim */ + const CCTK_INT *downsample,/* hdim */ CCTK_INT table_handle, t_hslabConversionFn conversion_fn, CCTK_INT *hsize /* hdim */); diff --git a/src/PUGHSlabi.h b/src/PUGHSlabi.h index 6b46a36..8160403 100644 --- a/src/PUGHSlabi.h +++ b/src/PUGHSlabi.h @@ -10,7 +10,7 @@ #ifndef _PUGHSLAB_PUGHSLABI_H_ -#define _PUGHSLAB_PUGHSLABI_H_ +#define _PUGHSLAB_PUGHSLABI_H_ 1 #include "cctk_Groups.h" #include "PUGHSlab.h" @@ -24,19 +24,23 @@ extern "C" typedef struct hslab_mapping_t { int handle; - unsigned int hdim; - unsigned int vdim; - int *vectors; + int hdim; + int vdim; + int *do_dir; /* vdim */ int *local_startpoint; /* vdim */ int *local_endpoint; /* vdim */ int *global_startpoint; /* vdim */ int *global_endpoint; /* vdim */ - int *do_dir; /* vdim */ int *downsample; /* vdim */ - int *global_hsize; /* hdim */ - int *global_hoffset; /* hdim */ - int *local_hsize; /* hdim */ - unsigned int totals; + int *displs; /* nprocs */ + int *recvcnts; /* nprocs */ + CCTK_INT *global_hsize; /* hdim */ + CCTK_INT *global_hoffset; /* hdim */ + CCTK_INT *local_hsize; /* hdim+1 */ + CCTK_INT *chunk; /* nprocs*(2*hdim+1) */ + int totals; + int totals_global; + int is_global_hyperslab; int is_full_hyperslab; int is_diagonal_in_3D; t_hslabConversionFn conversion_fn; -- cgit v1.2.3