Sort processor-local chunks into an unchunked hyperslab.

git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGH/PUGHSlab/trunk@81 10716dce-81a3-4424-a2c8-48026a0d3035
author: tradke <tradke@10716dce-81a3-4424-a2c8-48026a0d3035> 2002-04-13 21:06:58 +0000
committer: tradke <tradke@10716dce-81a3-4424-a2c8-48026a0d3035> 2002-04-13 21:06:58 +0000
commit: 0c70c0bf78a59733589be194ac4971400c581651 (patch)
tree: 2881acbe9ac9567d136b9b06ff782bc22b23c8ef
parent: d5d7f1405dc8676013568eef37b9ba4acd96c352 (diff)
1 files changed, 119 insertions, 19 deletions
diff --git a/src/GetHyperslab.c b/src/GetHyperslab.c
index 1ad9a9c..71ff33d 100644
--- a/src/GetHyperslab.c
+++ b/src/GetHyperslab.c
@@ -49,6 +49,12 @@ static int GetDiagonalFromFrom3D (const cGH *GH,
                                   int timelevel,
                                   int hdatatype,
                                   void *hdata);
+static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping,
+                                        const void *chunked_hdata,
+                                        void *unchunked_hdata,
+                                        int nprocs,
+                                        const CCTK_INT *chunk_hsize,
+                                        int hdata_size);
 
 
 CCTK_INT Hyperslab_Get (const cGH *GH,
@@ -79,7 +85,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
                             void *const *hdata           /* num_arrays */,
                             CCTK_INT *retvals            /* num_arrays */)
 {
-  int i, nprocs, proc, timelevel, hdatatype, retval;
+  int i, nprocs, proc, timelevel, hdatatype, hdata_size, retval;
   CCTK_INT result, *result_ptr;
   hslab_mapping_t *mapping;
   void *local_hdata;
@@ -143,7 +149,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
   local_hsize = (CCTK_INT *) malloc ((1 + nprocs) * (2*mapping->hdim + 1)
                                                   * sizeof (CCTK_INT));
   chunk_hsize = local_hsize + 2*mapping->hdim + 1;
-  for (i = 0; i < mapping->hdim; i++)
+  for (i = 0; i < (int) mapping->hdim; i++)
   {
     local_hsize[0*mapping->hdim + i] = mapping->local_hsize[i];
     local_hsize[1*mapping->hdim + i] = mapping->global_hoffset[i];
@@ -196,11 +202,11 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
   {
     displs[0] = 0;
     recvcnts[0] = chunk_hsize[2*mapping->hdim];
-    for (i = 1; i < nprocs; i++)
+    for (proc = 1; proc < nprocs; proc++)
     {
-      displs[i]   = displs[i-1] + chunk_hsize[(i-1)*(2*mapping->hdim+1) +
-                                                     2*mapping->hdim];
-      recvcnts[i] = chunk_hsize[i*(2*mapping->hdim+1) + 2*mapping->hdim];
+      displs[proc] = displs[proc-1] + chunk_hsize[(proc-1)*(2*mapping->hdim+1) +
+                                                  2*mapping->hdim];
+      recvcnts[proc] = chunk_hsize[proc*(2*mapping->hdim+1) + 2*mapping->hdim];
     }
   }
 #endif
@@ -212,6 +218,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
     proc = procs ? procs[i] : DEFAULT_PROCESSOR;
     timelevel = timelevels ? timelevels[i] : DEFAULT_TIMELEVEL;
     hdatatype = hdatatypes ? hdatatypes[i] : CCTK_VarTypeI (vindices[i]);
+    hdata_size = CCTK_VarTypeSize (hdatatype);
     result_ptr = retvals ? &retvals[i] : &result;
 
     if (mapping->totals <= 0)
@@ -223,7 +230,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
     {
       /* allocate temporary array to keep the local hyperslab data */
       local_hdata = nprocs == 1 ?
-                    hdata[i] : malloc (mapping->totals * CCTK_VarTypeSize (hdatatype));
+                    hdata[i] : malloc (mapping->totals * hdata_size);
       /* get the processor-local hyperslab */
       *result_ptr = GetLocalHyperslab (GH, mapping, proc, vindices[i],
                                        timelevel, hdatatype, local_hdata);
@@ -245,8 +252,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
       /* allocate temporary array to receive the chunks from all processors */
       /* for the case of hdim == 1, the sorting is done by MPI */
       chunked_hdata = mapping->hdim > 1 ?
-                      malloc (totals_global * CCTK_VarTypeSize (hdatatype)) :
-                      hdata[i];
+                      malloc (totals_global * hdata_size) : hdata[i];
     }
     else
     {
@@ -265,9 +271,9 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
     }
     else
     {
-      CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata==0?&proc:local_hdata, mapping->totals, mpi_vtype,
-                                     chunked_hdata==0?&proc:chunked_hdata, recvcnts, displs,
-                                     mpi_vtype, proc, comm));
+      CACTUS_MPI_ERROR (MPI_Gatherv (local_hdata, mapping->totals,
+                                     mpi_vtype, chunked_hdata, recvcnts,
+                                     displs, mpi_vtype, proc, comm));
     }
 
     /* free processor-local chunk */
@@ -280,7 +286,7 @@ CCTK_INT Hyperslab_GetList (const cGH *GH,
        The user wants it unchunked, so let's sort it... */
     if (mapping->hdim > 1 && (proc < 0 || proc == myproc))
     {
-/*      SortIntoUnchunkedHyperslab (chunked_hdata, hdata[i]); */
+      SortIntoUnchunkedHyperslab (mapping, chunked_hdata, hdata[i], nprocs, chunk_hsize, hdata_size);
       free (chunked_hdata);
     }
 #endif
@@ -659,7 +665,7 @@ static int GetDiagonalFromFrom3D (const cGH *GH,
                                   void *hdata)
 {
   int i, j, k, npoints, myproc, linear_idx;
-  int vdatatype, htypesize, vtypesize;
+  int vdatatype, hdata_size, vdata_size;
   const char *vdata;
   const pGExtras *extras;
 
@@ -668,8 +674,8 @@ static int GetDiagonalFromFrom3D (const cGH *GH,
   extras = ((const pGA *) PUGH_pGH (GH)->variables[vindex][timelevel])->extras;
 
   vdatatype = CCTK_VarTypeI (vindex);
-  htypesize = CCTK_VarTypeSize (hdatatype);
-  vtypesize = CCTK_VarTypeSize (vdatatype);
+  hdata_size = CCTK_VarTypeSize (hdatatype);
+  vdata_size = CCTK_VarTypeSize (vdatatype);
   vdata = (const char *) CCTK_VarDataPtrI (GH, timelevel, vindex);
 
   myproc = CCTK_MyProc (GH);
@@ -686,13 +692,13 @@ static int GetDiagonalFromFrom3D (const cGH *GH,
       linear_idx = i + j*extras->hyper_volume[1] + k*extras->hyper_volume[2];
       if (vdatatype != hdatatype)
       {
-        mapping->conversion_fn (vdata + linear_idx*vtypesize, hdata, 1, 1, 1);
+        mapping->conversion_fn (vdata + linear_idx*vdata_size, hdata, 1, 1, 1);
       }
       else
       {
-        memcpy (hdata, vdata + linear_idx*vtypesize, htypesize);
+        memcpy (hdata, vdata + linear_idx*vdata_size, hdata_size);
       }
-      hdata = (char *) hdata + htypesize;
+      hdata = (char *) hdata + hdata_size;
     }
     i += mapping->downsample[0];
     j += mapping->downsample[1];
@@ -701,3 +707,97 @@ static int GetDiagonalFromFrom3D (const cGH *GH,
 
   return (0);
 }
+
+
+static void SortIntoUnchunkedHyperslab (const hslab_mapping_t *mapping,
+                                        const void *chunked_hdata,
+                                        void *unchunked_hdata,
+                                        int nprocs,
+                                        const CCTK_INT *chunk_hsize,
+                                        int hdata_size)
+{
+  int i, j, proc, linear_hoffset;
+  int *point, *points_per_hdim;
+  char *copy_to;
+  const char *copy_from;
+  const CCTK_INT *hsize_chunk, *hoffset_chunk;
+
+
+  /* one malloc holds point[] and points_per_hdim[] (result is not checked) */
+  point = (int *) malloc (2 * mapping->hdim * sizeof (int));
+  points_per_hdim = point + mapping->hdim;
+
+  points_per_hdim[0] = 1;
+  for (i = 1; i < (int) mapping->hdim; i++)
+  {
+    points_per_hdim[i] = points_per_hdim[i-1] * mapping->global_hsize[i-1];
+  }
+
+  /* byte pointers, so element offsets can be scaled by hdata_size */
+  copy_from = (const char *) chunked_hdata;
+  copy_to   = (char *) unchunked_hdata;
+
+  /* copy every processor's chunk to its place in unchunked_hdata */
+  for (proc = 0; proc < nprocs; proc++)
+  {
+    /* skip processors whose count entry (index 2*hdim of its record) is <= 0 */
+    if (chunk_hsize[proc * (2*mapping->hdim + 1) + 2*mapping->hdim] <= 0)
+    {
+      continue;
+    }
+
+    hsize_chunk = chunk_hsize + proc * (2*mapping->hdim+1);
+    hoffset_chunk = hsize_chunk + mapping->hdim;
+
+    /* zero all counters; dim 0 goes by whole lines, looping starts at dim 1 */
+    memset (point, 0, mapping->hdim * sizeof (int));
+
+    i = 1;
+    while (1)
+    {
+      /* dim 1 exhausted (i is always 1 here; reading point[1] needs hdim > 1) */
+      if (point[i] >= hsize_chunk[i])
+      {
+        /* carry: increment higher dimensions until one stays in range */
+        for (i++; i < (int) mapping->hdim; i++)
+        {
+          if (++point[i] < hsize_chunk[i])
+          {
+            break;
+          }
+        }
+
+        /* every dimension overflowed: this processor's chunk is finished */
+        if (i >= (int) mapping->hdim)
+        {
+          break;
+        }
+
+        /* zero the counters of all dimensions below the one just incremented */
+        for (i--; i > 0; i--)
+        {
+          point[i] = 0;
+        }
+        i = 1;
+      }
+
+      /* element offset of the line start, built from the global_hsize strides */
+      linear_hoffset = hoffset_chunk[0];
+      for (j = 1; j < (int) mapping->hdim; j++)
+      {
+        linear_hoffset += (hoffset_chunk[j] + point[j]) * points_per_hdim[j];
+      }
+      /* copy one contiguous line of hsize_chunk[0] elements, then advance input */
+      memcpy (copy_to + linear_hoffset * hdata_size, copy_from,
+              hsize_chunk[0] * hdata_size);
+      copy_from += hsize_chunk[0] * hdata_size;
+
+      /* advance to the next line along dimension 1 */
+      point[i]++;
+
+    } /* end of nested loops over all dimensions */
+  } /* end of loop over all processors */
+
+  /* release the single allocation backing point[] and points_per_hdim[] */
+  free (point);
+}
author	tradke <tradke@10716dce-81a3-4424-a2c8-48026a0d3035>	2002-04-13 21:06:58 +0000
committer	tradke <tradke@10716dce-81a3-4424-a2c8-48026a0d3035>	2002-04-13 21:06:58 +0000
commit	0c70c0bf78a59733589be194ac4971400c581651 (patch)
tree	2881acbe9ac9567d136b9b06ff782bc22b23c8ef
parent	d5d7f1405dc8676013568eef37b9ba4acd96c352 (diff)