aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authortradke <tradke@38c3d835-c875-442e-b0fe-21c19ce1d001>1999-10-05 01:24:27 +0000
committertradke <tradke@38c3d835-c875-442e-b0fe-21c19ce1d001>1999-10-05 01:24:27 +0000
commit3aa41187b549ff9a792d673e86efe5220848d73f (patch)
tree0bdc27f2b94a7dfd29cf4d0c25c2cd819fd833a2 /src
parent8a113f1371d777ca62b6c690e2f44bbebebd79c0 (diff)
Added Jonghyun's IOPanda thorn.
Tested on O2K, needs porting/testing on other architectures. git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGHIO/IOPanda/trunk@2 38c3d835-c875-442e-b0fe-21c19ce1d001
Diffstat (limited to 'src')
-rw-r--r--src/DumpVar.c202
-rw-r--r--src/GHExtension.c90
-rw-r--r--src/Output3D.c487
-rw-r--r--src/Panda/App_Info.C96
-rw-r--r--src/Panda/App_Info.h31
-rw-r--r--src/Panda/Array.C649
-rw-r--r--src/Panda/Array.h88
-rw-r--r--src/Panda/ArrayDistribution.C205
-rw-r--r--src/Panda/ArrayDistribution.h70
-rw-r--r--src/Panda/ArrayGroup.C521
-rw-r--r--src/Panda/ArrayGroup.h75
-rw-r--r--src/Panda/ArrayLayout.C179
-rw-r--r--src/Panda/ArrayLayout.h26
-rw-r--r--src/Panda/Attribute.C187
-rw-r--r--src/Panda/Attribute.h43
-rw-r--r--src/Panda/CSDIO.C694
-rw-r--r--src/Panda/CSDIO.h60
-rw-r--r--src/Panda/CSDIO_Shared.C241
-rw-r--r--src/Panda/CSDIO_Shared.h33
-rw-r--r--src/Panda/Chunk.C692
-rw-r--r--src/Panda/Chunk.h68
-rw-r--r--src/Panda/Collective_IO.C25
-rw-r--r--src/Panda/Collective_IO.h18
-rw-r--r--src/Panda/List.C175
-rw-r--r--src/Panda/List.h61
-rw-r--r--src/Panda/MPIFS.C971
-rw-r--r--src/Panda/MPIFS.h95
-rw-r--r--src/Panda/Panda.C153
-rw-r--r--src/Panda/Panda.h31
-rw-r--r--src/Panda/Shared_IO.C237
-rw-r--r--src/Panda/Shared_IO.h32
-rw-r--r--src/Panda/Simple_IO.C846
-rw-r--r--src/Panda/Simple_IO.h91
-rw-r--r--src/Panda/StopWatch.h34
-rw-r--r--src/Panda/Template.C40
-rw-r--r--src/Panda/Template.h22
-rw-r--r--src/Panda/VirtFS.C4
-rw-r--r--src/Panda/VirtFS.h14
-rw-r--r--src/Panda/c_interface.C172
-rw-r--r--src/Panda/c_interface.h28
-rw-r--r--src/Panda/compute_test.C350
-rw-r--r--src/Panda/configure75
-rw-r--r--src/Panda/definitions.h186
-rw-r--r--src/Panda/fulltime.C410
-rw-r--r--src/Panda/io_main.C83
-rw-r--r--src/Panda/make.code.defn77
-rw-r--r--src/Panda/makefile.hpux.mpich19
-rw-r--r--src/Panda/makefile.ibm.mpif11
-rw-r--r--src/Panda/makefile.proto96
-rw-r--r--src/Panda/makefile.sgi.mpich10
-rw-r--r--src/Panda/makefile.sun.mpich18
-rw-r--r--src/Panda/message.h81
-rw-r--r--src/Panda/oneexe.C91
-rw-r--r--src/Panda/os-detected1
-rw-r--r--src/Panda/part_test.C385
-rw-r--r--src/Panda/shared_test.C353
-rw-r--r--src/Startup.c77
-rw-r--r--src/ioPandaGH.h32
-rw-r--r--src/make.code.defn3
-rw-r--r--src/make.configuration.defn21
60 files changed, 10165 insertions, 0 deletions
diff --git a/src/DumpVar.c b/src/DumpVar.c
new file mode 100644
index 0000000..86ed6d9
--- /dev/null
+++ b/src/DumpVar.c
@@ -0,0 +1,202 @@
+/*@@
+ @file DumpVar.c
+ @date 01 Oct 1999
+ @author Jonghyun Lee
+ @desc Do the actual writing of a 3D grid function,
+ for output or for checkpointing
+ @enddesc
+ @history
+ @endhistory
+ @@*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef SGI
+#include <time.h>
+#endif
+
+#include "cctk.h"
+#include "cctk_Flesh.h"
+#include "cctk_Groups.h"
+#include "cctk_GroupsOnGH.h"
+#include "cctk_Comm.h"
+#include "cctk_WarnLevel.h"
+#include "cctk_GHExtensions.h"
+#include "cctk_parameters.h"
+#ifdef CACTUSPUGH_PUGH
+#include "CactusPUGH/PUGH/src/include/pugh.h"
+#endif
+#include "CactusBase/IOUtil/src/ioGH.h"
+#include "ioPandaGH.h"
+
+
+#define IOTAGBASE 20000 /* This may break on more than 2000 processors */
+
+
+static char *char_time_date = NULL;
+
+
+/*@@
+   @routine IOPanda_getDumpData
+   @desc    Collect the (possibly downsampled) local patch of grid variable
+            'index' at 'timelevel' into a contiguous buffer for 3D output.
+            On return:
+              *outme      - data to write; newly malloc'ed iff *free_outme==1
+              *free_outme - 1 if the caller must free(*outme), else 0
+              bnd[0..2]   - local lower bounds (downsampled index space)
+              bnd[3..5]   - local sizes
+              bnd[6..8]   - global sizes
+            element_size is the size in bytes of one output element
+            (only used to size the downsampling buffer).
+   @enddesc
+ @@*/
+void IOPanda_getDumpData (cGH *GH, int index, int timelevel, void **outme,
+                          int *free_outme, CCTK_INT4 bnd [9], int element_size)
+{
+  DECLARE_CCTK_PARAMETERS
+  int i;
+  int myproc;
+  ioGH *ioUtilGH;
+  pGH *pughGH;
+  CCTK_REAL4 *single_ptr;
+  CCTK_REAL *real_ptr;
+  CCTK_CHAR *char_ptr;
+  CCTK_INT *int_ptr;
+  void *data = CCTK_VarDataPtrI (GH, timelevel, index);
+
+  /* to make the compiler happy */
+  single_ptr = NULL;
+  real_ptr = NULL;
+  char_ptr = NULL;
+  int_ptr = NULL;
+
+  ioUtilGH = (ioGH *) GH->extensions [CCTK_GHExtensionHandle ("IO")];
+  pughGH = (pGH *) GH->extensions [CCTK_GHExtensionHandle ("PUGH")];
+
+  myproc = CCTK_MyProc (GH);
+
+  /* Fast path: no downsampling requested in any direction */
+  if (ioUtilGH->downsample_x == 1 &&
+      ioUtilGH->downsample_y == 1 &&
+      ioUtilGH->downsample_z == 1) {
+
+    if (ioUtilGH->out_single) {
+      /* convert CCTK_REAL data to single precision for output
+         NOTE(review): malloc result is not checked here */
+      single_ptr = (CCTK_REAL4 *) malloc (pughGH->npoints*sizeof (CCTK_REAL4));
+
+      for (i = 0; i < pughGH->npoints; i++)
+        single_ptr [i] = (CCTK_REAL4) ((CCTK_REAL *) data) [i];
+
+      *outme = single_ptr;
+      *free_outme = 1;
+    } else {
+      /* hand out the variable's own data pointer; nothing to free */
+      *outme = data;
+      *free_outme = 0;
+    }
+
+    for (i = 0; i < 3; i++) {
+      bnd [i] = GH->cctk_lbnd[i]; /* the bounds */
+      bnd [i+3] = GH->cctk_lsh[i]; /* the sizes */
+      bnd [i+6] = GH->cctk_gsh[i]; /* the global space */
+    }
+
+  } else {
+
+    int start [3], end [3];
+    int i, j, k, l;
+
+    /* Downsampling code ... */
+    /* global downsampled sizes, rounded up for a partial last sample */
+    bnd [6] = GH->cctk_gsh[0] / ioUtilGH->downsample_x;
+    if (GH->cctk_gsh[0] % ioUtilGH->downsample_x)
+      bnd [6]++;
+    bnd [7] = GH->cctk_gsh[1] / ioUtilGH->downsample_y;
+    if (GH->cctk_gsh[1] % ioUtilGH->downsample_y)
+      bnd [7]++;
+    bnd [8] = GH->cctk_gsh[2] / ioUtilGH->downsample_z;
+    if (GH->cctk_gsh[2] % ioUtilGH->downsample_z)
+      bnd [8]++;
+
+    if (verbose)
+      printf ("Downsampled sizes (%d, %d, %d) -> (%d, %d, %d)\n",
+              GH->cctk_gsh[0], GH->cctk_gsh[1], GH->cctk_gsh[2],
+              (int) bnd [6], (int) bnd [7], (int) bnd [8]);
+
+    /* Now figure out the local downsampling */
+    /* The local starts are the lb modded into the downsample */
+    for (i = 0; i < 3; i++) {
+      int downsample;
+
+      if (i == 0)
+        downsample = ioUtilGH->downsample_x;
+      else if (i == 1)
+        downsample = ioUtilGH->downsample_y;
+      else
+        downsample = ioUtilGH->downsample_z;
+
+      /* first sample at or after this processor's owned region */
+      bnd [i] = GH->cctk_lbnd[i] / downsample;
+      start [i] = bnd [i] * downsample;
+      if (start [i] <
+          GH->cctk_lbnd[i] + pughGH->ownership [PUGH_VERTEXCTR][i][0]) {
+        start [i] += downsample;
+        bnd [i] ++;
+      }
+      /* last sample inside the owned region */
+      end [i] = ((GH->cctk_lbnd [i] +
+                  pughGH->ownership [PUGH_VERTEXCTR][i][1] - 1) / downsample)
+                * downsample;
+      bnd [i+3] = (end [i] - start [i]) / downsample + 1;
+    }
+
+    if (verbose) {
+      printf ("Downsample ranges (%d, %d, %d) -> (%d, %d, %d)\n",
+              start [0], start [1], start [2],
+              end [0], end [1], end [2]);
+      printf ("Local size/bound (%d, %d, %d) (%d, %d, %d)\n",
+              (int) bnd [3], (int) bnd [4], (int) bnd [5],
+              (int) bnd [0], (int) bnd [1], (int) bnd [2]);
+    }
+
+    /* compute local ranges */
+    for (i = 0; i < 3; i++) {
+      start [i] -= GH->cctk_lbnd [i];
+      end [i] -= GH->cctk_lbnd [i];
+    }
+
+    /* NOTE(review): malloc result unchecked; element_size comes from the
+       caller (see IOPanda_Timestep) -- verify it is really a byte count */
+    *outme = malloc (bnd [3] * bnd [4] * bnd [5] * element_size);
+    *free_outme = 1;
+
+    /* I hate it to repeat the loops for each case label
+       but that way produces much more efficient code */
+    l = 0;
+    switch (CCTK_VarTypeI (index)) {
+    case CCTK_VARIABLE_CHAR:
+      char_ptr = (CCTK_CHAR *) *outme;
+      for (k = start [2]; k <= end [2]; k += ioUtilGH->downsample_z)
+        for (j = start [1]; j <= end [1]; j += ioUtilGH->downsample_y)
+          for (i = start [0]; i <= end [0]; i += ioUtilGH->downsample_x)
+            char_ptr [l++] = ((CCTK_CHAR *) data) [DI (pughGH, i, j, k)];
+      break;
+
+    case CCTK_VARIABLE_INT:
+      int_ptr = (CCTK_INT *) *outme;
+      for (k = start [2]; k <= end [2]; k += ioUtilGH->downsample_z)
+        for (j = start [1]; j <= end [1]; j += ioUtilGH->downsample_y)
+          for (i = start [0]; i <= end [0]; i += ioUtilGH->downsample_x)
+            int_ptr [l++] = ((CCTK_INT *) data) [DI (pughGH, i, j, k)];
+      break;
+
+    case CCTK_VARIABLE_REAL:
+      if (ioUtilGH->out_single)
+        single_ptr = (CCTK_REAL4 *) *outme;
+      else
+        real_ptr = (CCTK_REAL *) *outme;
+      for (k = start [2]; k <= end [2]; k += ioUtilGH->downsample_z)
+        for (j = start [1]; j <= end [1]; j += ioUtilGH->downsample_y)
+          for (i = start [0]; i <= end [0]; i += ioUtilGH->downsample_x)
+            if (ioUtilGH->out_single)
+              single_ptr [l++] = (CCTK_REAL4)
+                                 (((CCTK_REAL *) data) [DI (pughGH, i, j, k)]);
+            else
+              real_ptr [l++] = ((CCTK_REAL *) data) [DI (pughGH, i, j, k)];
+      break;
+
+    default:
+      CCTK_WARN (1, "Unsupported variable type in IOPanda_getDumpData");
+      return;
+    }
+  }
+
+  if (verbose) {
+    printf ("Global size: %d %d %d\n",
+            (int) bnd [6], (int) bnd [7], (int) bnd [8]);
+    printf ("Lower bound: %d %d %d\n",
+            (int) bnd [0], (int) bnd [1], (int) bnd [2]);
+    printf ("Chunk size : %d %d %d\n",
+            (int) bnd [3], (int) bnd [4], (int) bnd [5]);
+  }
+}
+
+
diff --git a/src/GHExtension.c b/src/GHExtension.c
new file mode 100644
index 0000000..210db6a
--- /dev/null
+++ b/src/GHExtension.c
@@ -0,0 +1,90 @@
+ /*@@
+ @file GHExtension.c
+ @date 01 Oct 1999
+ @author Jonghyun Lee
+ @desc IOPanda GH extension stuff
+ @enddesc
+ @history
+ @endhistory
+ @@*/
+
+/*#define DEBUG_IO*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "cctk_Flesh.h"
+#include "cctk_Groups.h"
+#include "cctk_Comm.h"
+#include "cctk_Misc.h"
+#include "cctk_GHExtensions.h"
+#include "cctk_parameters.h"
+#include "cctk_WarnLevel.h"
+#ifdef CACTUSPUGH_PUGH
+#include "CactusPUGH/PUGH/src/include/pugh.h"
+#endif
+#include "CactusBase/IOUtil/src/ioGH.h"
+#include "ioPandaGH.h"
+
+void Panda_Create(int, int);
+
+/*@@
+   @routine IOPanda_SetupGH
+   @desc    Allocate the IOPanda GH extension together with its two
+            per-variable tables.  'config' and 'convergence_level' are
+            unused but required by the Cactus SetupGH interface.
+   @enddesc
+ @@*/
+void *IOPanda_SetupGH (tFleshConfig *config, int convergence_level, cGH *GH)
+{
+  int numvars;
+  pandaGH *newGH;
+
+  numvars = CCTK_NumVars ();
+
+  /* was: three unchecked mallocs; a failure crashed later in
+     IOPanda_InitGH when the tables were first touched */
+  newGH = (pandaGH *) malloc (sizeof (pandaGH));
+  if (newGH == NULL) {
+    CCTK_WARN (1, "IOPanda_SetupGH: out of memory for GH extension");
+    return (NULL);
+  }
+
+  newGH->IO_3Dnum = (int *) malloc (numvars * sizeof (int));
+  newGH->IO_3Dlast = (int *) malloc (numvars * sizeof (int));
+  if (newGH->IO_3Dnum == NULL || newGH->IO_3Dlast == NULL) {
+    CCTK_WARN (1, "IOPanda_SetupGH: out of memory for variable tables");
+    free (newGH->IO_3Dnum);
+    free (newGH->IO_3Dlast);
+    free (newGH);
+    return (NULL);
+  }
+
+  return (newGH);
+}
+
+/*@@
+   @routine IOPanda_InitGH
+   @desc    Initialise the IOPanda GH extension: set the output frequency,
+            select the variables to output, choose/create the output
+            directory (processor 0 also records it in the file
+            'FILEPREFIX' for the Panda library), reset the
+            last-output-iteration table, and create the Panda object.
+   @enddesc
+   @return  0 always
+ @@*/
+int IOPanda_InitGH (cGH *GH)
+{
+  DECLARE_CCTK_PARAMETERS
+  int i;
+  ioGH *ioUtilGH;
+  pandaGH *myGH;
+
+  /* get the handles for IOUtil and IOPanda extensions */
+  ioUtilGH = (ioGH *) GH->extensions [CCTK_GHExtensionHandle ("IO")];
+  myGH = (pandaGH *) GH->extensions [CCTK_GHExtensionHandle ("IOPanda")];
+
+  /* How often to output */
+  myGH->IO_3Devery = out_every;
+  if (out3D_every > 0)
+    myGH->IO_3Devery = out3D_every;
+
+  InitIONum (myGH->IO_3Dnum, out3D_vars);
+
+  /* Deal with the output directories */
+  myGH->outpfx_3D = outdir;
+  if (!CCTK_Equals(outdir3D,"outdir"))
+    myGH->outpfx_3D = outdir3D;
+
+  /* Create the output directories */
+  if (myGH->IO_3Devery > 0) {
+    if (CCTK_MyProc (GH) == 0) {
+      FILE *fp;
+
+      if (CCTK_mkdir (myGH->outpfx_3D) != 0)
+        CCTK_WARN (2,"Problem creating IO 3D directory");
+
+      /* was: fp was used without checking fopen() for failure,
+         crashing on e.g. a read-only working directory */
+      fp = fopen("FILEPREFIX", "w");
+      if (fp != NULL) {
+        fprintf(fp, "%s", myGH->outpfx_3D);
+        fclose(fp);
+      }
+      else
+        CCTK_WARN (1, "Could not open file 'FILEPREFIX' for writing");
+    }
+  }
+
+  /* no variable has been output yet */
+  for (i=0; i<CCTK_NumVars(); i++)
+    myGH->IO_3Dlast [i] = -1;
+
+  myGH->fileList_3D = NULL;
+
+  Panda_Create(ioUtilGH->ioproc_every, 1);
+
+  return (0);
+}
diff --git a/src/Output3D.c b/src/Output3D.c
new file mode 100644
index 0000000..41143a9
--- /dev/null
+++ b/src/Output3D.c
@@ -0,0 +1,487 @@
+ /*@@
+ @file Output3D.c
+ @date 01 Oct 1999
+ @author Jonghyun Lee
+ @desc Functions to deal 3D output of GFs
+ @enddesc
+ @history
+ @endhistory
+ @@*/
+
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cctk.h"
+#include "cctk_Flesh.h"
+#include "cctk_Groups.h"
+#include "cctk_parameters.h"
+#include "cctk_GHExtensions.h"
+#include "cctk_WarnLevel.h"
+#include "cctk_Comm.h"
+#include "ioPandaGH.h"
+#include "Panda/c_interface.h"
+#include "CactusBase/IOUtil/src/ioGH.h"
+#ifdef CACTUSPUGH_PUGH
+#include "CactusPUGH/PUGH/src/include/pugh.h"
+#endif
+#ifdef SGI
+#include <time.h>
+#endif
+
+#include "external/IEEEIO/src/IOProtos.h"
+
+int IOPanda_Output3DVarAs (cGH *GH, const char *var, const char *alias);
+int IOPanda_TimeFor3D (cGH *GH, int index);
+void IOPanda_Timestep (cGH *GH, int index, const char *alias);
+void IOPanda_AddCommonAttributes (cGH *GH, int index, int timelevel, CCTK_INT *gsz, char *fname);
+void IOPanda_IEEEIOStructDump (cGH *GH, char *fname);
+
+/*@@
+ @routine IOPanda_Output3DGH
+ @date Sat March 6 1999
+ @author Gabrielle Allen
+ @desc
+ Loops over all variables and outputs them if necessary
+ @enddesc
+ @calls CCTK_GHExtensionHandle
+ CCTK_NumVars
+ CCTK_ImplementationFromVar
+ CCTK_VarName
+ IOPanda_TimeFor3D
+ IOPanda_Output3DVarAs
+ @calledby
+ @history
+
+ @endhistory
+ @var GH
+ @vdesc Pointer to CCTK GH
+ @vtype cGH
+ @vio in
+ @vcomment
+ @endvar
+@@*/
+
+/* Loop over all variables and output those for which it is time.
+   Returns 0.  (See the routine header comment above for details.) */
+int IOPanda_Output3DGH (cGH *GH)
+{
+  int i;
+  pandaGH *myGH;
+  char *implementation;
+  char *name;
+  char *fullname;
+  DECLARE_CCTK_PARAMETERS
+
+  /* Get the GH extension for IOPanda */
+  myGH = (pandaGH *) GH->extensions [CCTK_GHExtensionHandle ("IOPanda")];
+
+  if (myGH->IO_3Devery <= 0)
+    return (0);     /* was 'return;' -- invalid in a function returning int */
+
+  /* Loop over all variables */
+  for (i = 0; i < CCTK_NumVars (); i++) {
+    if (IOPanda_TimeFor3D (GH, i)) {
+      implementation = CCTK_ImpFromVarI (i);
+      name = CCTK_VarName (i);
+      /* "<implementation>::<name>" plus trailing NUL */
+      fullname = (char *) malloc (strlen (implementation) +
+                                  strlen (name) + 3);
+      assert (fullname);
+      sprintf (fullname, "%s::%s", implementation, name);
+
+      if (verbose) {
+        printf ("IOPanda Output3DGH : \n");
+        printf (" fullname/name = %s/%s\n", fullname, name);
+      }
+
+      IOPanda_Output3DVarAs (GH, fullname, name);
+
+      free (fullname);
+
+      /* Register another 3D output for this GF */
+      myGH->IO_3Dnum [i]++;
+
+      /* Register GF as having 3D output this iteration */
+      myGH->IO_3Dlast [i] = GH->cctk_iteration;
+    }
+  }
+
+  return (0);
+}
+
+
+/*@@
+ @routine IOPanda_Output3DVarAs
+ @date Sat March 6 1999
+ @author Gabrielle Allen
+ @desc
+ unconditional output of a variable using the IOPanda 3D output method
+ @enddesc
+ @calls CCTK_DecomposeName
+ CCTK_VarIndex
+ CCTK_GHExtensionHandle
+ IOPanda_Write3D
+ @calledby IOPanda_Output3DGH
+ @history
+
+ @endhistory
+ @var GH
+ @vdesc Pointer to CCTK GH
+ @vtype cGH
+ @vio in
+ @vcomment
+ @endvar
+ @var fullname
+ @vdesc complete name of variable to output
+ @vtype const char *
+ @vio in
+ @vcomment
+ @endvar
+ @var alias
+ @vdesc alias name of variable to output (used to generate output filename)
+ @vtype const char *
+ @vio in
+ @vcomment
+ @endvar
+@@*/
+
+/* Unconditional 3D output of variable 'fullname', written to files named
+   after 'alias'.  Returns 0 on success, -1 if the name is unknown. */
+int IOPanda_Output3DVarAs (cGH *GH, const char *fullname, const char *alias)
+{
+  DECLARE_CCTK_PARAMETERS
+  int index;
+  pandaGH *myGH;
+
+  index = CCTK_VarIndex(fullname);
+  /* was: a negative (unknown-variable) index was passed straight on
+     to IOPanda_Timestep */
+  if (index < 0) {
+    CCTK_WARN (1, "IOPanda_Output3DVarAs: unknown variable name");
+    return (-1);
+  }
+
+  /* Get the GH extension for IOPanda */
+  myGH = (pandaGH *) GH->extensions [CCTK_GHExtensionHandle ("IOPanda")];
+
+  if (verbose) {
+    printf ("\nIn IOPanda Output3DVarAs\n-------------------\n");
+    printf (" Fullname = -%s-\n", fullname);
+    printf (" Alias = -%s-\n", alias);
+    printf (" Index = %d\n", index);
+  }
+
+  /* Do the 3D output */
+  IOPanda_Timestep (GH, index, alias);
+
+  return (0);
+}
+
+
+/*@@
+ @routine IOPanda_TimeFor3D
+ @date Sat March 6 1999
+ @author Gabrielle Allen
+ @desc
+ Decides if it is time to output a variable using the IOPanda 3D output
+ method
+ @enddesc
+ @calls CCTK_GHExtensionHandle
+ CCTK_GroupTypeFromVarI
+ CCTK_WARN
+ CCTK_QueryGroupStorageI
+ CCTK_GroupNameFromVarI
+ @calledby IOPanda_Output3DGH
+ @history
+
+ @endhistory
+ @var GH
+ @vdesc Pointer to CCTK GH
+ @vtype cGH
+ @vio in
+ @vcomment
+ @endvar
+ @var index
+ @vdesc index of variable
+ @vtype int
+ @vio in
+ @vcomment
+ @endvar
+@@*/
+
+/* Decide whether variable 'index' should get 3D output at the current
+   iteration: it must be selected for output, the iteration must be an
+   output iteration, it must not already have been output this iteration,
+   and its group must have storage.  Returns 1 to output, 0 to skip. */
+int IOPanda_TimeFor3D (cGH *GH, int index)
+{
+  pandaGH *myGH;
+  char *fullname, *warning;
+
+  myGH = (pandaGH *) GH->extensions [CCTK_GHExtensionHandle ("IOPanda")];
+
+  /* not selected, or not at an output iteration */
+  if (myGH->IO_3Dnum [index] == 0 ||
+      GH->cctk_iteration % myGH->IO_3Devery != 0)
+    return (0);
+
+  /* already output at this iteration */
+  if (GH->cctk_iteration == myGH->IO_3Dlast [index]) {
+    CCTK_WARN (2, "Already done 3D output in IOPanda");
+    return (0);
+  }
+
+  /* everything checked -- output unless the group has no storage */
+  if (CCTK_QueryGroupStorageI (GH, CCTK_GroupIndexFromVarI (index)))
+    return (1);
+
+  fullname = CCTK_FullName (index);
+  warning = (char *) malloc (80 + strlen (fullname));
+  sprintf (warning, "No IOPandaIO 3D output for '%s' (no storage)", fullname);
+  CCTK_WARN (2, warning);
+  free (warning);
+  free (fullname);
+
+  return (0);
+}
+
+
+/*@@
+ @routine IOPanda_TriggerOutput3D
+ @date Sat March 6 1999
+ @author Gabrielle Allen
+ @desc
+ Triggers the output a variable using the IOPanda 3D output
+ method
+ @enddesc
+ @calls CCTK_GHExtensionHandle
+ CCTK_VarName
+ IOPanda_Write3D
+ @calledby
+ @history
+
+ @endhistory
+ @var GH
+ @vdesc Pointer to CCTK GH
+ @vtype cGH
+ @vio in
+ @vcomment
+ @endvar
+ @var index
+ @vdesc index of variable to output
+ @vtype int
+ @vio in
+ @vcomment
+ @endvar
+@@*/
+
+/* Trigger 3D output of variable 'index' (filenames use the variable's
+   own name), then record the output in the bookkeeping tables.
+   Returns 0. */
+int IOPanda_TriggerOutput3D (cGH *GH, int index)
+{
+  DECLARE_CCTK_PARAMETERS
+  char *varname = CCTK_VarName (index);
+  pandaGH *myGH =
+    (pandaGH *) GH->extensions [CCTK_GHExtensionHandle ("IOPanda")];
+
+  if (verbose)
+  {
+    printf("\nIn IOPanda TriggerOutput3D\n---------------------\n");
+    printf(" Index = %d\n", index);
+    printf(" Variable = -%s-\n", varname);
+  }
+
+  /* do the actual output ... */
+  IOPanda_Timestep (GH, index, varname);
+
+  /* ... and note that this GF was output at this iteration */
+  myGH->IO_3Dnum [index]++;
+  myGH->IO_3Dlast [index] = GH->cctk_iteration;
+
+  return (0);
+}
+
+/*@@
+   @routine IOPanda_Timestep
+   @desc    Dump one grid function for the current timestep through the
+            Panda parallel-IO library: build the ArrayInfo descriptor
+            (global size, element type, memory and disk layouts), fetch
+            the local data chunk, hand it to PandaTimestep, and attach
+            the common Cactus attributes to the output file.
+            Scalars are skipped.
+   @enddesc
+ @@*/
+void IOPanda_Timestep(cGH *GH, int index, const char *alias)
+{
+  DECLARE_CCTK_PARAMETERS
+  void *data;
+  int tmp[1], tmp1[3], tmp2[3];
+  Distribution dist1[3], dist2[3];
+  CCTK_INT4 bnd[9];
+  int free_flag, timelevel;
+  ArrayInfo ainfo;
+
+  ioGH *ioUtilGH;
+  pGH *pughGH;
+
+  /* scalars cannot be written as 3D arrays */
+  if (CCTK_GroupTypeFromVarI (index) == GROUP_SCALAR) {
+    printf("##### %s is scalar\n", alias);
+    return;
+  }
+
+  ioUtilGH = (ioGH *) GH->extensions [CCTK_GHExtensionHandle ("IO")];
+  pughGH = (pGH *) GH->extensions [CCTK_GHExtensionHandle ("PUGH")];
+
+  ainfo.name_ = (char *)alias;
+
+  /* global array size, in reversed (z, y, x) dimension order */
+  ainfo.rank_ = 3;
+  tmp1[0] = GH->cctk_gsh[2];
+  tmp1[1] = GH->cctk_gsh[1];
+  tmp1[2] = GH->cctk_gsh[0];
+  ainfo.size_ = tmp1;
+
+  /* map the CCTK variable type to an IEEEIO element type */
+  switch (CCTK_VarTypeI (index)) {
+  case CCTK_VARIABLE_CHAR:
+    ainfo.esize_ = CHAR;
+    break;
+  case CCTK_VARIABLE_INT:
+#ifdef CCTK_INTEGER_PRECISION_8
+    ainfo.esize_ = INT64;
+#elif CCTK_INTEGER_PRECISION_4
+    ainfo.esize_ = INT32;
+#elif CCTK_INTEGER_PRECISION_2
+    ainfo.esize_ = INT16;
+#endif
+    break;
+  case CCTK_VARIABLE_REAL:
+    if (ioUtilGH->out_single) ainfo.esize_ = FLOAT32;
+    else {
+#ifdef CCTK_REAL_PRECISION_8
+      ainfo.esize_ = FLOAT64;
+#elif CCTK_REAL_PRECISION_4
+      ainfo.esize_ = FLOAT32;
+#endif
+    }
+    break;
+  default:
+    /* was: fell through leaving ainfo.esize_ uninitialized */
+    CCTK_WARN (1, "Unsupported variable type in IOPanda_Timestep");
+    return;
+  }
+
+  /* memory layout: the PUGH processor topology, in (z, y, x) order */
+  ainfo.mem_rank_ = 3;
+  tmp2[0] = pughGH->nprocz; tmp2[1] = pughGH->nprocy; tmp2[2] = pughGH->nprocx;
+  ainfo.mem_layout_ = tmp2;
+  dist1[0] = dist1[1] = dist1[2] = BLOCK;
+  ainfo.mem_dist_ = dist1;
+
+  /* disk layout: BLOCK distribution over the IO processors */
+  ainfo.disk_rank_ = 1;
+  dist2[0] = BLOCK; dist2[1] = dist2[2] = NONE;
+  tmp[0]= ((CCTK_nProcs(GH) - 1) / ioUtilGH->ioproc_every + 1);
+
+  ainfo.disk_layout_ = tmp;
+  ainfo.disk_dist_ = dist2;
+
+  timelevel = CCTK_NumTimeLevelsFromVarI (index) - 1;
+  if (timelevel > 0) timelevel--;
+
+  /* NOTE(review): ainfo.esize_ is an IEEEIO type code, but
+     IOPanda_getDumpData uses its last argument as an element size in
+     bytes when sizing the downsampling buffer -- verify this matches */
+  IOPanda_getDumpData(GH, index, timelevel, &data, &free_flag, bnd,
+                      ainfo.esize_);
+  ainfo.data_ = (char *)data;
+  ainfo.stencil_width_ = pughGH->nghostzones;
+
+
+  PandaTimestep(&ainfo);
+  IOPanda_AddCommonAttributes(GH, index, timelevel, ainfo.size_, ainfo.name_);
+  if (PandaIsNewFile(ainfo.name_)) IOPanda_IEEEIOStructDump(GH, ainfo.name_);
+
+  /* NOTE(review): when free_flag is set, the buffer allocated by
+     IOPanda_getDumpData is never freed here (possible leak) -- confirm
+     whether PandaTimestep keeps a reference before adding a free() */
+}
+
+/*@@
+   @routine IOPanda_AddCommonAttributes
+   @desc    Attach the standard set of Cactus attributes (variable and
+            group name, group type, timelevels, date, coordinate origin
+            and ranges, grid spacing, processor/IO setup, iteration) to
+            the Panda output file 'fname'.  gsz[3] is the (possibly
+            downsampled) global grid size.
+   @enddesc
+ @@*/
+void IOPanda_AddCommonAttributes (cGH *GH, int index, int timelevel,
+                                  CCTK_INT4 gsz [3], char *fname)
+{
+  DECLARE_CCTK_PARAMETERS
+  CCTK_REAL d3_to_IO [6]; /* buffer for writing doubles to IEEEIO */
+  CCTK_INT4 i_to_IO; /* buffer for writing an int to IEEEIO */
+  char *name, *gname;
+  ioGH *ioUtilGH;
+  char *char_time_date = "";
+
+#ifdef SGI
+  /* datestamp is only available on SGI (cf. the #include <time.h> above) */
+  time_t t = time(NULL);
+  char_time_date = asctime (localtime (&t));
+#endif
+
+  /* Get the handle for IO extensions */
+  ioUtilGH = (ioGH *) GH->extensions [CCTK_GHExtensionHandle ("IO")];
+
+  name = CCTK_FullName (index);
+
+  Panda_WriteAttribute (fname, "name", BYTE, strlen (name) + 1, name);
+
+  free (name);
+
+  gname = CCTK_GroupNameFromVarI (index);
+  Panda_WriteAttribute (fname, "groupname", BYTE, strlen (gname) + 1, gname);
+  free (gname);
+
+  i_to_IO = CCTK_GroupTypeFromVarI (index);
+  Panda_WriteAttribute (fname, "grouptype", INT32,
+                        1, &i_to_IO);
+
+  i_to_IO = CCTK_NumTimeLevelsFromVarI (index);
+  Panda_WriteAttribute (fname, "ntimelevels", INT32,
+                        1, &i_to_IO);
+
+  i_to_IO = timelevel;
+  Panda_WriteAttribute (fname, "timelevel", INT32,
+                        1, &i_to_IO);
+
+  if (char_time_date && out3D_datestamp)
+    Panda_WriteAttribute (fname, "date", BYTE,
+                          strlen (char_time_date) + 1, char_time_date);
+
+  /* NOTE(review): FLOAT64 assumes CCTK_REAL is 8 bytes -- confirm */
+  Panda_WriteAttribute (fname, "time", FLOAT64, 1,&GH->cctk_time);
+
+  /* coordinate origin and min/max extent of the global grid */
+  d3_to_IO [0] = CCTK_CoordOrigin ("x");
+  d3_to_IO [1] = CCTK_CoordOrigin ("y");
+  d3_to_IO [2] = CCTK_CoordOrigin ("z");
+  Panda_WriteAttribute (fname, "origin", FLOAT64,3,d3_to_IO);
+  CCTK_CoordRange (GH, &d3_to_IO [0], &d3_to_IO [3], "x");
+  CCTK_CoordRange (GH, &d3_to_IO [1], &d3_to_IO [4], "y");
+  CCTK_CoordRange (GH, &d3_to_IO [2], &d3_to_IO [5], "z");
+  Panda_WriteAttribute (fname, "min_ext",FLOAT64,3,d3_to_IO);
+  Panda_WriteAttribute (fname, "max_ext",FLOAT64, 3,d3_to_IO+3);
+
+  /* grid spacing of the output (scaled by the downsampling factors) */
+  d3_to_IO [0] = GH->cctk_delta_space [0] * ioUtilGH->downsample_x;
+  d3_to_IO [1] = GH->cctk_delta_space [1] * ioUtilGH->downsample_y;
+  d3_to_IO [2] = GH->cctk_delta_space [2] * ioUtilGH->downsample_z;
+  Panda_WriteAttribute (fname, "delta", FLOAT64, 3,d3_to_IO);
+
+  /* when downsampling, also record the true evolution grid spacing */
+  if (ioUtilGH->downsample_x > 1 ||
+      ioUtilGH->downsample_y > 1 ||
+      ioUtilGH->downsample_z > 1) {
+    d3_to_IO [0] = GH->cctk_delta_space [0];
+    d3_to_IO [1] = GH->cctk_delta_space [1];
+    d3_to_IO [2] = GH->cctk_delta_space [2];
+    Panda_WriteAttribute (fname, "evolution_delta", FLOAT64, 3, d3_to_IO);
+  }
+
+  Panda_WriteAttribute (fname, "global_size", INT32, 3, gsz);
+
+  i_to_IO = CCTK_nProcs (GH);
+  Panda_WriteAttribute (fname, "nprocs", INT32, 1, &i_to_IO);
+
+  i_to_IO = ioUtilGH->ioproc_every;
+  Panda_WriteAttribute (fname, "ioproc_every", INT32, 1, &i_to_IO);
+
+  i_to_IO = ioUtilGH->unchunked;
+  Panda_WriteAttribute (fname, "unchunked", INT32, 1, &i_to_IO);
+
+  i_to_IO = GH->cctk_iteration;
+  Panda_WriteAttribute (fname, "iteration", INT32, 1, &i_to_IO);
+}
+
+
+/* Dump the GH state as "GH$..." attributes into a freshly created
+   Panda/IEEEIO output file 'fname'. */
+void IOPanda_IEEEIOStructDump (cGH *GH, char *fname)
+{
+  CCTK_INT4 int_attr;
+  CCTK_REAL real_attr;
+  ioGH *ioUtilGH =
+    (ioGH *) GH->extensions [CCTK_GHExtensionHandle ("IO")];
+
+  int_attr = GH->cctk_iteration;
+  Panda_WriteAttribute (fname, "GH$iteration", INT32, 1, &int_attr);
+
+  int_attr = ioUtilGH->ioproc_every;
+  Panda_WriteAttribute (fname, "GH$ioproc_every", INT32, 1, &int_attr);
+
+  int_attr = CCTK_nProcs (GH);
+  Panda_WriteAttribute (fname, "GH$nprocs", INT32, 1, &int_attr);
+
+  real_attr = GH->cctk_time;
+  Panda_WriteAttribute (fname, "GH$time", FLOAT64, 1, &real_attr);
+}
diff --git a/src/Panda/App_Info.C b/src/Panda/App_Info.C
new file mode 100644
index 0000000..77f1d4b
--- /dev/null
+++ b/src/Panda/App_Info.C
@@ -0,0 +1,96 @@
+#include "definitions.h"
+#include "App_Info.h"
+
+/* Build the bookkeeping for one application: the list of its members'
+ * MPI_COMM_WORLD ranks, and the inverse map from world rank to the rank
+ * within the application (-1 for processors outside this application). */
+App_Info::App_Info(int app_num, int app_size, int *world_ranks)
+{
+  int world_size;
+
+  app_num_ = app_num;
+  app_size_ = app_size;
+  world_ranks_ = copy_int_list(app_size, world_ranks);
+  MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+  relative_ranks_ = (int *) malloc(sizeof(int)*world_size);
+  /* mark every world rank as "not in this application" ... */
+  for(int i=0; i < world_size; i++)
+    relative_ranks_[i] = -1;
+  /* ... then fill in the members.  (was: 'for(i=0; ...)' reusing the
+   * previous loop's 'i', which relies on pre-ISO C++ for-scope leakage
+   * and does not compile under standard C++) */
+  for(int i=0; i < app_size_; i++)
+    relative_ranks_[world_ranks_[i]] = i;
+  intra_comm_ = NULL;
+  combine_count_ = 0;
+
+#ifdef DEBUG
+  printf("Creating an new App Info object\n");
+  printf("App_num = %d App_size = %d\n", app_num_, app_size_);
+  printf("Ranks relative: world: world-relative\n");
+  for(int j=0;j<app_size_;j++)
+    printf(" %d %d %d\n", j, world_ranks_[j],
+           relative_ranks_[world_ranks_[j]]);
+#endif
+}
+
+
+/* Release the rank tables and, if one was attached, the
+ * intra-application communicator (which is both MPI_Comm_free'd and
+ * free'd, since the MPI_Comm object itself was heap-allocated). */
+App_Info::~App_Info()
+{
+  if (intra_comm_ != NULL)
+  {
+    MPI_Comm_free(intra_comm_);
+    free(intra_comm_);
+    intra_comm_ = NULL;
+  }
+
+  if (world_ranks_ != NULL) free(world_ranks_);
+  if (relative_ranks_ != NULL) free(relative_ranks_);
+  world_ranks_ = NULL;
+  relative_ranks_ = NULL;
+}
+
+
+/* Application id of this application */
+int App_Info::app_num(){ return app_num_;}
+
+/* Number of processors in this application */
+int App_Info::app_size(){ return app_size_;}
+
+/* World rank of the application master (relative rank 0) */
+int App_Info::get_master(){ return world_ranks_[0];}
+
+/* Map a rank within this application to its MPI_COMM_WORLD rank */
+int App_Info::world_rank(int relative_rank)
+{
+  return world_ranks_[relative_rank];
+}
+
+/* Map an MPI_COMM_WORLD rank to its rank within this application
+   (-1 if the processor is not a member; see the constructor) */
+int App_Info::relative_rank(int world_rank)
+{
+  return relative_ranks_[world_rank];
+}
+
+/* Attach the intra-application communicator; App_Info takes ownership
+   (the destructor frees it) */
+void App_Info::set_intra_comm(MPI_Comm *intra_comm)
+{
+  intra_comm_ = intra_comm;
+}
+
+MPI_Comm* App_Info::intra_comm()
+{
+  return intra_comm_;
+}
+
+/* combine_count_: incremented/reset by callers; semantics live with the
+   users of this counter -- presumably tracks combined IO requests */
+void App_Info::inc_combine_count()
+{
+  combine_count_++;
+}
+
+int App_Info::combine_count()
+{
+  return combine_count_;
+}
+
+void App_Info::reset_combine_count()
+{
+  combine_count_ = 0;
+}
+
+
+/* Return the internal world-rank list (no copy -- do not free) */
+int* App_Info::world_ranks(){
+  return world_ranks_;
+}
+
+/* Copy the world-rank list into caller-provided storage */
+void App_Info::world_ranks(int *ret_list)
+{
+  for(int i=0; i < app_size_; i++)
+    ret_list[i] = world_ranks_[i];
+}
diff --git a/src/Panda/App_Info.h b/src/Panda/App_Info.h
new file mode 100644
index 0000000..f5d9664
--- /dev/null
+++ b/src/Panda/App_Info.h
@@ -0,0 +1,31 @@
+#ifndef App_Info_dot_h
+#define App_Info_dot_h
+
+#include "mpi.h"
+
+/* Bookkeeping for one application (a group of processors): its members'
+ * MPI_COMM_WORLD ranks, the inverse rank map, and an optional
+ * intra-application communicator. */
+class App_Info {
+  int app_num_;          /* application id */
+  int app_size_;         /* number of processors in this application */
+  int *world_ranks_;     /* relative rank -> MPI_COMM_WORLD rank */
+  int *relative_ranks_;  /* world rank -> relative rank (-1 if not a member) */
+  MPI_Comm *intra_comm_; /* intra-application communicator (may be NULL) */
+  int combine_count_;    /* counter for combined requests (see users) */
+
+ public:
+  App_Info(int,int,int*);
+  virtual ~App_Info();
+  int app_num();
+  int app_size();
+  int get_master();
+  int world_rank(int);
+  int relative_rank(int);
+  void set_intra_comm(MPI_Comm *);
+  MPI_Comm* intra_comm();
+  void inc_combine_count();
+  int combine_count();
+  void reset_combine_count();
+  int *world_ranks();
+  void world_ranks(int*);
+};
+
+#endif
diff --git a/src/Panda/Array.C b/src/Panda/Array.C
new file mode 100644
index 0000000..e2fd7eb
--- /dev/null
+++ b/src/Panda/Array.C
@@ -0,0 +1,649 @@
+#include "definitions.h"
+#include "MPIFS.h"
+#include "Array.h"
+
+#include "external/IEEEIO/src/Arch.h"
+
+extern "C" {
+ int IOsizeOf(int);
+ int IOreadAttributeInfo(IOFile, char *,int *, int *);
+ int IOreadAttribute(IOFile,int,void*);
+}
+
+extern int global_system_type_;
+extern MPIFS* MPIFS_global_obj;
+extern int SUBCHUNK_SIZE;
+
+/***************************************************************************
+ * Class: Array
+ * Description: This is a user-visible class. This is used to describe the
+ * global array. It also stores pointers to local chunks of
+ * data.
+ *
+ * Instance-variables:
+ * name_ - name of the array
+ * rank_ - rank of the array (inherited variable)
+ * size_ - size of the array (elements) in the various dimensions
+ * element_size_ - size of each array element (in bytes)
+ * compute_node_layout_ - layout of the compute nodes
+ * io_node_layout_ - layout of the io nodes
+ * subchunk_layout_ - layout of the subchunks
+ * natural_chunked - whether the array is natural chunked
+ * compute_node_alloc_policy - chunk dist policy on compute nodes
+ * io_node_alloc_policy - chunk dist policy on the io nodes
+ ****************************************************************************
+ */
+
+Array::Array() : Template()
+{
+ subchunk_layout_ = NULL;
+ element_size_ = 0;
+ natural_chunked_ = NO;
+ sub_chunked_ = NO;
+ overlap_ = NO;
+ io_strategy_ = SIMPLE_IO;
+}
+
+/* This function is used on the compute nodes to create the array *
+ * object. In this case there is no user-specified subchunking and *
+ * the chunk distribution on the compute nodes is 1 chunk per *
+ * compute node and round-robin on the io nodes. */
+Array::Array(char *name, int rank, int *sizearray, int elt_size,
+ ArrayLayout *mem_layout, Distribution *mem_dist,
+ ArrayLayout *io_layout, Distribution *io_dist):Template(rank, sizearray)
+{
+ do_init(name, rank, sizearray, elt_size, mem_layout, mem_dist,
+ io_layout, io_dist, NULL, NULL, REGULAR, ROUND_ROBIN, HPF);
+ /* call function to allocate chunk_list */
+ if (sizearray) allocate_chunks(COMPUTE_NODE);
+ overlap_ = NO;
+}
+
+/* This function is used on the compute nodes to create the array *
+ * object. In this case there is no user-specified subchunking and *
+ * the chunk distribution on the compute nodes is 1 chunk per *
+ * compute node and round-robin on the io nodes. Also in this case*
+ * the user specifies the data ptr to be used. */
+Array::Array(char *name, int rank, int *sizearray, int elt_size,
+ ArrayLayout *mem_layout, Distribution *mem_dist,
+ ArrayLayout *io_layout, Distribution *io_dist,
+ char *data_ptr) : Template(rank, sizearray)
+{
+ char *ptr = data_ptr;
+ do_init(name, rank, sizearray, elt_size, mem_layout, mem_dist,
+ io_layout, io_dist, NULL, NULL, REGULAR, ROUND_ROBIN, HPF);
+ /* call function to allocate chunk_list */
+ if (sizearray) allocate_chunks(COMPUTE_NODE, 1 , &ptr, 0);
+ overlap_ = NO;
+}
+
+/* This function is used on the compute nodes to create the array *
+ * object. In this case there is no user-specified subchunking and *
+ * the chunk distribution on the compute nodes is 1 chunk per *
+ * compute node and round-robin on the io nodes. Also in this case*
+ * the user specifies the data ptr to be used and stencil width. */
+Array::Array(char *name, int rank, int *sizearray, int elt_size,
+ ArrayLayout *mem_layout, Distribution *mem_dist,
+ ArrayLayout *io_layout, Distribution *io_dist,
+ char *data_ptr, int stencil_width) : Template(rank, sizearray)
+{
+ char *ptr = data_ptr;
+ do_init(name, rank, sizearray, elt_size, mem_layout, mem_dist,
+ io_layout, io_dist, NULL, NULL, REGULAR, ROUND_ROBIN, HPF);
+ /* call function to allocate chunk_list */
+ if (sizearray) allocate_chunks(COMPUTE_NODE, 1 , &ptr, stencil_width);
+ if (stencil_width > 0) overlap_ = YES;
+ else overlap_ = NO;
+}
+
+/* This function is used on the compute nodes to create the array *
+ * object. In this case there is user-specified subchunking and *
+ * the chunk distribution on the compute nodes is 1 chunk per *
+ * compute node and round-robin on the io nodes. */
+Array::Array(char *name, int rank, int *sizearray, int elt_size,
+ ArrayLayout *mem_layout, Distribution *mem_dist,
+ ArrayLayout *io_layout, Distribution *io_dist,
+ ArrayLayout *sub_layout, Distribution* sub_dist)
+ : Template(rank, sizearray)
+{
+ do_init(name, rank, sizearray, elt_size, mem_layout, mem_dist,
+ io_layout, io_dist, sub_layout, sub_dist,
+ REGULAR, ROUND_ROBIN, HPF);
+ /* call function to allocate chunk_list */
+ if (sizearray) allocate_chunks(COMPUTE_NODE);
+ overlap_ = NO;
+}
+
+/* This function is used on the compute nodes to create the array *
+ * object. In this case there is user-specified subchunking and *
+ * the chunk distribution on the compute nodes is 1 chunk per *
+ * compute node and round-robin on the io nodes. This function is *
+ * used to when the user provides the data_ptr. */
+Array::Array(char *name, int rank, int *sizearray, int elt_size,
+ ArrayLayout *mem_layout, Distribution *mem_dist,
+ ArrayLayout *io_layout, Distribution *io_dist,
+ ArrayLayout *sub_layout, Distribution* sub_dist,
+ char *data_ptr) : Template(rank, sizearray)
+{
+ char *ptr = data_ptr;
+ do_init(name, rank, sizearray, elt_size, mem_layout, mem_dist,
+ io_layout, io_dist, sub_layout, sub_dist,
+ REGULAR, ROUND_ROBIN, HPF);
+ /* call function to allocate chunk_list */
+ if (sizearray) allocate_chunks(COMPUTE_NODE, 1, &ptr, 0);
+ overlap_ = NO;
+}
+
+/* Initializes the state of the array object. the chunks are allocated *
+ * via another function */
+void Array::do_init(char *name, int rank, int *sizearray, int elt_size,
+ ArrayLayout *mem_layout, Distribution *mem_dist,
+ ArrayLayout *io_layout, Distribution *io_dist,
+ ArrayLayout *subchunk_layout, Distribution *subchunk_dist,
+ ChunkAllocPolicy comp_node_policy, ChunkAllocPolicy io_node_policy,
+ Block_Distribution block_dist)
+{
+ io_strategy_ = SIMPLE_IO;
+
+ name_ = (char *) malloc(sizeof(char)*(strlen(name)+5));
+ strcpy(name_, name);
+ ieee_size_ = elt_size;
+ element_size_ = IOsizeOf(ieee_size_);
+
+ compute_node_layout_ = new RegularDistribution(rank, mem_layout, mem_dist,
+ comp_node_policy, block_dist);
+ if (io_layout)
+ io_node_layout_ = new RegularDistribution(rank, io_layout, io_dist,
+ io_node_policy, block_dist);
+ else io_node_layout_ = NULL;
+ if (subchunk_layout)
+ subchunk_layout_ = new RegularDistribution(rank, subchunk_layout,
+ subchunk_dist, ROUND_ROBIN,
+ block_dist);
+ else subchunk_layout_ = NULL;
+
+ /* Check if there is any sub-chunking */
+ if (subchunk_layout_) sub_chunked_ = YES;
+ else sub_chunked_ = NO;
+
+ /* Check if there is any natural chuunking */
+ if (compute_node_layout_->equal(io_node_layout_)) natural_chunked_ = YES;
+ else natural_chunked_ = NO;
+}
+
+/* This function is used to initialize the array objects on the io *
+ * node side. */
/* I/O-node-side constructor: rebuild an Array from the flat integer
 * schema produced by Array::pack().  Fields are consumed in exactly the
 * order pack() wrote them, and *schema_buf is advanced past the
 * consumed portion. */
Array::Array(int **schema_buf)
{
  int* ptr = *schema_buf;

  io_strategy_ = *ptr++;
  op_type_ = *ptr++;
  /* Name was packed as a length followed by one int per character. */
  int len = *ptr++;
  name_ = (char *) malloc(len+1);
  for (int i=0; i< len; i++) name_[i] = (char) *ptr++;
  name_[len] = '\0';
  rank_ = *ptr++;

  /* Flag word: nonzero means the size vector follows. */
  if (*ptr++ > 0) {
    size_ = (int *) malloc(sizeof(int) * rank_);
    for(int i=0; i < rank_; i++) size_[i] = *ptr++;
  } else size_ = NULL;

  element_size_ = *ptr++;
  ieee_size_ = *ptr++;
  natural_chunked_ = (Boolean) *ptr++;
  sub_chunked_ = (Boolean) *ptr++;
  overlap_ = (Boolean) *ptr++;

  /* Layouts carry their own type tags; unpack_layout consumes them. */
  compute_node_layout_ = unpack_layout(&ptr);
  io_node_layout_ = unpack_layout(&ptr);

  /* pack() only wrote the subchunk layout when sub_chunked_ was set. */
  if (sub_chunked_) subchunk_layout_ = unpack_layout(&ptr);
  else subchunk_layout_ = NULL;

  *schema_buf = ptr;
}
+
+ArrayDistribution *Array::unpack_layout(int **schema_buf)
+{
+ int *ptr = *schema_buf;
+ int type = *ptr++;
+ ArrayDistribution *tmp;
+
+ if (type == UNSET) tmp = NULL;
+ else if (type == Regular) tmp = new RegularDistribution(&ptr);
+ else if (type == Irregular) {printf("Irregular is not supported\n"); exit(0);}
+ else tmp = NULL;
+
+ *schema_buf = ptr;
+ return tmp;
+}
+
+/* Allocate chunks - Currently only used on the compute node side */
+void Array::allocate_chunks(int node_type)
+{
+ int my_rank;
+ Chunk *new_chunk;
+
+ if (node_type == COMPUTE_NODE) {
+ /* First find out what kind of system we have (MPI or sequential) */
+ if (global_system_type_ == MPI_SYSTEM) {
+ /* Allocate a single chunk with index=compute_node_rank */
+ my_rank = MPIFS_global_obj->my_rank(COMPUTE_NODE);
+ new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, ALLOC);
+ compute_node_layout_->add_last(new_chunk);
+ } else if (global_system_type_ == UNIX_SYSTEM) {
+ /* There is only one kind of Allocation policy */
+ int num = compute_node_layout_->total_elements();
+ for (my_rank=0; my_rank<num; my_rank++) {
+ new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, ALLOC);
+ compute_node_layout_->add_last(new_chunk);
+ }
+ } else printf("Unsupported filesystem\n");
+ } else if (node_type == IO_NODE) {
+ printf("Will have to do this later\n");
+ } else {
+ printf("Error: Don't know the node type\n");
+ }
+}
+
+/* Allocate chunks with user-specified data pointer. This function
+ * currently supports only the REGULAR distribution of chunks in
+ * the MPI-based file system and
+ * should be called only on the compute node side
+ */
+void Array::allocate_chunks(int node_type, int num_ptrs,
+ char **data_ptr, int stencil_width)
+{
+ int my_rank;
+ Chunk *new_chunk;
+
+ if (node_type == COMPUTE_NODE) {
+ /* First find out what kind of system we have (MPI or sequential) */
+ if (global_system_type_ == MPI_SYSTEM) {
+ /* Allocate a single chunk with index=compute_node_rank */
+ my_rank = MPIFS_global_obj->my_rank(COMPUTE_NODE);
+ new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, NO_ALLOC);
+ new_chunk->set_data_ptr(data_ptr[0]);
+ new_chunk->set_stencil_width(stencil_width);
+ compute_node_layout_->add_last(new_chunk);
+ } else if (global_system_type_ == UNIX_SYSTEM) {
+ /* There is only one kind of Allocation policy */
+ int num = compute_node_layout_->total_elements();
+ for (my_rank=0; my_rank<num; my_rank++) {
+ new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, NO_ALLOC);
+ new_chunk->set_data_ptr(data_ptr[my_rank]);
+ new_chunk->set_stencil_width(stencil_width);
+ compute_node_layout_->add_last(new_chunk);
+ }
+ } else printf("Unsupported filesystem\n");
+ } else if (node_type == IO_NODE) {
+ printf("Will have to do this later\n");
+ } else {
+ printf("Error: Don't know the node type\n");
+ }
+}
+
+Array::~Array()
+{
+ if (name_) free(name_);
+ name_ = NULL;
+ if (compute_node_layout_) delete(compute_node_layout_);
+ if (io_node_layout_) delete(io_node_layout_);
+ if (subchunk_layout_) delete(subchunk_layout_);
+ compute_node_layout_ = io_node_layout_ = subchunk_layout_ = NULL;
+}
+
+/* We are not packing the chunk information here */
/* Serialize this array's metadata (not its chunk data) into a freshly
 * malloc'd flat integer buffer; the I/O-node side rebuilds the object
 * with Array::Array(int**), which consumes fields in the same order.
 * On return *schema_buf owns the buffer and *schema_size is its length
 * in ints.
 * NOTE(review): the buffer is a fixed 100 ints; a long name_, a large
 * rank_ or big packed layouts could overflow it -- confirm the bound. */
void Array::pack(int** schema_buf, int *schema_size)
{
  int *ptr, *head;
  int i, len;

  ptr = (int *) malloc(sizeof(int)*100);
  head = ptr;

  *ptr++ = io_strategy_;
  *ptr++ = op_type_;
  len = strlen(name_);
  *ptr++ = len;
  /* The name is packed one character per int. */
  for(i=0; i<len;i++) *ptr++ = (int) name_[i];
  *ptr++ = rank_;
  /* Flag word: 1 if the size vector follows, 0 otherwise. */
  if (size_) { *ptr++ = 1; for(int i=0; i < rank_;i++) *ptr++ = size_[i]; }
  else *ptr++ = 0;
  *ptr++ = element_size_;
  *ptr++ = ieee_size_;
  *ptr++ = (int)natural_chunked_;
  *ptr++ = (int)sub_chunked_;
  *ptr++ = (int)overlap_;

  /* Layouts pack themselves; UNSET marks an absent layout.  The
   * subchunk layout is written only when sub_chunked_ is set (the
   * reader checks the same flag). */
  if (compute_node_layout_) compute_node_layout_->pack(&ptr);
  else *ptr++ = (int)UNSET;
  if (io_node_layout_) io_node_layout_->pack(&ptr);
  else *ptr++ = (int)UNSET;
  if (sub_chunked_) subchunk_layout_->pack(&ptr);

  *schema_size = (int)(ptr - head);
  *schema_buf = head;
}
+
+ArrayDistribution* Array::layout(int layout_type)
+{
+ switch(layout_type) {
+ case COMPUTE_NODE:
+ return compute_node_layout_;
+ case IO_NODE:
+ return io_node_layout_;
+ case SUB_CHUNK:
+ return subchunk_layout_;
+ default:
+ printf("Invalid type\n");
+ return NULL;
+ }
+}
+
+/* The following two functions are used for regular layouts (HPF-style) only */
+/* Given a chunk index and node type, this function returns the *
+ * the relative node number on which the chunk resides */
+int Array::which_node(int chunk_id, int node_type)
+{
+ if (node_type == COMPUTE_NODE)
+ if (compute_node_layout_->alloc_policy() == REGULAR) return chunk_id;
+ else {
+ printf("Unsupported chunk alloc type\n");
+ exit(1);
+ }
+ else if (node_type == IO_NODE) {
+ printf("Currently this is unsupported\n");
+ exit(1);
+ } else {
+ printf("Unsupported node type\n");
+ exit(1);
+ }
+ return -1;
+}
+
+int Array::which_node(int chunk_id, int node_type, int num_io_nodes)
+{
+ if (node_type == IO_NODE){
+ switch(io_node_layout_->alloc_policy()){
+ case ROUND_ROBIN:
+ return(chunk_id % num_io_nodes);
+
+ default:
+ printf("Error in which_node(int,int,int).. Invalid distribution type\n");
+ exit(1);
+ }
+ } else if (node_type == COMPUTE_NODE) {
+ switch(compute_node_layout_->alloc_policy()){
+ case REGULAR:
+ return chunk_id;
+
+ default:
+ printf("Error in which_node(int,int,int)... Invalid distribution type\n");
+ exit(1);
+ }
+ } else {
+ printf("Error in which_node(int,int,int)... Invalid node type\n");
+ exit(1);
+ }
+ return -1;
+}
+
/* Iterate over the local (compute-node) chunks; returns NULL when the
 * end of the list is reached (the layout keeps the cursor). */
Chunk* Array::get_next_chunk()
{
  return compute_node_layout_->get_next_chunk();
}
+
+/* The following seven functions are called by compute nodes only */
+/* Given a chunk index, find the chunk */
+Chunk* Array::find_chunk(int id)
+{
+ List *list = compute_node_layout_->chunk_list();
+ Cell *list_ptr = list != NULL ? list->head_: NULL;
+ Chunk *chunk_ptr;
+
+ while (list_ptr) {
+ chunk_ptr = (Chunk *)list_ptr->item();
+ if (chunk_ptr->chunk_id() == id) return chunk_ptr;
+ list_ptr = list_ptr->next();
+ }
+ return NULL;
+}
+
+int Array::element_size(){return element_size_;}
+int Array::ieee_size(){return ieee_size_;}
+
+Boolean Array::nat_chunked(){return natural_chunked_;}
+
+Boolean Array::sub_chunked(){return sub_chunked_;}
+
+/* This function needs to be checked and refined */
/* Derive a subchunk layout for `chunk` so that each subchunk holds
 * roughly SUBCHUNK_SIZE bytes: a chunk already below the threshold gets
 * a single 1x...x1 subchunk grid; otherwise trailing (fastest-varying)
 * dimensions are kept whole and exactly one dimension is split.  Must
 * only be called while no subchunk layout exists. */
void Array::make_sub_chunks(Chunk *chunk)
{
  Distribution *subchunk_dist;
  int *subchunk_layout_sizes;
  int i, tmp_size, dim, val_dim;
  int *chunk_size = chunk->size();

  if (sub_chunked_) {
    printf("Error: Array already subchunked\n");
    exit(1);
  } else {
    subchunk_dist = (Distribution *) malloc(sizeof(Distribution)*rank_);
    subchunk_layout_sizes = (int*) malloc(sizeof(int)*rank_);
    tmp_size = chunk->total_size_in_bytes();
    if (tmp_size < SUBCHUNK_SIZE){
      /* Small chunk: one subchunk covering everything. */
      for(i=0;i<rank_;i++){
        subchunk_dist[i] = BLOCK;
        subchunk_layout_sizes[i] = 1;
      }
    } else {
      /* Accumulate dimension extents from the last dimension backwards
       * until the slab exceeds SUBCHUNK_SIZE; `dim` is the dimension to
       * split, tmp_size the byte size of one plane of that dimension. */
      tmp_size = element_size_;
      i = rank_;
      while(tmp_size < SUBCHUNK_SIZE){
        i--;
        tmp_size *= chunk_size[i];
      }
      dim =i;
      tmp_size /=chunk_size[i];
      /* Number of planes of dimension `dim` fitting in one subchunk. */
      val_dim = SUBCHUNK_SIZE / tmp_size;
      for(i=0;i<dim;i++){
        subchunk_dist[i] = BLOCK;
        subchunk_layout_sizes[i] = chunk_size[i];
      }
      subchunk_dist[dim] = BLOCK;
      /* i == dim after the loop above, so chunk_size[i] is the split
       * dimension's extent (ceiling division by val_dim). */
      subchunk_layout_sizes[dim] = (chunk_size[i] + val_dim -1)/val_dim;
      for(i=dim+1;i<rank_; i++){
        subchunk_dist[i] = BLOCK;
        subchunk_layout_sizes[i] = 1;
      }
    }
    /* NOTE(review): RegularDistribution's constructor copies the layout
     * (layout_ = new ArrayLayout(layout)), so tmp_layout appears to be
     * leaked here -- confirm against ArrayLayout ownership rules. */
    ArrayLayout *tmp_layout = new ArrayLayout(rank_, subchunk_layout_sizes);
    subchunk_layout_ = new RegularDistribution(rank_, tmp_layout,
                                               subchunk_dist, ROUND_ROBIN, HPF);
    sub_chunked_ = YES;
    free(subchunk_layout_sizes);
    free(subchunk_dist);
  }
}
+
+int Array::array_info()
+{
+ List *list = compute_node_layout_->chunk_list();
+ Cell *list_ptr = list->head_;
+ Chunk *chunk_ptr;
+ int ret =0;
+
+ while(list_ptr) {
+ chunk_ptr = (Chunk *)list_ptr->item();
+ ret += chunk_ptr->total_size_in_bytes();
+ list_ptr = list_ptr->next();
+ }
+ return ret;
+}
+
+/* Called only on the I/O node side */
+int Array::get_next_index(Chunk *&chunk, int old_val, int io_node_num,
+ int num_io_nodes, int max)
+{
+ int ret = io_node_layout_->get_next_index(chunk, old_val, io_node_num,
+ num_io_nodes, max);
+ if (io_node_layout_->distribution_type() == Regular)
+ if (ret < max) chunk->init(this, ret, IO_NODE, NO_ALLOC);
+ return ret;
+}
+
+/* This function should be called only on the compute node side and *
+ * make sense only for the regular distribution of chunks, */
+void Array::set_data_ptr(char *data_ptr)
+{
+ List *list = compute_node_layout_->chunk_list();
+ Chunk *chunk_ptr;
+
+ if (list && list->head_){
+ chunk_ptr = (Chunk *) list->head_->item();
+ chunk_ptr->set_data_ptr(data_ptr);
+ } else {
+ printf("Error: No chunks present - cannot set data ptr\n");
+ }
+}
+
+
+/* This function should be called only on the compute node side and *
+ * make sense only for the regular distribution of chunks, */
+char* Array::get_data_ptr()
+{
+ List *list = compute_node_layout_->chunk_list();
+ Chunk *chunk_ptr;
+
+ if (list && list->head_){
+ chunk_ptr = (Chunk *) list->head_->item();
+ return ((char *)chunk_ptr->data_ptr());
+ } else {
+ printf("Error: No chunks present - cannot set data ptr\n");
+ return NULL;
+ }
+}
+
+Boolean Array::overlaped()
+{
+ return overlap_;
+}
+
/* I/O-node side: recover the chunk geometry of an existing file.  If
 * the file carries "chunk_origin"/"chunk_size" attributes, one chunk
 * with that origin and size is created; otherwise a single chunk
 * covering the whole array (origin 0, size size_) is assumed.  The
 * result becomes an irregular io_node_layout_.
 * NOTE(review): any previous io_node_layout_ is overwritten without
 * being deleted -- confirm callers only invoke this on a fresh object. */
void Array::read_schema_file(IOFile file_ptr)
{
  int *base = (int *)malloc(sizeof(int) * rank_);
  int *size = (int *)malloc(sizeof(int) * rank_);
  int index, length, datatype;
  Chunk *new_chunk;

  index = IOreadAttributeInfo(file_ptr, "chunk_origin", &datatype, &length);
  if (index >=0 ) { // the attribute exists
    IOreadAttribute(file_ptr, index, base);
    index = IOreadAttributeInfo(file_ptr, "chunk_size",&datatype,&length);
    if (index < 0) { printf("Error in reading attributes\n"); exit(0); }
    IOreadAttribute(file_ptr, index, size);
    new_chunk = new Chunk(this, base, size);
  } else {
    /* No chunk attributes: one chunk at the origin, full array size. */
    for (int j=0; j<rank_; j++) base[j] = 0;
    new_chunk = new Chunk(this, base, size_);
  }
  /* The layout takes ownership of new_chunk. */
  io_node_layout_ = new IrregularDistribution(1, &new_chunk);
  free(base);
  free(size);
}
+
+/* The collective io operation to write out the arrays. */
/* Collective write of the array as a new timestep.  Compute nodes send
 * the array schema to the I/O nodes and enter the compute-side I/O
 * loop; part-time compute nodes only run the loop; part-time I/O nodes
 * pack the schema themselves and run the I/O-node loop.
 * NOTE(review): the schema buffer malloc'd by pack() is presumably
 * released inside part_time_io_node_loop -- confirm, else it leaks
 * once per call. */
void Array::timestep()
{
  int *schema, schema_size;
  int node_type = MPIFS_global_obj->node_type();

  op_type_ = TIMESTEP;
  if (node_type == COMPUTE_NODE){
    MPIFS_global_obj->send_array_schema(this);
    MPIFS_global_obj->compute_node_io_loop(this);
  }
  else if (node_type == PART_TIME_COMPUTE)
    MPIFS_global_obj->compute_node_io_loop(this);
  else {
    pack(&schema, &schema_size);
    MPIFS_global_obj->part_time_io_node_loop(schema, schema_size, this);
  }
}
+
+/* The collective io operation to write out the arrays. */
/* Collective write of the array as a checkpoint.  Same control flow as
 * timestep(), with op_type_ = CHECKPOINT.
 * NOTE(review): as in timestep(), the packed schema buffer's ownership
 * (freed in part_time_io_node_loop?) should be confirmed. */
void Array::checkpoint()
{
  int *schema, schema_size;
  int node_type = MPIFS_global_obj->node_type();

  op_type_ = CHECKPOINT;
  if (node_type == COMPUTE_NODE){
    MPIFS_global_obj->send_array_schema(this);
    MPIFS_global_obj->compute_node_io_loop(this);
  }
  else if (node_type == PART_TIME_COMPUTE)
    MPIFS_global_obj->compute_node_io_loop(this);
  else {
    pack(&schema, &schema_size);
    MPIFS_global_obj->part_time_io_node_loop(schema, schema_size, this);
  }
}
+
+/* The collective io operation to read in the arrays from a *
+ * checkpoint file. Currently (for testing purposes) this *
+ * does not happen. */
/* Collective read of the array from a checkpoint file.  Same control
 * flow as timestep(), with op_type_ = RESTART. */
void Array::restart()
{
  int *schema, schema_size;
  int node_type = MPIFS_global_obj->node_type();

  op_type_ = RESTART;
  if (node_type == COMPUTE_NODE){
    MPIFS_global_obj->send_array_schema(this);
    MPIFS_global_obj->compute_node_io_loop(this);
  }
  else if (node_type == PART_TIME_COMPUTE)
    MPIFS_global_obj->compute_node_io_loop(this);
  else {
    pack(&schema, &schema_size);
    MPIFS_global_obj->part_time_io_node_loop(schema, schema_size , this);
  }
}
+
/* Collective read of a previously written timestep.  Same control flow
 * as timestep(), with op_type_ = READ_TIMESTEP. */
void Array::read_timestep()
{
  int *schema, schema_size;
  int node_type = MPIFS_global_obj->node_type();

  op_type_ = READ_TIMESTEP;
  if (node_type == COMPUTE_NODE){
    MPIFS_global_obj->send_array_schema(this);
    MPIFS_global_obj->compute_node_io_loop(this);
  }
  else if (node_type == PART_TIME_COMPUTE)
    MPIFS_global_obj->compute_node_io_loop(this);
  else {
    pack(&schema, &schema_size);
    MPIFS_global_obj->part_time_io_node_loop(schema, schema_size , this);
  }
}
+
+int Array::op_type() { return op_type_; }
+int Array::io_strategy() { return io_strategy_; }
+
/* Late initialization for an Array built with the default constructor:
 * set rank, element type/size and the size vector, and on compute nodes
 * allocate the single local chunk for this process's rank.
 * NOTE(review): size_ aliases the caller's array (no copy is made) and
 * any previous size_ is not freed -- confirm ownership with callers. */
void Array::init(int rank, int ieee_size, int *size, int node_type)
{
  rank_ = rank;
  ieee_size_ = ieee_size;
  element_size_ = IOsizeOf(ieee_size_);
  size_ = size;
  if (node_type == COMPUTE_NODE) {
    int my_rank = MPIFS_global_obj->my_rank(COMPUTE_NODE);
    Chunk *new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, ALLOC);
    compute_node_layout_->add_last(new_chunk);
  }
}
diff --git a/src/Panda/Array.h b/src/Panda/Array.h
new file mode 100644
index 0000000..834fd36
--- /dev/null
+++ b/src/Panda/Array.h
@@ -0,0 +1,88 @@
+#ifndef Array_dot_h
+#define Array_dot_h
+
+#include "List.h"
+#include "ArrayDistribution.h"
+#include "Chunk.h"
+
+#include "external/IEEEIO/src/Arch.h"
+
+//#include "../IEEEIO/IEEEIO.h"
+//#include "../IEEEIO/IOProtos.h"
+
+
/* User-visible description of a distributed (global) array: its shape
 * and element size, plus the layouts of its chunks on the compute
 * nodes, the I/O nodes and (optionally) within subchunks.  Also carries
 * the state of the current collective operation. */
class Array : public Template, public Linkable {
 protected:
  ArrayDistribution *compute_node_layout_;  /* chunks on compute nodes */
  ArrayDistribution *io_node_layout_;       /* chunks on I/O nodes */
  ArrayDistribution *subchunk_layout_;      /* optional subchunking */
  int element_size_;        /* element size in bytes (from IOsizeOf) */
  int ieee_size_;           /* IEEEIO type code for one element */
  char *name_;              /* malloc'd array name */
  Boolean natural_chunked_; /* compute and I/O layouts coincide */
  Boolean sub_chunked_;     /* a subchunk layout is present */
  Boolean overlap_;         /* chunks carry ghost (stencil) regions */
  int op_type_;             /* current collective op (TIMESTEP, ...) */
  int io_strategy_;         /* e.g. SIMPLE_IO */

  /* Shared constructor body; chunk allocation is separate. */
  void do_init(char*, int, int*, int,
	       ArrayLayout*, Distribution*,
	       ArrayLayout*, Distribution*,
	       ArrayLayout*, Distribution*,
	       ChunkAllocPolicy, ChunkAllocPolicy,
	       Block_Distribution);
  void allocate_chunks(int);                 /* chunks own their storage */
  void allocate_chunks(int,int,char**,int);  /* wrap user data pointers */
  ArrayDistribution *unpack_layout(int **);  /* read one layout from schema */

 public:
  /* Compute-node constructors: with/without subchunking, user data
   * pointer, and stencil width. */
  Array(char*,int, int*, int, ArrayLayout*,
	Distribution*, ArrayLayout*,
	Distribution*);
  Array(char*,int, int*, int, ArrayLayout*,
	Distribution*, ArrayLayout*,
	Distribution*, char *);
  Array(char*,int, int*, int, ArrayLayout*,
	Distribution*, ArrayLayout*,
	Distribution*, ArrayLayout*,
	Distribution*);
  Array(char*,int, int*, int, ArrayLayout*,
	Distribution*, ArrayLayout*,
	Distribution*, ArrayLayout*,
	Distribution*, char *);
  Array(char*,int, int*, int,
	ArrayLayout*, Distribution*,
	ArrayLayout*, Distribution*, char *, int);
  Array(int **);   /* I/O-node side: rebuild from packed schema */
  Array();         /* empty descriptor, filled in later via init() */
  virtual ~Array();
  void init(int,int,int*,int);
  Chunk* get_next_chunk();        /* iterate local chunks */
  int which_node(int,int,int);    /* chunk index -> node number */
  void delete_chunks();
  void pack(int**, int*);         /* serialize metadata into int schema */
  ArrayDistribution* layout(int); /* layout for COMPUTE_NODE/IO_NODE/SUB_CHUNK */
  int which_node(int,int);
  Chunk* find_chunk(int);         /* local chunk by id, NULL if absent */
  int element_size();
  int ieee_size();
  Boolean nat_chunked();
  Boolean sub_chunked();
  void make_sub_chunks(Chunk*);   /* derive subchunk layout from size */
  int array_info();               /* total local chunk bytes */
  int get_next_index(Chunk*&,int,int,int,int);
  int num_of_chunks();
  void set_data_ptr(char *);      /* regular distributions only */
  char* get_data_ptr();
  Boolean overlaped();
  void read_schema_file(IOFile);  /* recover chunking from a file */

  /* Collective operations. */
  void timestep();
  void read_timestep();
  void checkpoint();
  void restart();
  int op_type();
  int io_strategy();
};
+
+#endif
diff --git a/src/Panda/ArrayDistribution.C b/src/Panda/ArrayDistribution.C
new file mode 100644
index 0000000..04e5226
--- /dev/null
+++ b/src/Panda/ArrayDistribution.C
@@ -0,0 +1,205 @@
+#include "ArrayDistribution.h"
+
+/********************************
+ * ArrayDistribution *
+ ********************************/
+Boolean ArrayDistribution::equal(ArrayDistribution *) { return NO; }
+
+int ArrayDistribution::distribution_type()
+{
+ printf("In ArrayDistributon: distribution_type, shouldn't be called\n");
+ return -1;
+}
+
+ArrayDistribution::ArrayDistribution()
+{
+ num_of_chunks_ = 0;
+ chunk_list_ = new List();
+ current_cell_ = NULL;
+}
+
+ArrayDistribution::ArrayDistribution(int **schema_buf)
+{
+ printf("In ArrayDistributon: init, shouldn't be called\n");
+}
+
+void ArrayDistribution::add_last(Chunk *new_chunk)
+{
+ chunk_list_->add_last(new_chunk);
+ num_of_chunks_++;
+}
+
/* Delete the chunk list and every chunk on it.
 * NOTE(review): list_ptr->item() is deleted without a cast; if item()
 * returns void* this is undefined behavior (no destructor runs).
 * Elsewhere items are always used as Chunk* -- consider deleting
 * through a Chunk* cast; confirm item()'s return type in List.h. */
ArrayDistribution::~ArrayDistribution()
{
  Cell *list_ptr;
  if (chunk_list_) {
    list_ptr = chunk_list_->head_;
    while (list_ptr) {
      delete list_ptr->item();
      list_ptr = list_ptr->next();
    }
    delete chunk_list_;
    chunk_list_ = NULL; num_of_chunks_ = 0;
  }
}
+
+void ArrayDistribution::pack(int **schema_buf)
+{
+ printf("In ArrayDistributon: pack, shouldn't be called\n");
+}
+
+int ArrayDistribution::get_next_index(Chunk *&chunk, int old_val,
+ int io_node_num,
+ int num_io_nodes, int max)
+{
+ printf("In ArrayDistributon: get_next_index shouldn't be called\n");
+ return max;
+}
+
+List *ArrayDistribution::chunk_list()
+{
+ return chunk_list_;
+}
+
+Chunk* ArrayDistribution::get_next_chunk()
+{
+ if (current_cell_) current_cell_ = current_cell_->next();
+ else current_cell_ = chunk_list_->head_;
+
+ if (current_cell_) return ((Chunk *)current_cell_->item());
+ return NULL;
+}
+
+int ArrayDistribution::total_elements()
+{
+ printf("In ArrayDistributon: total_elements shouldn't be called\n");
+ return 0;
+}
+
+ChunkAllocPolicy ArrayDistribution::alloc_policy()
+{
+ printf("In ArrayDistributon: alloc_policy, shouldn't be called\n");
+ return ROUND_ROBIN;
+}
+
+void ArrayDistribution::list_clear() { current_cell_ = NULL; }
+
+/********************************
+ * RegularDistribution *
+ ********************************/
+RegularDistribution::RegularDistribution(int rank, ArrayLayout *layout,
+ Distribution *dist,
+ ChunkAllocPolicy alloc_policy,
+ Block_Distribution block_dist)
+ : ArrayDistribution()
+{
+ layout_ = new ArrayLayout(layout);
+ rank_ = rank;
+ alloc_policy_ = alloc_policy;
+ dist_ = copy_distribution(rank_, dist);
+ block_dist_ = block_dist;
+}
+
/* Rebuild a regular distribution from the flat integer schema written
 * by RegularDistribution::pack(); the caller has already consumed the
 * leading type tag.  Fields are read in exactly the order pack() wrote
 * them, and *schema_buf is advanced past the consumed ints. */
RegularDistribution::RegularDistribution(int **schema_buf) : ArrayDistribution()
{
  int* ptr = *schema_buf;
  layout_ = new ArrayLayout(&ptr);
  rank_ = *ptr++;
  dist_ = new_distribution(&ptr, rank_);
  alloc_policy_ = (ChunkAllocPolicy)*ptr++;
  block_dist_ = (Block_Distribution)*ptr++;
  *schema_buf = ptr;
}
+
+RegularDistribution::~RegularDistribution()
+{
+ if (layout_) { delete layout_; layout_ = NULL; }
+ if (dist_ ) { free(dist_); dist_ = NULL; }
+}
+
+Boolean RegularDistribution::equal(ArrayDistribution *that)
+{
+ if (!that) return NO;
+
+ RegularDistribution *tmp;
+ if (that->distribution_type() == Regular)
+ tmp = (RegularDistribution *)that;
+ else return NO;
+
+ if (layout_->equal(tmp->layout_) &&
+ equal_distribution(rank_, dist_, tmp->dist_)) return YES;
+ return NO;
+}
+
+ArrayLayout *RegularDistribution::layout()
+{
+ return layout_;
+}
+
+Distribution *RegularDistribution::distribution()
+{
+ return dist_;
+}
+
/* Serialize this distribution into the flat integer schema: a Regular
 * type tag, then the layout, rank, distribution vector, allocation
 * policy and block distribution -- the exact order the unpacking
 * constructor expects.  *schema_buf is advanced past what was written. */
void RegularDistribution::pack(int **schema_buf)
{
  int* ptr = *schema_buf;

  *ptr++ = (int)Regular;
  layout_->pack(&ptr);
  *ptr++ = rank_;
  pack_distribution(&ptr, rank_, dist_);
  *ptr++ = (int)alloc_policy_;
  *ptr++ = block_dist_;
  *schema_buf = ptr;
}
+
+int RegularDistribution::distribution_type()
+{
+ return Regular;
+}
+
+int RegularDistribution::total_elements()
+{
+ return layout_->total_elements();
+}
+
+ChunkAllocPolicy RegularDistribution::alloc_policy() { return alloc_policy_; }
+
+int RegularDistribution::get_next_index(Chunk *&chunk, int old_val,
+ int io_node_num,
+ int num_io_nodes, int max)
+{
+ if (old_val == -1) return io_node_num;
+ else return (old_val + num_io_nodes);
+}
+
+Block_Distribution RegularDistribution::block_dist() { return block_dist_; }
+
+/********************************
+ * IrregularDistribution *
+ ********************************/
+int IrregularDistribution::distribution_type()
+{
+ return Irregular;
+}
+
+int IrregularDistribution::total_elements()
+{
+ return num_of_chunks_;
+}
+
+int IrregularDistribution::get_next_index(Chunk *&chunk,
+ int old_val, int io_node_num,
+ int num_io_nodes, int max)
+{
+ chunk = get_next_chunk();
+ if (chunk == NULL) return max;
+ return chunk->chunk_id();
+}
+
+IrregularDistribution::IrregularDistribution(int num, Chunk **chunk_list)
+ : ArrayDistribution()
+{
+ for (int i=0; i<num; i++) add_last(chunk_list[i]);
+}
diff --git a/src/Panda/ArrayDistribution.h b/src/Panda/ArrayDistribution.h
new file mode 100644
index 0000000..12e68e1
--- /dev/null
+++ b/src/Panda/ArrayDistribution.h
@@ -0,0 +1,70 @@
+#ifndef ArrayDistribution_dot_h
+#define ArrayDistribution_dot_h
+
+#include "definitions.h"
+#include "List.h"
+#include "ArrayLayout.h"
+#include "Chunk.h"
+
+class Array;
/* Abstract base class describing how an array's chunks are laid out.
 * Owns a list of Chunk objects and a cursor for iterating over them;
 * the Regular/Irregular subclasses supply the actual distribution
 * semantics (most virtuals here are "shouldn't be called" stubs). */
class ArrayDistribution
{
protected:
  int num_of_chunks_;    /* number of chunks on chunk_list_ */
  List *chunk_list_;     /* chunks owned by this distribution */
  Cell *current_cell_;   /* cursor for get_next_chunk() */
public:
  ArrayDistribution();
  ArrayDistribution(int **);          /* unpack from packed int schema */
  virtual ~ArrayDistribution();       /* deletes the chunks */
  virtual Boolean equal(ArrayDistribution *);
  virtual int distribution_type();    /* Regular / Irregular / UNSET */
  virtual void pack(int **);          /* serialize into int schema */
  virtual int total_elements();
  virtual ChunkAllocPolicy alloc_policy();
  virtual int get_next_index(Chunk *&,int,int,int,int);
  List *chunk_list();
  void add_last(Chunk *);             /* takes ownership of the chunk */
  Chunk *get_next_chunk();            /* advance cursor; NULL at end */
  void list_clear();                  /* reset cursor */
};
+
+
/* HPF-style regular distribution: a node layout plus a per-dimension
 * distribution vector, with a chunk-allocation policy and a block
 * distribution flavor. */
class RegularDistribution : public ArrayDistribution
{
  ArrayLayout *layout_;            /* node grid (owned copy) */
  int rank_;                       /* number of dimensions */
  Distribution *dist_;             /* per-dimension distribution (owned) */
  Block_Distribution block_dist_;  /* block distribution flavor (e.g. HPF) */
  ChunkAllocPolicy alloc_policy_;  /* e.g. REGULAR, ROUND_ROBIN */
public:
  RegularDistribution(int **);     /* unpack from packed int schema */
  RegularDistribution(int , ArrayLayout *,
		      Distribution *, ChunkAllocPolicy,
		      Block_Distribution, int*);
  RegularDistribution(int , ArrayLayout *,
		      Distribution *, ChunkAllocPolicy,
		      Block_Distribution);
  ~RegularDistribution();
  Boolean equal(ArrayDistribution *);   /* same layout and distribution */
  ArrayLayout *layout();
  Distribution *distribution();
  int distribution_type();              /* returns Regular */
  void pack(int **);
  int total_elements();
  ChunkAllocPolicy alloc_policy();
  int get_next_index(Chunk *&,int,int,int,int);  /* round-robin walk */
  Block_Distribution block_dist();
};
+
/* Distribution given by an explicit list of chunks (no layout grid);
 * iteration simply walks the inherited chunk list. */
class IrregularDistribution : public ArrayDistribution
{
public:
  IrregularDistribution(int, Chunk **);  /* takes ownership of chunks */
  int distribution_type();               /* returns Irregular */
  int total_elements();                  /* number of chunks */
  int get_next_index(Chunk *&,int,int,int,int);
};
+
+#endif
+
diff --git a/src/Panda/ArrayGroup.C b/src/Panda/ArrayGroup.C
new file mode 100644
index 0000000..afba023
--- /dev/null
+++ b/src/Panda/ArrayGroup.C
@@ -0,0 +1,521 @@
+#include "definitions.h"
+#include "MPIFS.h"
+#include "Array.h"
+#include "ArrayGroup.h"
+
+extern MPIFS *MPIFS_global_obj;
+
+ArrayGroup::ArrayGroup()
+{
+  do_init();
+}
+
+/* Named constructor: keeps a private malloc'd copy of name. */
+ArrayGroup::ArrayGroup(char *name)
+{
+  do_init();
+  name_ = (char *)malloc(strlen(name)+1);
+  strcpy(name_, name);
+}
+
+
+/* Function to initialize the state of the newly created object */
+void ArrayGroup::do_init()
+{
+  num_of_arrays_ = 0;
+  list_ = new List();
+  io_strategy_ = SIMPLE_IO;
+  interleaved_ = NO;
+  common_layouts_ = NO;
+  common_layout_rank_ = 0;
+  compute_layout_ = NULL;
+  compute_distribution_ = NULL;
+  io_layout_ = NULL;
+  io_distribution_ = NULL;
+  group_io_count_ = 0;
+  read_io_count_ =0;
+  checkpoint_count_ = 1;
+  simulate_ = NO;
+  verify_ = NO;
+  name_ = NULL;
+}
+
+/* Reset the group to a freshly-constructed, empty state.            *
+ * NOTE(review): distributions are released with delete here but     *
+ * with free() in insert(); confirm how copy_distribution()          *
+ * allocates so the alloc/free pairs match.                          */
+void ArrayGroup::clear()
+{
+  if (name_) free(name_);
+  if (compute_layout_ != NULL) delete compute_layout_;
+  if (compute_distribution_ != NULL) delete compute_distribution_;
+  if (io_layout_ != NULL) delete io_layout_;
+  if (io_distribution_ != NULL) delete io_distribution_;
+  if (list_) delete list_;
+  name_ = NULL;
+  compute_layout_ = NULL; compute_distribution_ = NULL;
+  io_layout_ = NULL; io_distribution_ = NULL;
+  list_ = new List();
+}
+
+/* Destructor function - Note that we don't have to delete the *
+ * arrays in the arraygroup over here. The arrays are deleted  *
+ * by the user */
+ArrayGroup::~ArrayGroup()
+{
+  if (name_) free(name_);
+  if (compute_layout_ != NULL) delete compute_layout_;
+  if (compute_distribution_ != NULL) delete compute_distribution_;
+  if (io_layout_ != NULL) delete io_layout_;
+  if (io_distribution_ != NULL) delete io_distribution_;
+  if (list_) delete list_;
+  name_ = NULL;
+  compute_layout_ = NULL;
+  io_layout_ = NULL;
+  compute_distribution_ = NULL;
+  io_distribution_ = NULL;
+  list_ = NULL;
+}
+
+/* Function to delete the arrays in the arraygroup. This is used  *
+ * on the io node side to delete the arrays after the collective  *
+ * io operation has been completed. On the compute node side, the *
+ * user explicitly deletes the arrays */
+void ArrayGroup::delete_arrays()
+{
+  Cell* list_ptr = (list_ != NULL? list_->head_: NULL);
+  Array* array_ptr;
+
+  while(list_ptr)
+  {
+    array_ptr = (Array *) list_ptr->item();
+    delete array_ptr;
+    list_ptr = list_ptr->next();
+  }
+  if (list_) delete list_;
+  list_ = NULL;
+}
+
+/* Assign id numbers to the arrays in the arraygroup. This function *
+ * must be called at the start of each collective i/o operation.    *
+ * Ids are consecutive list positions starting at 0.                */
+void ArrayGroup::assign_id()
+{
+  Cell* list_ptr = (list_ != NULL? list_->head_: NULL);
+  Array* array_ptr;
+  int i=0;
+
+  while(list_ptr)
+  {
+    array_ptr = (Array *) list_ptr->item();
+    array_ptr->set_array_id(i);
+#ifdef DEBUG
+    printf("Assigned Id %d\n", i);
+#endif
+    i++;
+    list_ptr = list_ptr->next();
+  }
+}
+
+/* Insert a new array. Before inserting the array, check to  *
+ * see if it has a common layout with the rest of the arrays */
+void ArrayGroup::insert(Array *new_array)
+{
+  num_of_arrays_++;
+
+  /* Has common layouts since this is the first array */
+  if (num_of_arrays_ == 1)
+  {
+    common_layout_rank_ = new_array->rank();
+    compute_layout_ = new ArrayLayout(new_array->layout(COMPUTE_NODE));
+    compute_distribution_ = copy_distribution(common_layout_rank_,
+					      new_array->distribution(COMPUTE_NODE));
+    io_layout_ = new ArrayLayout(new_array->layout(IO_NODE));
+    io_distribution_ = copy_distribution(common_layout_rank_,
+					 new_array->distribution(IO_NODE));
+    common_layouts_ = YES;
+  }
+  else if (common_layouts_)
+  {
+    /* check to see if the array has the same layouts/dist */
+    if ((common_layout_rank_ == new_array->rank()) &&
+	(compute_layout_->equal(new_array->layout(COMPUTE_NODE))) &&
+	(io_layout_->equal(new_array->layout(IO_NODE))) &&
+	(equal_distribution(common_layout_rank_, compute_distribution_,
+			    new_array->distribution(COMPUTE_NODE))) &&
+	(equal_distribution(common_layout_rank_, io_distribution_,
+			    new_array->distribution(IO_NODE))))
+    {
+      common_layouts_ = YES;
+    }
+    else
+    {
+      /* Layouts differ: drop the cached common-layout state.        *
+       * NOTE(review): distributions are free()'d here but delete'd  *
+       * in clear()/~ArrayGroup(); confirm copy_distribution()'s     *
+       * allocator so the alloc/release pairs match.                 */
+      common_layouts_ = NO;
+      if (io_layout_) delete io_layout_;
+      if (compute_layout_) delete compute_layout_;
+      io_layout_ = compute_layout_ = NULL;
+      if (io_distribution_) free(io_distribution_);
+      if (compute_distribution_) free(compute_distribution_);
+      io_distribution_ = compute_distribution_ = NULL;
+    }
+  }
+
+  list_->add_last(new_array);
+}
+
+/* This function is called on the compute node side at the start *
+ * of each collective io operation. The information is packed    *
+ * into an integer buffer. An assumption is made that the a buf  *
+ * of 100 ints is sufficent for each array.                      *
+ * The caller owns *schema and must free() it.                   */
+void ArrayGroup::pack(int** schema, int* schema_size)
+{
+  int *ptr, *head;
+  int i, len;
+
+  /* Assuming that schema size of Array is ~= 100 elts */
+  ptr = (int *) malloc(sizeof(int)*100*(num_of_arrays_+1));
+  head = ptr;
+
+  /* Round about way and space inefficent way of storing a name */
+  *ptr++ = io_strategy_;
+  /* A group built with the default constructor leaves name_ NULL; *
+   * strlen(NULL) is undefined, so treat a missing name as empty.  */
+  len = (name_ != NULL) ? strlen(name_) : 0;
+  *ptr++ = len;
+  for(i=0; i<len;i++)
+    *ptr++ = (int) name_[i];
+
+  *ptr++ = num_of_arrays_;
+  *ptr++ = (int) interleaved_;
+  *ptr++ = (int) simulate_;
+  *ptr++ = (int) verify_;
+  *ptr++ = (int) common_layouts_;
+  if (common_layouts_)
+  {
+    *ptr++ = common_layout_rank_ ;
+    compute_layout_->pack(&ptr);
+    pack_distribution(&ptr, common_layout_rank_, compute_distribution_);
+    io_layout_->pack(&ptr);
+    pack_distribution(&ptr, common_layout_rank_, io_distribution_);
+  }
+  *ptr++ = group_io_count_;
+  *ptr++ = checkpoint_count_;
+  *ptr++ = op_type_;
+
+  pack_arrays(&ptr, common_layouts_);
+
+  *schema_size = (int)(ptr - head);
+  *schema = head;
+}
+
+/* This function is called on the I/O node side. After receiving *
+ * the collective io schema, the information is unpacked. The    *
+ * arrays are unpacked seperately via a another function call    */
+void ArrayGroup::unpack(int **schema_ptr)
+{
+  int *ptr = *schema_ptr;
+  int len;
+
+  /* Unpack the name */
+  io_strategy_ = *ptr++;
+  len = *ptr++;
+  name_ = (char *) malloc(len+1);
+  for(int i=0; i< len; i++)
+    name_[i] = (char) *ptr++;
+  name_[len] = '\0';
+
+  num_of_arrays_ = *ptr++;
+  interleaved_ = (Boolean) *ptr++;
+  simulate_ = (Boolean) *ptr++;
+  verify_ = (Boolean) *ptr++;
+  common_layouts_ = (Boolean) *ptr++;
+  if (common_layouts_)
+  {
+    common_layout_rank_ = *ptr++;
+    compute_layout_ = new ArrayLayout(&ptr);
+    compute_distribution_ = new_distribution(&ptr, common_layout_rank_);
+    io_layout_ = new ArrayLayout(&ptr);
+    io_distribution_ = new_distribution(&ptr, common_layout_rank_);
+  }
+  else
+  {
+    common_layout_rank_ = 0;
+    compute_layout_ = io_layout_ = NULL;
+    compute_distribution_ = io_distribution_ = NULL;
+  }
+  group_io_count_ = *ptr++;
+  checkpoint_count_ = *ptr++;
+  op_type_ = *ptr++;
+
+  /* Arrays are being unpacked seperately */
+  *schema_ptr = ptr;
+}
+
+/* Rebuild the Array objects from the schema buffer; with common *
+ * layouts the shared layout/distribution objects are passed in. */
+void ArrayGroup::unpack_arrays(int **schema_buf)
+{
+  Array *array;
+  int i, *ptr = *schema_buf;
+
+  if (common_layouts_){
+    for(i=0;i<num_of_arrays_;i++){
+      array = new Array(&ptr, common_layouts_, compute_layout_,
+			compute_distribution_, io_layout_,
+			io_distribution_);
+      list_->add_last(array);
+    }
+  } else {
+    for(i=0;i<num_of_arrays_;i++){
+      array = new Array(&ptr, common_layouts_);
+      list_->add_last(array);
+    }
+  }
+  *schema_buf = ptr;
+}
+
+
+/* NOTE(review): the six collective entry points below share the  *
+ * same driver skeleton (assign ids, set op_type_, dispatch on    *
+ * node type) and differ only in op_type_ and bookkeeping; a      *
+ * private helper taking op_type would remove the duplication.    */
+
+/* The collective io operation to write out the arrays. */
+void ArrayGroup::timestep()
+{
+  int *schema, schema_size;
+  int node_type = MPIFS_global_obj->node_type();
+  /* Assign id numbers to each array */
+  assign_id();
+
+  op_type_ = TIMESTEP;
+  if (node_type == COMPUTE_NODE){
+    MPIFS_global_obj->send_group_schema(this);
+    MPIFS_global_obj->compute_node_io_loop(this);
+  }
+  else if (node_type == PART_TIME_COMPUTE)
+    MPIFS_global_obj->compute_node_io_loop(this);
+  else {
+    pack(&schema, &schema_size);
+    MPIFS_global_obj->part_time_io_node_loop(schema, schema_size, this);
+  }
+  /* Commented out for testing purposes */
+// group_io_count_++;
+}
+
+/* The collective io operation to write out the arrays. */
+void ArrayGroup::general_write()
+{
+  int *schema, schema_size;
+  int node_type = MPIFS_global_obj->node_type();
+  /* Assign id numbers to each array */
+  assign_id();
+
+  op_type_ = GENERAL_WRITE;
+  if (node_type == COMPUTE_NODE){
+    MPIFS_global_obj->send_group_schema(this);
+    MPIFS_global_obj->compute_node_io_loop(this);
+  }
+  else if (node_type == PART_TIME_COMPUTE)
+    MPIFS_global_obj->compute_node_io_loop(this);
+  else {
+    pack(&schema, &schema_size);
+    MPIFS_global_obj->part_time_io_node_loop(schema, schema_size, this);
+  }
+  /* Commented out for testing purposes */
+// group_io_count_++;
+}
+
+
+/* The collective io operation to write out the arrays. */
+void ArrayGroup::checkpoint()
+{
+  int *schema, schema_size;
+  int node_type = MPIFS_global_obj->node_type();
+  /* Assign id numbers to each array */
+  assign_id();
+
+  /* checkpoint_count_ toggles between 0 and 1 on each call. */
+  if (checkpoint_count_ == 0)
+    checkpoint_count_ = 1;
+  else
+    checkpoint_count_ = 0;
+
+  op_type_ = CHECKPOINT;
+  if (node_type == COMPUTE_NODE){
+    MPIFS_global_obj->send_group_schema(this);
+    MPIFS_global_obj->compute_node_io_loop(this);
+  }
+  else if (node_type == PART_TIME_COMPUTE)
+    MPIFS_global_obj->compute_node_io_loop(this);
+  else {
+    pack(&schema, &schema_size);
+    MPIFS_global_obj->part_time_io_node_loop(schema, schema_size, this);
+  }
+
+}
+
+
+
+/* The collective io operation to read in the arrays from a *
+ * checkpoint file. Currently (for testing purposes) this    *
+ * does not happen. */
+void ArrayGroup::restart()
+{
+  int *schema, schema_size;
+  int node_type = MPIFS_global_obj->node_type();
+
+  /* Assign id numbers to each array */
+  assign_id();
+
+  op_type_ = RESTART;
+  if (node_type == COMPUTE_NODE){
+    MPIFS_global_obj->send_group_schema(this);
+    MPIFS_global_obj->compute_node_io_loop(this);
+  }
+  else if (node_type == PART_TIME_COMPUTE)
+    MPIFS_global_obj->compute_node_io_loop(this);
+  else {
+    pack(&schema, &schema_size);
+    MPIFS_global_obj->part_time_io_node_loop(schema, schema_size , this);
+  }
+}
+
+
+/* Collective read of the current timestep. */
+void ArrayGroup::read_timestep()
+{
+  int *schema, schema_size;
+  int node_type = MPIFS_global_obj->node_type();
+
+  /* Assign id numbers to each array */
+  assign_id();
+
+  op_type_ = READ_TIMESTEP;
+  if (node_type == COMPUTE_NODE){
+    MPIFS_global_obj->send_group_schema(this);
+    MPIFS_global_obj->compute_node_io_loop(this);
+  }
+  else if (node_type == PART_TIME_COMPUTE)
+    MPIFS_global_obj->compute_node_io_loop(this);
+  else {
+    pack(&schema, &schema_size);
+    MPIFS_global_obj->part_time_io_node_loop(schema, schema_size , this);
+  }
+  /* Commented out for testing purposes */
+// read_io_count_++;
+}
+
+
+/* Collective general read of the arrays. */
+void ArrayGroup::general_read()
+{
+  int *schema, schema_size;
+  int node_type = MPIFS_global_obj->node_type();
+
+  /* Assign id numbers to each array */
+  assign_id();
+
+  op_type_ = GENERAL_READ;
+  if (node_type == COMPUTE_NODE){
+    MPIFS_global_obj->send_group_schema(this);
+    MPIFS_global_obj->compute_node_io_loop(this);
+  }
+  else if (node_type == PART_TIME_COMPUTE)
+    MPIFS_global_obj->compute_node_io_loop(this);
+  else {
+    pack(&schema, &schema_size);
+    MPIFS_global_obj->part_time_io_node_loop(schema, schema_size , this);
+  }
+  /* Commented out for testing purposes */
+// read_io_count_++;
+}
+
+
+
+
+/* Given an array id find the array object in the array group *
+ * The code caches the previous search value and starts the   *
+ * search from there. This helps especially in the case of    *
+ * when the arrays are accessed sequentially */
+Array* ArrayGroup::find_array(int array_id)
+{
+  Cell* list_ptr = (list_ != NULL ?
+		    (list_->old_search_val_ != NULL ? list_->old_search_val_ :
+		     list_->head_)
+		    : NULL);
+  Array* array_ptr;
+
+  /* First pass: from the cached position to the end of the list. */
+  while(list_ptr)
+  {
+    array_ptr = (Array *) list_ptr->item();
+    if (array_ptr->array_id() == array_id)
+    {
+      list_->old_search_val_ = list_ptr->next();
+      return array_ptr;
+    }
+    list_ptr = list_ptr->next();
+  }
+
+  /* Second pass: wrap around from the head up to the cached cell. */
+  list_ptr = list_->head_;
+  while (list_->old_search_val_ && (list_ptr != list_->old_search_val_))
+  {
+    array_ptr = (Array *) list_ptr->item();
+    if (array_ptr->array_id() == array_id)
+    {
+      list_->old_search_val_ = list_ptr->next();
+      return array_ptr;
+    }
+    list_ptr = list_ptr->next();
+  }
+  /* No array in the group carries array_id. */
+  return NULL;
+}
+
+/* Pack the arrays into an integer schema buffer. Assumes that the *
+ * data is already allocated. */
+void ArrayGroup::pack_arrays(int **schema_buf, Boolean common_layouts)
+{
+  Cell* list_ptr = (list_ != NULL ? list_->head_ : NULL);
+  Array* array_ptr;
+
+  while(list_ptr)
+  {
+    array_ptr = (Array *) list_ptr->item();
+    array_ptr->pack(schema_buf, common_layouts);
+    list_ptr = list_ptr->next();
+  }
+
+}
+
+
+int ArrayGroup::op_type(){return op_type_;}
+
+/* set_simulate{,_mode}/reset_simulate{,_mode} are duplicate *
+ * aliases kept in the public interface; same for verify.    */
+void ArrayGroup::set_simulate(){simulate_ = YES;}
+
+void ArrayGroup::reset_simulate(){simulate_ = NO;}
+
+void ArrayGroup::set_simulate_mode(){simulate_ = YES;}
+
+void ArrayGroup::reset_simulate_mode(){simulate_ = NO;}
+
+Boolean ArrayGroup::simulate(){return simulate_;}
+
+
+void ArrayGroup::set_verify(){verify_ = YES;}
+
+void ArrayGroup::reset_verify(){verify_ = NO;}
+
+void ArrayGroup::set_verify_mode(){verify_ = YES;}
+
+void ArrayGroup::reset_verify_mode(){verify_ = NO;}
+
+Boolean ArrayGroup::verify(){return verify_;}
+
+/* This function is called on each compute node side and after all the
+ * arrays have been assigned an id. For each array in the arraygroup,
+ * the function computes the total number of bytes on the compute node
+ */
+void ArrayGroup::init_array_info(int *num_arrays, int **array_bytes_to_go)
+{
+  int *tmp_buf = (int *) malloc(sizeof(int)*num_of_arrays_);
+  *array_bytes_to_go = tmp_buf;
+  *num_arrays = num_of_arrays_;
+
+  /* NOTE(review): assumes ids 0..num_of_arrays_-1 all resolve;    *
+   * find_array() returns NULL for unknown ids, so assign_id()     *
+   * must have been called first.                                  */
+  for(int i=0; i< num_of_arrays_; i++)
+    tmp_buf[i] = find_array(i)->array_info();
+}
+
+int ArrayGroup::io_strategy(){
+  return io_strategy_;
+}
+
+void ArrayGroup::set_io_strategy(int new_strategy){
+  io_strategy_ = new_strategy;
+}
+
+int ArrayGroup::num_of_arrays()
+{
+  return num_of_arrays_;
+}
diff --git a/src/Panda/ArrayGroup.h b/src/Panda/ArrayGroup.h
new file mode 100644
index 0000000..0cd741b
--- /dev/null
+++ b/src/Panda/ArrayGroup.h
@@ -0,0 +1,75 @@
+#ifndef Arraygroup_dot_h
+#define Arraygroup_dot_h
+
+class Array;
+class ArrayLayout;
+#include "List.h"
+#include "definitions.h"
+
+/* A named collection of Arrays that take part in one collective *
+ * I/O operation (timestep/checkpoint/restart/read).             */
+class ArrayGroup {
+ protected:
+  char *name_; /* Name of the arraygroup */
+  int num_of_arrays_; /* Number of arrays in group */
+  List *list_; /* List of arrays */
+  int io_strategy_;
+
+  /* If all the arrays have the same io and compute node layouts */
+  Boolean common_layouts_;
+  int common_layout_rank_;
+  ArrayLayout *compute_layout_;
+  Distribution *compute_distribution_;
+  ArrayLayout *io_layout_;
+  Distribution *io_distribution_;
+
+
+  int group_io_count_;   /* write-op counter (packed into schema) */
+  int read_io_count_;    /* read-op counter (packed into schema) */
+  int checkpoint_count_; /* toggles 0/1 on each checkpoint() */
+  int op_type_;          /* current collective op (TIMESTEP, ...) */
+
+
+  Boolean interleaved_;
+  Boolean simulate_;
+  Boolean verify_;
+
+  void do_init();
+  void delete_arrays();
+  void assign_id();
+  void pack_arrays(int**, Boolean);
+
+ public:
+
+  ArrayGroup();
+  ArrayGroup(char *);
+  virtual ~ArrayGroup();
+  void insert(Array*);
+  void pack(int**, int*);   /* serialize (compute side); caller frees */
+  void unpack(int**);       /* deserialize (io side) */
+  void timestep();
+  void general_write();
+  void checkpoint();
+  void restart();
+  void read_timestep();
+  void general_read();
+  Array *find_array(int);   /* NULL when id not present */
+  int op_type();
+  void set_simulate();
+  void reset_simulate();
+  void set_simulate_mode();
+  void reset_simulate_mode();
+  Boolean simulate();
+  Boolean verify();
+  void set_verify();
+  void reset_verify();
+  void set_verify_mode();
+  void reset_verify_mode();
+  void unpack_arrays(int**);
+  void init_array_info(int*,int**);
+  void set_io_strategy(int);
+  int io_strategy();
+  int num_of_arrays();
+  void clear();
+};
+
+#endif
+
diff --git a/src/Panda/ArrayLayout.C b/src/Panda/ArrayLayout.C
new file mode 100644
index 0000000..1398ef4
--- /dev/null
+++ b/src/Panda/ArrayLayout.C
@@ -0,0 +1,179 @@
+#include "definitions.h"
+#include "ArrayLayout.h"
+
+ArrayLayout::ArrayLayout(int Rank, int *sizearray):Template(Rank, sizearray){}
+
+/* Create an arraylayout object using info stored in the schema buffer */
+ArrayLayout::ArrayLayout(int **schema_buf)
+{
+  int* ptr = *schema_buf;
+
+  rank_ = *ptr++;
+  size_ = (int *) malloc(sizeof(int)*rank_);
+  for(int i=0; i < rank_; i++)
+    size_[i] = *ptr++;
+
+  /* Advance the caller's cursor past the consumed ints. */
+  *schema_buf = ptr;
+}
+
+/* Make a copy of an existing ArrayLayout object */
+ArrayLayout::ArrayLayout(ArrayLayout *old_layout)
+{
+  rank_ = old_layout->rank();
+  size_ = copy_int_list(rank_, old_layout->size());
+}
+
+/* Use the destructor of the Template object */
+ArrayLayout::~ArrayLayout()
+{
+}
+
+/* converts a chunk index to a number (row-major linearization, *
+ * last dimension varies fastest) */
+int ArrayLayout::convert_from_index_to_number(int *indices)
+{
+  int result=0, temp_product=1;
+  for(int i=rank_-1; i>=0; i--)
+  {
+    result += temp_product * indices[i];
+    temp_product *= size_[i];
+  }
+  return result;
+}
+
+/* converts a number to the appropriate chunk index */
+void ArrayLayout::convert_from_number_to_index(int num, int *result)
+{
+  int temp_product=1;
+  int i, j;
+
+  for(i = 0; i< rank_; i++)
+  {
+    temp_product = 1;
+    for(j = i+1 ; j < rank_; j++)
+      temp_product *= size_[j];
+    result[i] = num / temp_product;
+    num -= num/temp_product *temp_product;
+  }
+}
+
+/* converts a number to the appropriate chunk index *
+ * (malloc'd result; caller frees)                  */
+int* ArrayLayout::convert_from_number_to_index(int num)
+{
+  int* result = (int *) malloc(sizeof(int)*rank_);
+  convert_from_number_to_index(num, result);
+  return result;
+}
+
+/* Check if the input indices are valid. Assumes that the rank of *
+ * input indices are the same as rank of the layout */
+Boolean ArrayLayout::valid_index(int *indices)
+{
+  if (indices == NULL) return NO;
+  else for(int i=0; i<rank_; i++)
+  {
+    if ((indices[i] < 0) || (indices[i] >= size_[i]))
+      return NO;
+  }
+  return YES;
+}
+
+/* Checks if the specified input distribution of the array is  *
+ * compatible with the layout. It is compatible only if the    *
+ * number of dimensions in which the array is distributed in a *
+ * BLOCK or CYCLIC fashion is equal to the rank of the layout  */
+Boolean ArrayLayout::valid_distribution(int array_rank, Distribution* dist)
+{
+  if ((array_rank <= 0) || (dist == NULL)) return NO;
+  else {
+    int block_or_cyclic=0, i;
+    for (i=0;i<array_rank;i++)
+      if ((dist[i]==BLOCK)||(dist[i]==CYCLIC)) block_or_cyclic++;
+    if (block_or_cyclic != rank_)
+      return NO;
+    else
+      return YES;
+  }
+}
+
+
+/* Extent of dimension i (no bounds check). */
+int ArrayLayout::size(int i){return size_[i];}
+
+
+
+
+/* This function is used to return a linked list of numbers (representing the
+ * indices of the compute node chunks which overlap with the io node chunk)
+ * given the base, size of the overlapping layout (??).
+ *
+ * The function assumes that the input is valid; ret_list must have
+ * room for prod(index_size) ints, and *num receives that count.
+ */
+void ArrayLayout::indices_list(int *index_base, int *index_size,
+			       int *num, int *ret_list)
+{
+  int *ptr=ret_list;
+  int size=1;
+  for(int i=0; i < rank(); i++)
+    size *= index_size[i];
+  *num = size;
+  calculate_indices(index_base, index_size, rank(), 0, &ptr);
+}
+
+/* Recursive function to convert a layout into a list of numbers.  *
+ * my_rank counts the dimensions still to be expanded; sum is the  *
+ * partial linearized index accumulated so far.                    */
+void ArrayLayout::calculate_indices(int *index_base, int *index_size,
+				    int my_rank, int sum, int **buf_ptr)
+{
+  int prod=1, i;
+  int *ptr;
+
+#ifdef DEBUG
+  /* Fixed: the pointer was printed with %ld (undefined behavior); *
+   * use %p with a void* cast instead.                             */
+  printf("In calculate indices rank=%d sum=%d *buf=%p\n", my_rank, sum,
+	 (void *) *buf_ptr);
+#endif
+  if (my_rank > 1)
+  {
+    for(i=rank()-1; i > (rank() - my_rank) ; i--)
+      prod *= size_[i];
+    for(i=0 ; i < index_size[rank()-my_rank]; i++)
+      calculate_indices(index_base, index_size, my_rank-1,
+			sum + (index_base[rank()-my_rank]+i)*prod, buf_ptr);
+  }
+  else
+  {
+    /* Base case: emit one linearized index per element of the *
+     * last dimension and advance the output cursor.           */
+    for(i=0; i < index_size[rank()-my_rank]; i++)
+    {
+      ptr = *buf_ptr;
+      *ptr++ = sum + (index_base[rank()-my_rank]+i);
+#ifdef DEBUG
+      printf("In calculate indices *buf=%p val=%d\n", (void *) *buf_ptr,
+	     **buf_ptr);
+#endif
+      *buf_ptr = ptr;
+    }
+  }
+}
+
+/* Pack the info into the schema buffer */
+void ArrayLayout::pack(int **schema_buf)
+{
+  int* ptr = *schema_buf;
+
+  *ptr++ = rank_;
+  for(int i=0; i< rank_; i++)
+    *ptr++ = size_[i];
+
+  *schema_buf = ptr;
+}
+
+/* Check if the two layouts are equal (same rank and extents) */
+Boolean ArrayLayout::equal(ArrayLayout *layout)
+{
+  if (rank_ != layout->rank()) return NO;
+  for(int i=0; i<rank_; i++)
+    if (size_[i] != layout->size(i)) return NO;
+  return YES;
+}
+
+
+
+/* Raw extents array (borrowed pointer; do not free). */
+int* ArrayLayout::size(){return size_;}
+
+
+
diff --git a/src/Panda/ArrayLayout.h b/src/Panda/ArrayLayout.h
new file mode 100644
index 0000000..d5e0a65
--- /dev/null
+++ b/src/Panda/ArrayLayout.h
@@ -0,0 +1,26 @@
+#ifndef ArrayLayout_dot_h
+#define ArrayLayout_dot_h
+
+#include "Template.h"
+
+/* Cartesian layout of chunks; adds index<->number conversion and *
+ * schema (de)serialization on top of Template's rank_/size_.     */
+class ArrayLayout : public Template {
+  /* Inherits rank_,size_ from Template */
+ public:
+  ArrayLayout(int Rank, int *sizearray);
+  ArrayLayout(int** schema_buf);         /* unpack from schema buffer */
+  ArrayLayout(ArrayLayout *old_layout);  /* deep copy */
+  virtual ~ArrayLayout();
+  void pack(int** schema_buf);
+  int convert_from_index_to_number(int *indices);
+  int* convert_from_number_to_index(int num);   /* caller frees */
+  void convert_from_number_to_index(int num, int *result);
+  Boolean valid_index(int *);
+  Boolean valid_distribution(int, Distribution*);
+  Boolean equal(ArrayLayout*);
+  int size(int);
+  int* size();
+  void indices_list(int*, int*, int*, int*);
+  void calculate_indices(int*,int*,int,int,int**);
+};
+
+#endif
diff --git a/src/Panda/Attribute.C b/src/Panda/Attribute.C
new file mode 100644
index 0000000..0c50f04
--- /dev/null
+++ b/src/Panda/Attribute.C
@@ -0,0 +1,187 @@
+#include "definitions.h"
+#include "Attribute.h"
+#include "MPIFS.h"
+#include "string.h"
+
+
+extern MPIFS *MPIFS_global_obj;
+extern "C" {
+ int IOwriteAttribute(IOFile,char*,int,int,void *);
+ int IOsizeOf(int);
+ int IOreadAttributeInfo(IOFile,char*,int*,int*);
+ int IOreadAttribute(IOFile,int,void*);
+// IOFile IEEEopen(char *,char *);
+}
+
+Attribute::Attribute()
+{
+  name_ = NULL;
+  data_status_ = 0;
+  data_ = NULL;
+}
+
+/* Store a private, NUL-terminated copy of the attribute name.     *
+ * Fixed: the original indexed name_[i] after the for loop, but    *
+ * under ISO C++ the loop variable is out of scope there; copy     *
+ * with memcpy and terminate at len instead.                       */
+void Attribute::init(char *name)
+{
+  int len = strlen(name);
+  name_ = (char *)malloc(sizeof(char) * (len + 1));
+  memcpy(name_, name, len);
+  name_[len] = '\0';
+}
+
+/* Initialize name plus element size/count and a borrowed data *
+ * pointer (data_status_ == 0: this object does not own data). */
+void Attribute::init(char *name, int esize, int count, void *data)
+{
+  int len = strlen(name);
+  name_ = (char *)malloc(sizeof(char) * (len + 1));
+  memcpy(name_, name, len);
+  name_[len] = '\0';
+  esize_ = esize;
+  count_ = count;
+  data_ = data;
+  data_status_ = 0;
+}
+
+Attribute::~Attribute()
+{
+  /* data_ is freed only when this object allocated it (data_status_). */
+  if (name_) free(name_);
+  if (data_status_ && data_) free(data_);
+}
+
+/* Serialize this attribute into a malloc'd char buffer (caller    *
+ * frees schema).  Layout: op_type, fname len+chars, name          *
+ * len+chars, then for TIMESTEP esize/count plus the raw payload.  *
+ * NOTE(review): int_to_char assumes sizeof(int) == 4.             */
+void Attribute::pack(int &schema_len, char *&schema, char *fname, int op_type)
+{
+  union int_to_char tmp;
+  int i, real_size = IOsizeOf(esize_);
+
+  int len1 = strlen(fname);
+  int len = strlen(name_);
+  if (op_type == TIMESTEP)
+    schema_len = 5 * sizeof(int) + len1 + len + real_size * count_;
+  else schema_len = 3 * sizeof(int) + len1 + len;
+  schema = (char *)malloc(sizeof(char) * schema_len);
+  char *ptr = schema;
+
+  tmp.i = op_type;
+  for (i=0; i<4; i++) *ptr++ = tmp.c[i];
+  tmp.i = len1;
+  for (i=0; i<4; i++) *ptr++ = tmp.c[i];
+  for (i=0; i<len1; i++) *ptr++ = fname[i];
+  tmp.i = len;
+  for (i=0; i<4; i++) *ptr++ = tmp.c[i];
+  for (i=0; i<len; i++) *ptr++ = name_[i];
+
+  if (op_type == TIMESTEP) {
+    tmp.i = esize_;
+    for (i=0; i<4; i++) *ptr++ = tmp.c[i];
+    tmp.i = count_;
+    for (i=0; i<4; i++) *ptr++ = tmp.c[i];
+    memcpy(ptr, data_, real_size * count_);
+  }
+}
+
+/* Rebuild an Attribute from a packed schema buffer (see pack()). *
+ * For TIMESTEP schemas the attribute payload follows the header. */
+Attribute::Attribute(char *schema, int op_type)
+{
+  union int_to_char tmp;
+  int i, len, real_size;
+  char *ptr = schema;
+
+  for (i=0; i<4; i++) tmp.c[i] = *ptr++;
+  len = tmp.i;
+  name_ = (char *)malloc(sizeof(char) * (len + 1));
+  for (i=0; i<len; i++) name_[i] = *ptr++;
+  name_[i] = '\0';
+
+  if (op_type == TIMESTEP) {
+    for (i=0; i<4; i++) tmp.c[i] = *ptr++;
+    esize_ = tmp.i;
+    real_size = IOsizeOf(esize_);
+    for (i=0; i<4; i++) tmp.c[i] = *ptr++;
+    count_ = tmp.i;
+    /* Fixed: allocate with the real (on-disk) element size.  The  *
+     * original did malloc(esize_ * count_) but copied             *
+     * real_size * count_ bytes, overflowing the buffer whenever   *
+     * IOsizeOf(esize_) > esize_.                                  */
+    data_ = (void *)malloc(real_size * count_);
+    memcpy(data_, ptr, real_size * count_);
+    data_status_ = 1;
+  } else {
+    /* Fixed: leave the object in a well-defined state so the      *
+     * destructor does not read uninitialized data_/data_status_.  */
+    data_ = NULL;
+    data_status_ = 0;
+  }
+}
+
+/* Collective read of attribute n from file fname.  Compute nodes  *
+ * receive the data from the I/O nodes; part-time I/O nodes read   *
+ * it from their own part file (fname.<io-rank>).                  */
+void Attribute::read(char *fname, char *n)
+{
+  int node_type = MPIFS_global_obj->node_type();
+  IOFile fp;
+
+  if (node_type == PART_TIME_COMPUTE || node_type == COMPUTE_NODE) {
+    if (MPIFS_global_obj->am_master_compute_node()) {
+      init(n);
+      MPIFS_global_obj->send_attr_schema(this, fname, READ_TIMESTEP);
+    }
+    MPIFS_global_obj->receive_attr_data(this);
+  } else { // PART_TIME_IO
+    init(n);
+    if (MPIFS_global_obj->am_master_compute_node())
+      MPIFS_global_obj->send_attr_schema(this, fname, READ_TIMESTEP);
+    MPIFS_global_obj->receive_attr_schema();
+
+    int len = strlen(fname);
+    char *name = (char *)malloc(sizeof(char) * (len+1));
+    /* NOTE(review): len+6 leaves room for ".NNNN"; verify the io   *
+     * rank can never exceed four digits.                           */
+    char *name1 = (char *)malloc(sizeof(char) * (len+6));
+    /* Fixed: copy with memcpy/len instead of indexing name[i]      *
+     * after the for loop (loop variable out of scope in ISO C++).  */
+    memcpy(name, fname, len);
+    name[len] = '\0';
+    sprintf(name1, "%s.%d", name, MPIFS_global_obj->my_rank(IO_NODE));
+    fp = MPIFS_global_obj->open_file(name1, READ_TIMESTEP);
+    read_data(fp);
+    if (MPIFS_global_obj->am_master_io_node()) {
+      MPIFS_global_obj->send_attr_data(this);
+    }
+    MPIFS_global_obj->receive_attr_data(this);
+    free(name);
+    free(name1);   /* fixed: was leaked in the original */
+  }
+}
+
+/* Collective write of attribute n to file fname.  Only the master *
+ * compute node ships the schema; part-time I/O nodes write to     *
+ * their own part file (fname.<io-rank>).                          */
+void Attribute::write(char *fname, char *n, int esize, int count, void *data)
+{
+  int node_type = MPIFS_global_obj->node_type();
+
+  if (node_type == PART_TIME_COMPUTE || node_type == COMPUTE_NODE) {
+    if (MPIFS_global_obj->am_master_compute_node()) {
+      init(n, esize, count, data);
+      MPIFS_global_obj->send_attr_schema(this, fname, TIMESTEP);
+    }
+  } else { // PART_TIME_IO
+    init(n, esize, count, data);
+    if (MPIFS_global_obj->am_master_compute_node())
+      MPIFS_global_obj->send_attr_schema(this, fname, TIMESTEP);
+    MPIFS_global_obj->receive_attr_schema();
+
+    IOFile fp;
+    int len = strlen(fname);
+    char *name = (char *)malloc(sizeof(char) * (len+1));
+    /* NOTE(review): len+6 leaves room for ".NNNN"; verify the io   *
+     * rank can never exceed four digits.                           */
+    char *name1 = (char *)malloc(sizeof(char) * (len+6));
+    /* Fixed: copy with memcpy/len instead of indexing name[i]      *
+     * after the for loop (loop variable out of scope in ISO C++).  */
+    memcpy(name, fname, len);
+    name[len] = '\0';
+    sprintf(name1, "%s.%d", name, MPIFS_global_obj->my_rank(IO_NODE));
+
+    fp = MPIFS_global_obj->open_file(name1, TIMESTEP);
+
+    write_data(fp);
+    free(name);
+    free(name1);   /* fixed: was leaked in the original */
+  }
+}
+
+void Attribute::write_data(IOFile fp)
+{
+  IOwriteAttribute(fp, name_, esize_, count_, data_);
+}
+
+/* Look the attribute up by name and read its payload; on failure *
+ * only a diagnostic is printed and data_ is left unchanged.      *
+ * NOTE(review): data_ is malloc'd here but data_status_ stays 0, *
+ * so ~Attribute() will not free it — confirm ownership or this   *
+ * leaks.                                                         */
+void Attribute::read_data(IOFile fp)
+{
+  int index = IOreadAttributeInfo(fp, name_, &esize_, &count_);
+  if (index >= 0) {
+    data_ = (void *)malloc(IOsizeOf(esize_) * count_);
+    IOreadAttribute(fp, index, data_);
+  } else printf("Fail to read attribute %s\n", name_);
+}
+
+/* Trivial accessors/mutators. */
+void *Attribute::get_data_ptr() { return data_; }
+void Attribute::set_data_ptr(void *d) { data_ = d; }
+int Attribute::data_size() { return IOsizeOf(esize_) * count_; }
+int Attribute::esize() { return esize_; }
+int Attribute::count() { return count_; }
+void Attribute::set_count(int c) { count_ = c; }
+void Attribute::set_esize(int e) { esize_ = e; }
diff --git a/src/Panda/Attribute.h b/src/Panda/Attribute.h
new file mode 100644
index 0000000..d948316
--- /dev/null
+++ b/src/Panda/Attribute.h
@@ -0,0 +1,43 @@
+#ifndef Attribute_dot_h
+#define Attribute_dot_h
+
+#include "definitions.h"
+
+
+/* Helper for byte-wise (un)packing of ints.                     *
+ * NOTE(review): assumes sizeof(int) == 4 — verify on each port. */
+typedef union int_to_char {
+  int i;
+  char c[4];
+} int_to_char;
+
+//#include "../IEEEIO/IEEEIO.h"
+//#include "../IEEEIO/IOProtos.h"
+#include "external/IEEEIO/src/Arch.h"
+
+/* A named, typed attribute that can be packed into a schema     *
+ * buffer and written/read through the IEEEIO library.  esize_   *
+ * is an IEEEIO type code (IOsizeOf() converts it to bytes).     */
+class Attribute {
+  char *name_;
+  int esize_;        /* IEEEIO element-type code */
+  int count_;        /* number of elements */
+  void *data_;
+  int data_status_; // 0: no alloc, 1: alloc
+
+public:
+  Attribute();
+  Attribute(char *, int);    /* unpack from schema (see pack()) */
+  ~Attribute();
+  void init(char *, int, int, void *);
+  void init(char *);
+  void pack(int &, char *&, char *, int);
+  void write(char *, char *, int, int, void *);
+  void read(char *, char *);
+  void write_data(IOFile);
+  void read_data(IOFile);
+  void *get_data_ptr();
+  void set_data_ptr(void *);
+  int data_size();           /* payload size in bytes */
+  int esize();
+  int count();
+  void set_esize(int);
+  void set_count(int);
+};
+
+#endif
diff --git a/src/Panda/CSDIO.C b/src/Panda/CSDIO.C
new file mode 100644
index 0000000..b2d4064
--- /dev/null
+++ b/src/Panda/CSDIO.C
@@ -0,0 +1,694 @@
+#include "definitions.h"
+#include "ArrayGroup.h"
+#include "MPIFS.h"
+#include "Chunk.h"
+#include "App_Info.h"
+#include "Array.h"
+#include "message.h"
+#include "CSDIO.h"
+#include "List.h"
+
+
+extern MPIFS* MPIFS_global_obj;
+extern int SUBCHUNK_SIZE;
+
+/* This code is executed on the compute nodes (excluding the part-time i/o
+ * nodes).
+ */
/* This code is executed on the compute nodes (excluding the part-time i/o
 * nodes).  For each array in the group it posts the sends (writes) or
 * receives (reads) toward the i/o nodes and waits for them to drain. */
void CSDIO::compute_node_io_loop(ArrayGroup *group)
{
  int array_idx;
  Boolean read_op;

  /* Restart / general-read / read-timestep pull data from the i/o nodes;
   * every other op type pushes data to them. */
  op_type_ = group->op_type();
  if ((op_type_ == RESTART) || (op_type_ == GENERAL_READ) ||
      (op_type_ == READ_TIMESTEP)){
    read_op = YES;
  } else {
    read_op = NO;
  }

  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_);
  num_of_arrays_ = group->num_of_arrays();
  receive_io_app_info();
  num_io_nodes_ = io_app_info_->app_size();
#ifdef DEBUG
  printf("%d: op_type_ = %d read_op =%d\n", world_rank_, op_type_, read_op);
  printf("%d: Compute node - num of arrays %d - num of io_nodes %d\n",
         world_rank_, num_of_arrays_, num_io_nodes_);
#endif
  /* Reusable scratch array: process_compute_side_array() copies each
   * schema array into it in turn. */
  comp_current_array_ = new Array();
  comp_current_array_id_ = -1;

  /* First call per array posts the messages (returns NO); subsequent
   * calls block until they complete (returns YES). */
  for(array_idx = 0; array_idx < num_of_arrays_; array_idx++){
    while(!process_compute_side_array(group, array_idx, read_op)){};
  }
  delete comp_current_array_;
  comp_current_array_ = NULL;
}
+
+/* An array is stored in the comp_current_array_. this must be instatntiated
+ * before use. If the input array_id is the same as that stored in
+ * comp_current_array_id_, then it means that all the required sends/recvs
+ * have been posted and all we have to do is to verify its completion. If
+ * they are different, then it means that we have to start the i/o for the
+ * new array.
+ */
/* Compute-side state machine for one array.
 * First call for a given array_idx posts all sends/recvs toward the i/o
 * nodes and returns NO; later calls test/wait for their completion and
 * return YES when the array is done.  read_op selects receives (reads)
 * vs sends (writes). */
Boolean CSDIO::process_compute_side_array(ArrayGroup *group,
                                          int array_idx, Boolean read_op)
{
  int make_subchunks=-1, tag, tag_ctr=0, buf_size, bytes_to_go, flag, i;
  char *tmp_buf;
  void *void_buf;
  Chunk *compute_chunk=NULL, *io_chunk=NULL, *subchunk=NULL;

  if (comp_current_array_id_ != array_idx){
    /* We have to post the sends/recvs for this array*/

    comp_current_array_->copy(group->find_array(array_idx));
    comp_array_rank_ = comp_current_array_->rank();
    if (comp_array_rank_ > max_comp_rank_){
      realloc_compute_schema_bufs(comp_array_rank_);
    }

    nat_chunked_ = comp_current_array_->nat_chunked();
    sub_chunked_ = comp_current_array_->sub_chunked();
    if (nat_chunked_ && !sub_chunked_)
      contiguous_ = YES;
    else
      contiguous_ = NO;
    compute_pending_ = 0;

    if (contiguous_){
      /* Nat chunking with no user-specified chunking. We don't need
       * to use any MPI dervied datatypes.
       */
      comp_current_array_->list_clear();
      compute_chunk = comp_current_array_->get_next_chunk();
      while(compute_chunk != NULL){
        comp_current_chunk_id_ = compute_chunk->chunk_id();
        io_overlaps_ = 1;
        io_overlap_chunk_ids_[0] = comp_current_chunk_id_;
        io_dest_ids_[0] = io_app_info_->world_rank(comp_current_array_->which_node(
                                                   comp_current_chunk_id_,
                                                   IO_NODE, num_io_nodes_));

        if (io_dest_ids_[0] == world_rank_){
          /* Part-time case - do nothing, the io node should take
             care of this */
        }
        else {
          /* Stream the chunk in SUBCHUNK_SIZE slices, one message each. */
          bytes_to_go = compute_chunk->total_size_in_bytes();
          tmp_buf = (char *)compute_chunk->data_ptr();
          tag_ctr = 0;
          while(bytes_to_go > 0){
            buf_size = min(SUBCHUNK_SIZE, bytes_to_go);
            if (compute_pending_ >= max_pending_){
              realloc_pending_messages(compute_pending_+1);
            }

            /* Tag encodes (chunk id, slice index); must match the i/o
             * node's tag computation in start_to_finish(). */
            tag = comp_current_chunk_id_ * 1000 + tag_ctr*10;
            if (read_op)
              nb_receive_message((void *) tmp_buf, buf_size, MPI_CHAR,
                                 io_dest_ids_[0], tag + CHUNK_DATA_FROM_IO,
                                 MPI_COMM_WORLD,
                                 &comp_requests_[compute_pending_]);
            else
              nb_send_message((void *) tmp_buf, buf_size, MPI_CHAR,
                              io_dest_ids_[0], tag + CHUNK_DATA_TO_IO,
                              MPI_COMM_WORLD,
                              &comp_requests_[compute_pending_]);

            tag_ctr++;
            tmp_buf += buf_size;
            bytes_to_go -= buf_size;
            compute_pending_++;
          }
        }

        compute_chunk = comp_current_array_->get_next_chunk();
      }

      comp_current_array_->list_clear();
    } /* End if contiguous */
    else {
      /* We have to use mpi-derived datatypes */
      make_subchunks = -1;
      /* NOTE(review): io_chunk and subchunk are new'ed here but never
       * deleted on any path out of this function -- leaked once per
       * array.  Consider stack objects or delete before returning. */
      io_chunk = new Chunk();
      subchunk = new Chunk();
      comp_current_array_->list_clear();

      compute_chunk = comp_current_array_->get_next_chunk();
      while (compute_chunk != NULL){
        comp_current_chunk_id_ = compute_chunk->chunk_id();

        /* Determine the overlapping I/O chunks */
        io_chunk_overlaps(comp_current_array_, compute_chunk);
        for( i=0;i< io_overlaps_;i++){
          if (io_dest_ids_[i] != world_rank_){
            /* Different node- so we have to post the send/recv */
            io_chunk->init(comp_current_array_, io_overlap_chunk_ids_[i],
                           IO_NODE, NO_ALLOC);
            if (!sub_chunked_ && (make_subchunks == -1)){
              comp_current_array_->make_sub_chunks(io_chunk);
              make_subchunks = 1;
            }

            tag_ctr=0;
            comp_num_of_subchunks_ =
              comp_current_array_->layout(SUB_CHUNK)->total_elements();
#ifdef DEBUG
            printf("comp_num_of_subchunks_ = %d\n", comp_num_of_subchunks_);
#endif
            for(comp_current_subchunk_id_ = 0;
                comp_current_subchunk_id_ < comp_num_of_subchunks_;
                comp_current_subchunk_id_++){
#ifdef DEBUG
              printf("io_chunk = %d subchunk_id = %d\n",
                     io_chunk->chunk_id(), comp_current_subchunk_id_);
#endif
              subchunk->init(io_chunk, comp_current_subchunk_id_, NO_ALLOC);
              subchunk->compute_overlap(compute_chunk, comp_overlap_base_,
                                        comp_overlap_size_, comp_overlap_stride_);
              buf_size = num_elements(comp_array_rank_, comp_overlap_size_);
              if (buf_size > 0){
                /* Something to send */
                if (compute_pending_ >= max_pending_){
                  realloc_pending_messages(compute_pending_+1);
                }
                /* NOTE(review): tmp_buf is read here before being assigned
                 * in this branch; presumably make_datatype() overwrites
                 * void_buf with the datatype's base address -- verify. */
                void_buf = (void *)tmp_buf;
                compute_chunk->make_datatype(comp_overlap_base_, comp_overlap_size_,
                                             comp_overlap_stride_,
                                             &void_buf,
                                             &comp_datatypes_[compute_pending_]);
                tmp_buf = (char *)void_buf;
                tag = io_chunk->chunk_id()*1000 + tag_ctr*10;
                if (read_op)
                  nb_receive_message((void *) tmp_buf, 1,
                                     comp_datatypes_[compute_pending_],
                                     io_dest_ids_[i],
                                     tag + CHUNK_DATA_FROM_IO,
                                     MPI_COMM_WORLD,
                                     &comp_requests_[compute_pending_]);
                else
                  nb_send_message((void *) tmp_buf, 1,
                                  comp_datatypes_[compute_pending_],
                                  io_dest_ids_[i], tag + CHUNK_DATA_TO_IO,
                                  MPI_COMM_WORLD,
                                  &comp_requests_[compute_pending_]);
                compute_pending_++;
              }
              tag_ctr++;
            }
          }
        }
        compute_chunk = comp_current_array_->get_next_chunk();
      }
    }
    comp_current_array_id_ = array_idx;
    return NO;
  } else {
    if (part_time_io_){
      /* Just test and get back to io-node stuff */
      MPI_Testall(compute_pending_, comp_requests_, &flag, comp_statuses_);
      if (flag){
        if (!contiguous_){
          for(i=0; i<compute_pending_;i++)
            MPI_Type_free(&comp_datatypes_[i]);
        }
        compute_pending_ = 0;
        comp_current_array_->clear();
        return YES;
      }
    } else {
#ifdef DEBUG
      printf("%d: Waiting for %d messages to complete\n", world_rank_,
             compute_pending_);
#endif
      /* Pure compute node: nothing else to do, so block until done. */
      MPI_Waitall(compute_pending_, comp_requests_, comp_statuses_);
      if (!contiguous_){
        for(i=0; i<compute_pending_;i++)
          MPI_Type_free(&comp_datatypes_[i]);
      }
#ifdef DEBUG
      printf("%d: Done waiting \n", world_rank_);
#endif

      compute_pending_ = 0;
      comp_current_array_->clear();
      return YES;
    }
    return NO;
  }
}
+
+
/* Main i/o-node driver.  For every array in the schema, move each chunk
 * owned by this i/o node between disk and the compute nodes.  When
 * part_time is set this node is also a compute node, so compute-side
 * message handling (process_compute_side_array) is interleaved. */
void CSDIO::start_to_finish(Boolean part_time,
                            ArrayGroup *compute_group)
{
  int array_idx, make_subchunks, bytes_to_go, buf_size, tag_ctr, tag;
  Boolean read_op, part_time_done;
  Chunk *chunk=NULL, *subchunk=NULL, *compute_chunk=NULL;

  /* Don't ask me why. Ask szu-Wen */
  comp_current_array_id_ = -1;

  /* Read ops stream disk -> compute nodes; everything else the reverse. */
  if ((op_type_ == RESTART) || (op_type_ == GENERAL_READ) ||
      (op_type_ == READ_TIMESTEP)){
    read_op = YES;
  } else {
    read_op = NO;
  }

  part_time_io_ = part_time;
  compute_node_group_ = compute_group;
  comp_current_array_ = NULL;
  if (part_time_io_){
    comp_current_array_ = new Array();
  }

  /* Receive the i/o node information */
  receive_io_app_info();

  /* To reduce costs associated with object creation and deletion, we *
   * will create a dummy chunk,subchunk and compute chunk object and *
   * re-initialize them whenever necessary. */
  chunk = new Chunk();
  current_chunk_ = chunk;
  subchunk = new Chunk();
  compute_chunk = new Chunk();

  for(array_idx=0; array_idx<num_of_arrays_; array_idx++){

    /* NOTE(review): part_time_done is assigned but never read. */
    if (part_time_io_)
      part_time_done = process_compute_side_array(compute_group, array_idx, read_op);

    make_subchunks = -1;
    current_array_ = find_array(array_idx);
    nat_chunked_ = current_array_->nat_chunked();
    sub_chunked_ = current_array_->sub_chunked();
    if (nat_chunked_ && !sub_chunked_)
      contiguous_ = YES;
    else
      contiguous_ = NO;

    array_rank_ = current_array_->rank();
    if (array_rank_ > max_rank_){
      realloc_schema_bufs(array_rank_);
    }

    num_of_chunks_ = current_array_->layout(IO_NODE)->total_elements();
    current_chunk_id_ = current_array_->get_next_index(-1, my_io_rank_,
                                                       num_io_nodes_);
    if (contiguous_){
      /* Natural chunked and no-user specified subchunking */

      while(current_chunk_id_ < num_of_chunks_){
        num_overlaps_ = 1;
        overlap_chunk_ids_[0] = current_chunk_id_;
        dest_ids_[0] = app_info_->world_rank(current_array_->which_node(
                                             current_chunk_id_, COMPUTE_NODE));
        if (part_time_io_ && (world_rank_ == dest_ids_[0])){
          /* Chunk lives on this very node: bypass MPI entirely. */
          direct_io(array_idx, current_chunk_id_, read_op, NULL, NULL);
        } else {
          chunk->init(current_array_, current_chunk_id_, IO_NODE, NO_ALLOC);
          bytes_to_go = chunk->total_size_in_bytes();
          chunk->set_data_ptr(mem_buf_);

          /* We don't have to make the schema requests - just post the
             send/recv */
          tag_ctr = 0;
          while (bytes_to_go > 0){
            /* Stream the chunk through mem_buf_ in SUBCHUNK_SIZE slices;
             * tag must match the compute side's computation. */
            buf_size = min(SUBCHUNK_SIZE, bytes_to_go);
            tag = current_chunk_id_*1000+tag_ctr*10;
            if (read_op) {
              read_data(mem_buf_, buf_size);
              nb_send_message((void *)mem_buf_, buf_size, MPI_CHAR,
                              dest_ids_[0],
                              tag+CHUNK_DATA_FROM_IO, MPI_COMM_WORLD,
                              &requests_[0]);
              wait_for_completion();
            } else {
              nb_receive_message((void *)mem_buf_, buf_size, MPI_CHAR,
                                 dest_ids_[0],tag+CHUNK_DATA_TO_IO,
                                 MPI_COMM_WORLD, &requests_[0]);

              wait_for_completion();
              write_data(mem_buf_, buf_size, chunk->element_size());
            }
            bytes_to_go -= buf_size;
            tag_ctr++;
          }
          chunk->set_data_ptr(NULL);
        }

        current_chunk_id_ = current_array_->get_next_index(current_chunk_id_,
                                                           my_io_rank_,
                                                           num_io_nodes_);
      }
    } /* End if contiguous_ */
    else { /* Have to use MPI-derived datatypes */

      while(current_chunk_id_ < num_of_chunks_){
        chunk->init(current_array_, current_chunk_id_, IO_NODE, NO_ALLOC);
        if (!sub_chunked_ && (make_subchunks == -1)){
          /* No user-specified subchunking: derive one from this chunk. */
          current_array_->make_sub_chunks(chunk);
          make_subchunks = 1;
        }
        num_of_subchunks_=current_array_->layout(SUB_CHUNK)->total_elements();
        tag_ctr=0;

        for(current_subchunk_id_ = 0; current_subchunk_id_ < num_of_subchunks_;
            current_subchunk_id_++){
          subchunk->init(current_chunk_, current_subchunk_id_, NO_ALLOC);
          bytes_to_go = subchunk->total_size_in_bytes();

          if (bytes_to_go > mem_buf_size_){
            realloc_mem_bufs(bytes_to_go);
          }
          subchunk->set_data_ptr(mem_buf_);

          /* Work out which compute chunks overlap this subchunk and the
           * base/size/stride of each overlap. */
          compute_chunk_overlaps(current_array_, subchunk);
          compute_schemas(current_array_, subchunk, compute_chunk, array_idx);

          tag = current_chunk_id_ * 1000 + tag_ctr*10;
          if (read_op){
            read_data(subchunk);
            send_data_to_compute_nodes(subchunk, tag);
            wait_for_completion();
          } else {
            receive_data_from_compute_nodes(subchunk, tag);
            wait_for_completion();
            write_data(subchunk);
          }
          tag_ctr++;
          subchunk->set_data_ptr(NULL);
        }
        current_chunk_id_ = current_array_->get_next_index(current_chunk_id_,
                                                           my_io_rank_,
                                                           num_io_nodes_);
      }
    }
    /* Drain any remaining compute-side messages before the next array. */
    if (part_time_io_)
      while (!process_compute_side_array(compute_group, array_idx, read_op)){};
  }

  /* Free the temp chunk objects */
  delete(chunk);
  delete(subchunk);
  delete(compute_chunk);
  chunk = current_chunk_ = subchunk = compute_chunk = NULL;
  if (comp_current_array_){
    delete(comp_current_array_);
    comp_current_array_ = NULL;
  }
}
+
+
+/* This constructor is for pure io_nodes only */
/* This constructor is for pure io_nodes only: the base class parses the
 * schema; the compute-side buffers are unused, so just NULL them out. */
CSDIO::CSDIO(int *schema_string, int schema_size, int world_rank,
             int comp_app_num, int comp_app_size, App_Info *app_info):
  Simple_IO(schema_string, schema_size, world_rank, comp_app_num,
            comp_app_size, app_info)
{
  clear();
}
+
+
+/* This call is for compute nodes only */
/* This call is for compute nodes only: allocate the compute-side message
 * and schema scratch buffers.
 * NOTE(review): part_time_io_ is not set here, yet ~CSDIO() tests it to
 * decide whether to free these buffers -- verify it is initialized
 * elsewhere (e.g. by the base class) before destruction. */
CSDIO::CSDIO()
{
  do_init();
}
+
/* Constructor for i/o nodes that may double as compute nodes: part-time
 * nodes get the compute-side buffers, dedicated nodes only NULLed
 * pointers (same as the pure-i/o constructor). */
CSDIO::CSDIO(int *schema_string, int schema_size, int world_rank,
             int comp_app_num, int comp_app_size, App_Info *app_info, Boolean part_time):
  Simple_IO(schema_string, schema_size, world_rank, comp_app_num,
            comp_app_size, app_info)
{
  if (part_time){
    /* This is a part-time i/o node */
    do_init();
    part_time_io_ = part_time;
  } else {
    clear();
  }
}
+
+void CSDIO::clear()
+{
+ comp_datatypes_ = NULL;
+ comp_requests_ = NULL;
+ comp_statuses_ = NULL;
+ io_overlap_chunk_ids_ = io_dest_ids_ = comp_overlap_base_ = NULL;
+ comp_overlap_size_ = comp_overlap_stride_ = NULL;
+ io_app_info_ = NULL;
+}
+
/* Allocate the compute-side buffers at their minimum sizes; they are
 * grown on demand by the realloc_* helpers.
 * NOTE(review): malloc results are unchecked, and max_comp_rank_ = 10 is
 * a magic starting capacity (array ranks above 10 trigger a realloc). */
void CSDIO::do_init()
{
  max_pending_ = 1;
  compute_pending_ = 0;
  comp_datatypes_ = (MPI_Datatype *)malloc(sizeof(MPI_Datatype)*max_pending_);
  comp_requests_ = (MPI_Request *)malloc(sizeof(MPI_Request)*max_pending_);
  comp_statuses_ = (MPI_Status *)malloc(sizeof(MPI_Status)*max_pending_);

  io_max_overlaps_ = 1;
  io_overlaps_ =0;
  io_overlap_chunk_ids_ = (int *) malloc(sizeof(int)*io_max_overlaps_);
  io_dest_ids_ = (int *) malloc(sizeof(int)*io_max_overlaps_);

  max_comp_rank_ = 10;
  comp_array_rank_ = 0;
  comp_overlap_base_ = (int *) malloc(sizeof(int)*max_comp_rank_);
  comp_overlap_size_ = (int *) malloc(sizeof(int)*max_comp_rank_);
  comp_overlap_stride_ = (int *) malloc(sizeof(int)*max_comp_rank_);
}
+
+
+
+
/* Free the compute-side buffers; they only exist when do_init() ran
 * (part-time i/o nodes and compute nodes).
 * NOTE(review): dummy_ comes from the base class; on paths where neither
 * flag is set the pointers are assumed NULL from clear() -- verify that
 * part_time_io_ is always initialized on pure compute nodes. */
CSDIO::~CSDIO()
{
  if (part_time_io_ || dummy_){
    if (comp_datatypes_) free(comp_datatypes_);
    if (comp_requests_) free(comp_requests_);
    if (comp_statuses_) free(comp_statuses_);
    if (comp_overlap_base_) free(comp_overlap_base_);
    if (comp_overlap_size_) free(comp_overlap_size_);
    if (comp_overlap_stride_) free(comp_overlap_stride_);
    if (io_overlap_chunk_ids_) free(io_overlap_chunk_ids_);
    if (io_dest_ids_) free(io_dest_ids_);
    if (comp_current_array_) delete(comp_current_array_);
    if (io_app_info_) delete(io_app_info_);
  };
  clear();
}
+
/* Distribute the i/o application's identity (app number, size, world
 * ranks) from the master i/o node to every compute node, and store it in
 * io_app_info_.  The message layout is:
 *   buf[0] = app number, buf[1] = app size, buf[2..] = world ranks.
 * Pure i/o nodes only send; compute-side nodes receive and re-broadcast
 * along the compute-node broadcast tree. */
void CSDIO::receive_io_app_info()
{
  int node_type = MPIFS_global_obj->node_type();
  int num_of_world_nodes, app_info_buf_size, *app_info_buf;
  int tag = APP_INFO * 10 + SPECIAL;
  App_Info *tmp_info = NULL;
  MPI_Status app_status;

  /* Num io nodes <= total nodes, so world size + 2 bounds the message. */
  MPI_Comm_size(MPI_COMM_WORLD, &num_of_world_nodes);
  app_info_buf_size = num_of_world_nodes+2; /* Num io nodes <= total nodes */
  app_info_buf = (int *)malloc(sizeof(int)*app_info_buf_size);

  if (node_type == IO_NODE){
    /* Master i/o node sends io app info to the master compute node */
    if (MPIFS_global_obj->am_master_io_node()){
      tmp_info = MPIFS_global_obj->io_app_info();
      app_info_buf[0] = tmp_info->app_num();
      app_info_buf[1] = tmp_info->app_size();
      tmp_info->world_ranks(&app_info_buf[2]);
      app_info_buf_size = app_info_buf[1] + 2;
#ifdef DEBUG
printf("%d:app_num=%d app_size=%d\n", world_rank_, app_info_buf[0],
       app_info_buf[1]);
printf("sending messages to %d\n", app_info_->get_master());
#endif
      send_message((void *) app_info_buf, app_info_buf_size, MPI_INT,
                   app_info_->get_master(), tag, MPI_COMM_WORLD);
#ifdef DEBUG
      printf("%d: %d %d %d\n", world_rank_,app_info_buf[0], app_info_buf[1],
             app_info_buf[2]);
#endif
    }
  } else if (node_type == PART_TIME_IO){
    if (MPIFS_global_obj->am_master_io_node()){
      /* Master part-time node already owns the info locally. */
      tmp_info = MPIFS_global_obj->io_app_info();
      app_info_buf[0] = tmp_info->app_num();
      app_info_buf[1] = tmp_info->app_size();
      tmp_info->world_ranks(&app_info_buf[2]);
      app_info_buf_size = app_info_buf[1] + 2;

      if (MPIFS_global_obj->am_master_compute_node()){
        MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *) app_info_buf,
                                    app_info_buf_size, MPI_INT, tag);
      } else {
        /* Relay through the master compute node, then forward its copy. */
        send_message((void *)app_info_buf, app_info_buf_size, MPI_INT,
                     app_info_->get_master(), tag, MPI_COMM_WORLD);
        receive_message((void *)app_info_buf, app_info_buf_size, MPI_INT,
                        MPI_ANY_SOURCE, tag, MPI_COMM_WORLD,
                        &app_status);
        mpi_get_count(&app_status, MPI_INT, &app_info_buf_size);
        MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *) app_info_buf,
                                    app_info_buf_size, MPI_INT, tag);
      }
    } else {
      receive_message((void *)app_info_buf, app_info_buf_size, MPI_INT,
                      MPI_ANY_SOURCE, tag, MPI_COMM_WORLD,
                      &app_status);
      mpi_get_count(&app_status, MPI_INT, &app_info_buf_size);
      MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *) app_info_buf,
                                  app_info_buf_size, MPI_INT, tag);
    }

    io_app_info_ = new App_Info(app_info_buf[0], app_info_buf[1],
                                &app_info_buf[2]);
  } else if (node_type == COMPUTE_NODE) {
    receive_message((void *)app_info_buf, app_info_buf_size, MPI_INT,
                    MPI_ANY_SOURCE, tag, MPI_COMM_WORLD,
                    &app_status);
    mpi_get_count(&app_status, MPI_INT, &app_info_buf_size);
#ifdef DEBUG
    printf("%d:app_info_buf_size =%d\n", world_rank_, app_info_buf_size);
#endif
    io_app_info_ = new App_Info(app_info_buf[0], app_info_buf[1],
                                &app_info_buf[2]);
    MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *) app_info_buf,
                                app_info_buf_size, MPI_INT, tag);

  } else if (node_type == PART_TIME_COMPUTE) {
    receive_message((void *)app_info_buf, app_info_buf_size, MPI_INT,
                    MPI_ANY_SOURCE, tag, MPI_COMM_WORLD,
                    &app_status);
    mpi_get_count(&app_status, MPI_INT, &app_info_buf_size);
    io_app_info_ = new App_Info(app_info_buf[0], app_info_buf[1],
                                &app_info_buf[2]);
    MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *) app_info_buf,
                                app_info_buf_size, MPI_INT, tag);

  } else {
    printf("Error in CSDIO::receive_io_app_info - incorrect node type\n");
    exit(1);
  }
  free(app_info_buf);
  app_info_buf = NULL;
}
+
+/* Store the schema only for the part-time i/o case. Don't send the any
+ * schema message.
+ */
+void CSDIO::send_schema_message(int array_id, int index)
+{
+ int *ptr = schema_bufs_[index];
+
+ if (part_time_io_ && (dest_ids_[index] == world_rank_)){
+ *ptr++ = array_id;
+ *ptr++ = overlap_chunk_ids_[index];
+ *ptr++ = (int) nat_chunked_;
+ *ptr++ = (int) contiguous_;
+ *ptr++ = array_rank_;
+ *ptr++ = op_type_;
+
+ for(int i=0; i < array_rank_; i++) *ptr++ = overlap_base_[i];
+ for(i=0; i < array_rank_; i++) *ptr++ = overlap_size_[i];
+ for(i=0; i < array_rank_; i++) *ptr++ = overlap_stride_[i];
+ }
+ }
+
+
+void CSDIO::send_data_to_compute_nodes(Chunk *subchunk, int tag)
+{
+ for(int i=0; i < num_overlaps_; i++){
+ if (part_time_io_ && (dest_ids_[i] == world_rank_)){
+ copy_data(subchunk, i, YES, NULL, NULL);
+ requests_[i] = MPI_REQUEST_NULL;
+ } else {
+ nb_send_message((void *)data_ptrs_[i], 1, datatypes_[i],
+ dest_ids_[i], tag+CHUNK_DATA_FROM_IO,MPI_COMM_WORLD,
+ &requests_[i]);
+ }
+ }
+}
+
+void CSDIO::receive_data_from_compute_nodes(Chunk *subchunk, int tag)
+{
+ for(int i=0; i < num_overlaps_; i++){
+ if (part_time_io_ && (dest_ids_[i] == world_rank_)){
+ copy_data(subchunk, i, NO, NULL, NULL);
+ requests_[i] = MPI_REQUEST_NULL;
+ } else {
+ nb_receive_message((void *)data_ptrs_[i], 1, datatypes_[i],
+ dest_ids_[i], tag+CHUNK_DATA_TO_IO,MPI_COMM_WORLD,
+ &requests_[i]);
+ }
+ }
+}
+
+
+
+void CSDIO::realloc_compute_schema_bufs(int new_max)
+{
+ max_comp_rank_ = new_max;
+ comp_overlap_base_ = (int *) realloc(comp_overlap_base_, new_max*sizeof(int));
+ comp_overlap_stride_ = (int *) realloc(comp_overlap_stride_, new_max*sizeof(int));
+ comp_overlap_size_ = (int *) realloc(comp_overlap_size_, new_max*sizeof(int));
+}
+
+void CSDIO::realloc_pending_messages(int new_max)
+{
+ max_pending_ = new_max;
+ comp_datatypes_ =(MPI_Datatype *)realloc(comp_datatypes_,new_max*sizeof(MPI_Datatype));
+ comp_requests_ = (MPI_Request *)realloc(comp_requests_, new_max*sizeof(MPI_Request));
+ comp_statuses_ = (MPI_Status*)realloc(comp_statuses_, new_max*sizeof(MPI_Status));
+}
+
+
+void CSDIO::realloc_io_buffers(int new_max)
+{
+ io_max_overlaps_ = new_max;
+ io_overlap_chunk_ids_ =(int*)realloc(io_overlap_chunk_ids_, new_max*sizeof(int));
+ io_dest_ids_ = (int *) realloc(io_dest_ids_, new_max*sizeof(int));
+}
+
/* Determine which i/o chunks overlap `subchunk`, and the world rank of
 * the i/o node owning each one (results in io_overlap_chunk_ids_ /
 * io_dest_ids_, count in io_overlaps_). */
void CSDIO::io_chunk_overlaps(Array *array, Chunk *subchunk)
{
  int num_compute_chunks;

  if (nat_chunked_){
    /* Natural chunking: compute and i/o chunks coincide one-to-one. */
    io_overlaps_ = 1;
    io_overlap_chunk_ids_[0] = current_chunk_id_;
  }
  else{
    /* Upper bound on overlaps is the total i/o chunk count.
     * NOTE(review): variable is named num_compute_chunks but counts the
     * IO_NODE layout -- verify naming vs intent. */
    num_compute_chunks = array->layout(IO_NODE)->total_elements();
    if (num_compute_chunks > io_max_overlaps_) realloc_io_buffers(num_compute_chunks);
    subchunk->chunk_overlaps(array, &io_overlaps_,
                             io_overlap_chunk_ids_, IO_NODE);
  }

  for(int i=0; i < io_overlaps_;i++)
    io_dest_ids_[i] = io_app_info_->world_rank
      (array->which_node(io_overlap_chunk_ids_[i],
                         IO_NODE, num_io_nodes_));
}
+
+
+void CSDIO::wait_for_completion()
+{
+ MPI_Waitall(num_overlaps_, requests_, statuses_);
+ if (!contiguous_)
+ for(int i=0; i< num_overlaps_;i++)
+ MPI_Type_free(&datatypes_[i]);
+}
+
/* Return the strategy name stored by the base class. */
char* CSDIO::name()
{
  return name_;
}
+
diff --git a/src/Panda/CSDIO.h b/src/Panda/CSDIO.h
new file mode 100644
index 0000000..efd1841
--- /dev/null
+++ b/src/Panda/CSDIO.h
@@ -0,0 +1,60 @@
#ifndef CSDIO_dot_h
#define CSDIO_dot_h

#include "Simple_IO.h"
class ArrayGroup;
class Array;
class App_Info;
class Chunk;

/* Compute-node scheduled disk i/o strategy: extends Simple_IO with the
 * compute-side bookkeeping needed to post sends/recvs from compute nodes
 * (including part-time i/o nodes that play both roles). */
class CSDIO : public Simple_IO
{
  protected:
    int compute_pending_;          // number of posted, uncompleted messages
    int max_pending_;              // capacity of the three arrays below
    MPI_Datatype *comp_datatypes_; // one derived datatype per pending message
    MPI_Request *comp_requests_;
    MPI_Status *comp_statuses_;
    int comp_array_rank_;          // rank of the array being processed
    int max_comp_rank_;            // capacity of the overlap vectors below
    int *comp_overlap_base_;       // per-dimension overlap base
    int *comp_overlap_size_;       // per-dimension overlap extent
    int *comp_overlap_stride_;     // per-dimension overlap stride
    int io_max_overlaps_;          // capacity of the two arrays below
    int io_overlaps_;              // i/o chunks overlapping the current chunk
    int *io_overlap_chunk_ids_;
    int *io_dest_ids_;             // world rank owning each overlap
    Array *comp_current_array_;    // scratch copy of the array in flight
    int comp_current_array_id_;    // index of that array (-1 = none)
    int comp_current_chunk_id_;
    int comp_current_subchunk_id_;
    int comp_num_of_subchunks_;
    App_Info *io_app_info_;        // identity/ranks of the i/o application

    Boolean process_compute_side_array(ArrayGroup*,int,Boolean);
    void clear();
    void do_init();
    void receive_io_app_info();
    virtual void send_schema_message(int,int);
    virtual void send_data_to_compute_nodes(Chunk*,int);
    virtual void receive_data_from_compute_nodes(Chunk*,int);
    void realloc_compute_schema_bufs(int);
    void realloc_pending_messages(int);
    void realloc_io_buffers(int);
    void io_chunk_overlaps(Array*,Chunk*);
    void wait_for_completion();


  public:
    CSDIO(int*,int,int,int,int,App_Info*);          // pure i/o node
    CSDIO(int*,int,int,int,int,App_Info*,Boolean);  // possibly part-time
    CSDIO();                                        // compute node
    virtual ~CSDIO();
    virtual void start_to_finish(Boolean, ArrayGroup*);
    virtual void compute_node_io_loop(ArrayGroup*);
    virtual char* name();
};



#endif
diff --git a/src/Panda/CSDIO_Shared.C b/src/Panda/CSDIO_Shared.C
new file mode 100644
index 0000000..35e864d
--- /dev/null
+++ b/src/Panda/CSDIO_Shared.C
@@ -0,0 +1,241 @@
+#include "definitions.h"
+#include "ArrayGroup.h"
+#include "MPIFS.h"
+#include "Chunk.h"
+#include "App_Info.h"
+#include "Array.h"
+#include "message.h"
+#include "CSDIO_Shared.h"
+
+/* we could have made this class multiply inherit from CSDIO and CSDIO_Shared, but
+ * we would have to use virtual inheritance and depending on the compiler used,
+ * there could be a performance penalty (though it would still be dwarfed by the
+ * cost of message-passing and disk i/o)
+ */
+
+extern MPIFS* MPIFS_global_obj;
+extern int SUBCHUNK_SIZE;
+
/* Incremental (shared) variant of CSDIO: instead of driving all i/o to
 * completion, the constructor primes a state machine that continue_io()
 * advances one subchunk at a time. */
CSDIO_Shared::CSDIO_Shared(int *schema_string, int schema_size, int world_rank,
                           int comp_app_num,int comp_app_size , App_Info *app_info)
: CSDIO(schema_string, schema_size, world_rank, comp_app_num,
        comp_app_size, app_info)
{

  /* Reusable chunk objects, re-init()ed as the cursors advance. */
  compute_chunk_ = new Chunk();
  current_chunk_ = new Chunk();
  subchunk_ = new Chunk();
  current_array_id_ = -1;
  if ((op_type_ == RESTART)||(op_type_ == GENERAL_READ)||
      (op_type_ == READ_TIMESTEP))
    read_op_ = YES;
  else
    read_op_ = NO;

  /* Send the IO app info to the compute nodes */
  receive_io_app_info();

  /* We need to set the following variables so that continue_io()*
   * would start the I/O of the first subchunk automatically */
  contiguous_ = NO;
  current_array_id_ = -1;
  current_chunk_id_ = 0;
  num_of_chunks_ = -1; /* This will cause get_next_chunk() to fail */
  current_subchunk_id_ = 0;
  num_of_subchunks_ = -1; /* Causes get_next_subchunk() to fail */
  status_flag_ = START;
  continue_io();
}
+
/* Release the reusable chunk objects created by the constructor.
 * NOTE(review): current_chunk_ is also new'ed in the constructor but not
 * deleted here -- verify the base class owns/frees it, otherwise it
 * leaks. */
CSDIO_Shared::~CSDIO_Shared()
{
  if (subchunk_) delete subchunk_;
  if (compute_chunk_) delete compute_chunk_;
  subchunk_ = compute_chunk_ = NULL;
}
+
+Boolean CSDIO_Shared::get_next_array(){
+ current_array_id_++;
+ if (current_array_id_ < num_of_arrays_){
+ make_subchunks_ = -1;
+ current_array_ = find_array(current_array_id_);
+ nat_chunked_ = current_array_->nat_chunked();
+ sub_chunked_ = current_array_->sub_chunked();
+ array_rank_ = current_array_->rank();
+
+ if (array_rank_ > max_rank_){
+ realloc_schema_bufs(array_rank_);
+ }
+ num_of_chunks_ = current_array_->layout(IO_NODE)->total_elements();
+ current_chunk_id_ = -1;
+ if (nat_chunked_ && !sub_chunked_)
+ contiguous_ = YES; /* No need to use derived datatypes */
+ else
+ contiguous_ = NO; /* Have to use derived datatypes */
+
+ bytes_to_go_ = 0;
+ current_subchunk_id_ = -1;
+ return YES;
+ } else
+ return NO;
+}
+
+
/* Advance to the next chunk of the current array that this i/o node owns.
 * Contiguous case: also pre-pack the schema header into schema_bufs_[0]
 * (array id, chunk id, flags, op type, running offset, slice length) and
 * compute the destination compute nodes.  Non-contiguous case: set up the
 * subchunk iteration instead.  Returns NO when the array is exhausted. */
Boolean CSDIO_Shared::get_next_chunk()
{
  int *ptr;

  if (!current_array_) return NO;
  current_chunk_id_ = current_array_->get_next_index(current_chunk_id_,
                                                     my_io_rank_,
                                                     num_io_nodes_);
  if (current_chunk_id_ < num_of_chunks_){
    current_chunk_->set_data_ptr(NULL);
    current_chunk_->init(current_array_, current_chunk_id_,
                         IO_NODE, NO_ALLOC);
    tag_ = current_chunk_id_*1000;
    if (contiguous_){
      bytes_to_go_ = current_chunk_->total_size_in_bytes();
      current_chunk_->set_data_ptr(mem_buf_);
      ptr = schema_bufs_[0];
      *ptr++ = current_array_id_;
      *ptr++ = current_chunk_id_;
      *ptr++ = (int) nat_chunked_;
      *ptr++ = (int) contiguous_;
      *ptr++ = op_type_;
      *ptr++ = 0;    /* running byte offset, advanced by start_subchunk_io() */
      *ptr++ = 0;    /* current slice length, set by start_subchunk_io() */
      compute_chunk_overlaps(current_array_, current_chunk_);
    }
    else {
      /* Derive a subchunking for this chunk shape if the user gave none. */
      if (!sub_chunked_ && (make_subchunks_ == -1)){
        current_array_->make_sub_chunks(current_chunk_);
        make_subchunks_ = 1;
      }
      num_of_subchunks_ = current_array_->layout(SUB_CHUNK)->total_elements();
      current_subchunk_id_ = -1;
    }
    return YES;
  }
  else
    return NO;
}
+
+
+/* This should not be called for the contiguous_ case */
+Boolean CSDIO_Shared::get_next_subchunk()
+{
+ current_subchunk_id_++;
+ if (current_subchunk_id_ < num_of_subchunks_){
+ subchunk_->set_data_ptr(NULL);
+ subchunk_->init(current_chunk_, current_subchunk_id_, NO_ALLOC);
+ bytes_to_go_ = subchunk_->total_size_in_bytes();
+
+ if (bytes_to_go_ < mem_buf_size_)
+ realloc_mem_bufs(bytes_to_go_);
+
+ subchunk_->set_data_ptr(mem_buf_);
+ return YES;
+ }
+ else
+ return NO;
+}
+
+
/* Kick off the non-blocking transfer of the current subchunk (or, in the
 * contiguous case, the next SUBCHUNK_SIZE slice of the current chunk) and
 * move the state machine to WAITING.  Reads also perform the disk read
 * before sending. */
void CSDIO_Shared::start_subchunk_io()
{
  int *ptr;

  if (contiguous_){
    /* schema_bufs_[0][5] = running byte offset, [6] = slice length
     * (layout established in get_next_chunk()). */
    ptr = schema_bufs_[0];
    ptr[6] = min(SUBCHUNK_SIZE, bytes_to_go_);
    if (read_op_) read_data(mem_buf_, ptr[6]);
    if (read_op_)
      nb_send_message((void *)mem_buf_, ptr[6], MPI_CHAR, dest_ids_[0],
                      tag_+CHUNK_DATA_FROM_IO, MPI_COMM_WORLD, &requests_[0]);
    else
      nb_receive_message((void *)mem_buf_, ptr[6], MPI_CHAR, dest_ids_[0],
                         tag_+CHUNK_DATA_TO_IO, MPI_COMM_WORLD, &requests_[0]);
    ptr[5] += ptr[6]; /* Offset of the next subchunk */
    bytes_to_go_ -= ptr[6];
    status_flag_ = WAITING;
    tag_ += 10;

  } else {
    /* Derived-datatype path: compute overlaps/schemas, then exchange the
     * whole subchunk with every overlapping compute node. */
    compute_chunk_overlaps(current_array_, subchunk_);

    compute_schemas(current_array_, subchunk_, compute_chunk_, current_array_id_);
    if (read_op_){
      read_data(subchunk_);
      send_data_to_compute_nodes(subchunk_, tag_);
    }
    else
      receive_data_from_compute_nodes(subchunk_, tag_);
    status_flag_ = WAITING;
    tag_ += 10;
  }
}
+
+
/* Poll the outstanding requests for the in-flight subchunk.  When they
 * have all completed: for writes, flush the received bytes to disk; free
 * the derived datatypes; and rearm the state machine (START).
 * Returns YES when the subchunk's i/o is fully done.
 * NOTE(review): the contiguous write passes element size 1, while
 * CSDIO::start_to_finish() passes chunk->element_size() -- confirm
 * write_data() treats the buffer identically in both cases. */
Boolean CSDIO_Shared::test_subchunk_io()
{
  int flag;
  MPI_Testall(num_overlaps_, requests_, &flag, statuses_);
  if (flag) {
    status_flag_ = START;
    if (!read_op_)
      if (contiguous_)
        write_data(mem_buf_, schema_bufs_[0][6], 1);
      else
        write_data(subchunk_);

    if (!contiguous_) free_datatypes();
    return YES;
  }
  return NO;
}
+
+
+/* Return YES, if I/O is complete */
+Boolean CSDIO_Shared::continue_io()
+{
+ if (status_flag_ == START){
+ if (!start_next_subchunk_io()) return YES; /* IO completed */
+ } else if (status_flag_ == WAITING){
+ if (test_subchunk_io())
+ if (!start_next_subchunk_io()) return YES; /* IO done */
+ } else {
+ printf("Error - Invalid status_flag value \n");
+ exit(11);
+ }
+ return NO;
+}
+
+/* Return yes if you can start the io of another subchunk */
+Boolean CSDIO_Shared::start_next_subchunk_io()
+{
+ if (contiguous_){
+ if (bytes_to_go_ <= 0){
+ while(!get_next_chunk()){
+ if (!get_next_array()) return NO;
+ }
+ /* Since we might be looking at another array */
+ if (!contiguous_) get_next_subchunk();
+ }
+
+ start_subchunk_io();
+ } else {
+
+ if (!get_next_subchunk()){
+ /* We have finished this chunk */
+ while(!get_next_chunk()){
+ if (!get_next_array()) return NO;
+ }
+ if (!contiguous_) get_next_subchunk();
+ }
+
+ start_subchunk_io();
+ }
+ return YES;
+}
+
diff --git a/src/Panda/CSDIO_Shared.h b/src/Panda/CSDIO_Shared.h
new file mode 100644
index 0000000..08e9fd8
--- /dev/null
+++ b/src/Panda/CSDIO_Shared.h
@@ -0,0 +1,33 @@
#ifndef CSDIO_Shared_dot_h
#define CSDIO_Shared_dot_h

#include "CSDIO.h"
class Chunk;

/* Incremental variant of CSDIO driven by repeated continue_io() calls:
 * the i/o node advances one subchunk (or one contiguous slice) per step
 * instead of running to completion, so it can share the processor. */
class CSDIO_Shared : public CSDIO
{
  protected:
    int current_array_id_;   // cursor: which array is in flight (-1 = none)
    int status_flag_;        // START (ready to launch) or WAITING (in flight)
    Chunk *subchunk_;        // reusable subchunk object
    Chunk *compute_chunk_;   // reusable compute-chunk object
    Boolean read_op_;        // YES for restart/general-read/read-timestep
    int bytes_to_go_;        // bytes left of the current chunk/subchunk
    int make_subchunks_;     // -1 until a subchunking is derived per array
    int tag_;                // MPI tag for the current slice

    Boolean get_next_chunk();
    Boolean get_next_array();
    Boolean get_next_subchunk();
    Boolean start_next_subchunk_io();
    void start_subchunk_io();
    Boolean test_subchunk_io();

  public:
    CSDIO_Shared(int*,int,int,int,int, App_Info*);
    virtual ~CSDIO_Shared();
    virtual Boolean continue_io();
};

#endif

+
diff --git a/src/Panda/Chunk.C b/src/Panda/Chunk.C
new file mode 100644
index 0000000..d6fd028
--- /dev/null
+++ b/src/Panda/Chunk.C
@@ -0,0 +1,692 @@
+#include "definitions.h"
+#include "Chunk.h"
+#include "Array.h"
+#include <malloc.h>
+
+
+Chunk::Chunk()
+{
+ base_ = stride_ = size_ = NULL;
+ array_ = NULL;
+ chunk_ = NULL;
+ data_ptr_ = NULL;
+ stencil_width_ = 0;
+}
+
+
+/* This constructor is used to create a chunk given array information */
+Chunk::Chunk(Array *array, int chunk_id, int node_type, DataStatus data_status)
+{
+ do_init(array, chunk_id, node_type, data_status);
+}
+
+/* Re-initialize an already created chunk object */
+void Chunk::init(Array *array, int chunk_id, int node_type, DataStatus data_status)
+{
+ clear();
+ do_init(array, chunk_id, node_type, data_status);
+}
+
+void Chunk::do_init(Array *array, int chunk_id, int node_type,
+ DataStatus data_status)
+{
+ int *stride, *base;
+
+ /* Initialize the instance variables */
+ array_ = array;
+ chunk_ = NULL;
+ chunk_id_ = chunk_id;
+ am_subchunk_ = NO;
+ element_size_ = array->element_size();
+
+ stride = (int *) malloc(sizeof(int)*array->rank());
+ base = (int *) malloc(sizeof(int)*array->rank());
+ for(int i=0; i < array->rank(); i++){ stride[i] = 1; base[i] = 0; }
+
+ RegularDistribution *layout=(RegularDistribution *)(array->layout(node_type));
+ calculate_base_size_stride(array->rank(), base, array->size(), stride,
+ layout->layout(), layout->distribution(),
+ layout->block_dist(), chunk_id);
+
+ /* check if we have to allocate the data space */
+ switch(data_status) {
+ case ALLOC:
+ data_ptr_ = (char *)malloc(total_size_in_bytes());
+ data_status_ = data_status;
+ stencil_width_ = 0;
+ break;
+
+ case NO_ALLOC:
+ data_ptr_ = NULL;
+ data_status_ = data_status;
+ stencil_width_ = 0;
+ break;
+
+ default:
+ printf("Unsupported \n");
+ break;
+ }
+}
+
+/* This creates a subchunk , given the chunk and subchunk_id */
+Chunk::Chunk(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status)
+{
+ do_init(mega_chunk, sub_chunkid, data_status);
+}
+
+/* Re-initialize an already created subchunk obj */
+void Chunk::init(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status)
+{
+ clear();
+ do_init(mega_chunk, sub_chunkid, data_status);
+}
+
+
/* Shared init body for subchunks: the parent chunk's base/size/stride
 * are subdivided according to the array's SUB_CHUNK layout. */
void Chunk::do_init(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status)
{
  chunk_id_ = sub_chunkid;
  element_size_ = mega_chunk->element_size();
  array_ = mega_chunk->array();
  chunk_ = mega_chunk;      /* remember the parent */
  am_subchunk_ = YES;

  RegularDistribution *layout=(RegularDistribution *)(array_->layout(SUB_CHUNK));
  calculate_base_size_stride(mega_chunk->rank(), mega_chunk->base(),
                             mega_chunk->size(), mega_chunk->stride(),
                             layout->layout(), layout->distribution(),
                             layout->block_dist(), sub_chunkid);
  /* check if we have to allocate the data space */
  switch(data_status) {
    case ALLOC:
      data_ptr_ = (char *)malloc(total_size_in_bytes());
      data_status_ = data_status;
      stencil_width_ = 0;
      break;

    case NO_ALLOC:
      data_ptr_ = NULL;
      data_status_ = data_status;
      stencil_width_ = 0;
      break;

    default:
      data_ptr_ = NULL;
      printf("Unsupported \n");
      break;
  }
}
+
+Chunk::~Chunk()
+{
+ if (base_) delete base_;
+ if (stride_) delete stride_;
+
+ /* Delete the data buffer only if we allocated it in the first place */
+ if ((data_status_ == ALLOC) && data_ptr_) delete data_ptr_;
+}
+
+
/* Release all buffers so the object can be re-initialized via init().
 * NOTE(review): data_ptr_ is freed regardless of data_status_, yet a
 * pointer installed via set_data_ptr() under NO_ALLOC may be owned by
 * someone else — confirm callers never clear() such a chunk. */
void Chunk::clear()
{
  if (base_) free (base_);
  if (stride_) free (stride_);
  if (data_ptr_) free( data_ptr_);
  if (size_) free(size_);
  base_ = size_ = stride_ = NULL;
  data_ptr_ = NULL;
}
+
+/* This function takes as input the information about the global
+ * Array and returns the overlapping compute node chunk indices
+ * via a singly linked list.
+ *
+ * Currently this function can only handle BLOCK,* arrays (Needs
+ * to be extended for the CYCLIC case)
+ */
+void Chunk::chunk_overlaps(Array *global_array, int* num_overlaps,
+ int *ret_list, int node_type)
+{
+ RegularDistribution *layout1 =
+ (RegularDistribution *)global_array->layout(node_type);
+ ArrayLayout *layout= layout1->layout();
+ int layout_rank = layout->rank();
+ int *overlap_base = (int *)malloc(sizeof(int)*layout_rank);
+ int *overlap_size = (int *)malloc(sizeof(int)*layout_rank);
+
+ /* Find out the list of possible overlaps */
+ compute_first_last_chunk(global_array->rank(), global_array->size(),
+ layout, layout1->distribution(), layout1->block_dist(),
+ overlap_base, overlap_size);
+#ifdef DEBUG
+ printf("In chunk_overlaps\n");
+ for(int i=0;i<layout_rank;i++)
+ printf("base[%d] = %d size[%d] = %d\n", i, overlap_base[i], i, overlap_size[i]);
+#endif
+ layout->indices_list(overlap_base, overlap_size, num_overlaps, ret_list);
+ free(overlap_base);
+ free(overlap_size);
+}
+
+
+/* This function isn't general enough. It implicitly assumes that the I/O
+ * chunks are distributed using only BLOCK.* distributions. Also the
+ * compute node chunks are assumed to be distributed using only
+ * BLOCK,* (can be extended to support CYLCIC later)
+ *
+ * Function assumes that the memory for the return paramters
+ * overlap_base and overlap_size have been allocated
+ */
+void Chunk::compute_first_last_chunk(int array_rank, int *array_size,
+ ArrayLayout *layout, Distribution *dist,
+ Block_Distribution block_dist,
+ int *overlap_base, int *overlap_size)
+{
+ /* Validation of input data */
+ if (!(layout->valid_distribution(array_rank, dist)))
+ {
+ printf("Invalid distribution in compute_first_last_chunk\n");
+ exit(1);
+ }
+
+ /* Verify to see if we are dealing with BLOCK,* case only */
+ for(int i=0;i<layout->rank();i++)
+ {
+ if (dist[i] == CYCLIC)
+ {
+ printf("Cyclic schema not yet supported\n");
+ exit(2);
+ }
+ }
+
+ for(i=0; i<array_rank;i++)
+ {
+ if (stride_[i] != 1)
+ {
+ printf("Cyclic schema not yet supported\n");
+ exit(2);
+ }
+ }
+
+
+ /* Now we can get down to business */
+ int *overlap_last = (int*)malloc(sizeof(int)*layout->rank());
+ int layout_idx=0, array_idx;
+ int def_chunk_size,rem,tmp,last;
+
+ for(array_idx=0;array_idx < array_rank; array_idx++)
+ {
+ switch(dist[array_idx])
+ {
+ case NONE:
+ break;
+
+ case CYCLIC:
+ printf("Cyclic schema not yet supported\n");
+ exit(3);
+ break;
+
+ /* Need to verify this stuff - especially the NAS stuff */
+ case BLOCK:
+ switch(block_dist)
+ {
+ case HPF:
+ def_chunk_size = (array_size[array_idx]+layout->size(layout_idx)-1)
+ / (layout->size(layout_idx));
+ overlap_base[layout_idx] = base_[array_idx]
+ / def_chunk_size;
+ overlap_last[layout_idx] = (base_[array_idx]+size_[array_idx] -1)
+ / def_chunk_size;
+ break;
+
+ case NAS:
+ def_chunk_size = array_size[array_idx]
+ / layout->size(layout_idx);
+ rem = array_size[array_idx]
+ % layout->size(layout_idx);
+ if (rem == 0)
+ {
+ /* perfect distribution */
+ overlap_base[layout_idx] = base_[array_idx]
+ / def_chunk_size;
+ overlap_last[layout_idx] = (base_[array_idx]
+ + size_[array_idx] -1)
+ / def_chunk_size;
+ }
+ else
+ {
+ /* first "rem" blocks have "def_chunk+1" elements */
+ tmp = (def_chunk_size+1)*rem;
+ if (base_[array_idx] < tmp)
+ {
+ overlap_base[layout_idx] = base_[array_idx]
+ / (def_chunk_size + 1);
+ }
+ else
+ {
+ overlap_base[layout_idx] = ((base_[array_idx] - tmp)
+ / def_chunk_size) + rem;
+ }
+
+ last = base_[array_idx] + size_[array_idx] -1;
+ if (last < tmp)
+ {
+ overlap_last[layout_idx] = last / (def_chunk_size+1);
+
+ }
+ else
+ {
+ overlap_last[layout_idx] = ((last - tmp)
+ / def_chunk_size) + rem;
+ }
+ }
+ break;
+
+ default:
+ printf("Unsupported block distribution\n");
+ exit(2);
+ break;
+ }
+ overlap_size[layout_idx] = overlap_last[layout_idx]
+ - overlap_base[layout_idx] + 1;
+ layout_idx++;
+ break;
+
+ default:
+ printf("Unsupported distribution\n");
+ exit(3);
+ break;
+ }
+
+ }
+
+ free(overlap_last);
+ return;
+}
+
+
+
+
+int Chunk::total_size_in_bytes()
+{
+ return (total_size_in_elements()*element_size_);
+}
+
+
+
+int Chunk::total_size_in_elements()
+{
+ return total_elements();
+}
+
+
+int Chunk::chunk_id(){return chunk_id_;}
+
+
+void * Chunk::data_ptr(){return data_ptr_;}
+
+
+
+/* This is not a method. It is an generalized inline function to
+ * calculate the overlap between two chunks. The input parameters
+ * are rank,base,stride,size of the two arrays and the pointers to
+ * the base,strides and sizes of the resultant chunk. The functions
+ * assumes that the rank of the input arrays are equal
+ *
+ * This function also assumes that the memory for the return values
+ * r_base, r_stride, rsize have already been allocated.
+ */
+inline void determine_overlap(int rank, int *c1_base, int* c1_size,
+ int* c1_stride,
+ int* c2_base, int* c2_size, int* c2_stride,
+ int* r_base, int* r_size, int* r_stride)
+{
+
+ int tmp_base,tmp_size,n;
+
+ for(int i=0; i< rank;i++)
+ {
+ /* Compute overlap in each dimension */
+ if ((c1_stride[i] == 1) && (c2_stride[i] == 1))
+ {
+ /* Simplest case
+ * r_base = max(c1_base, c2_base)
+ * r_size = max( min(c1_base+c1_size, c2_base+c2_size)-r_base, 0);
+ */
+ r_base[i] = max(c1_base[i], c2_base[i]);
+ r_size[i] = max((min(c1_base[i]+c1_size[i], c2_base[i]+c2_size[i])
+ - r_base[i]), 0);
+ r_stride[i] = 1;
+ }
+ else if (c1_stride[i] == 1)
+ {
+ /* Not so simple - this needs to be verified
+ * tmp_B = max(c1_base,c2_base)
+ * B = tmp_B + (N - ((tmp_B - c2_base)%N))%N
+ * U = min(c1_base+(c1_size-1), c2_base+(c2_size-1)*N) - B
+ * if (U < 0) the no overlap else r_size = U/N + 1
+ */
+ n = c2_stride[i];
+ tmp_base = max(c1_base[i], c2_base[i]);
+ r_base[i] = tmp_base + (n -((tmp_base - c2_base[i])%n))%n;
+ tmp_size = min(c1_base[i]+(c1_size[i]-1), c2_base[i]+(c2_size[i]-1)*n);
+ if (tmp_size < 0)
+ {
+ /* no overlap */
+ r_size[i] = 0;
+ r_stride[i] = 1;
+ }
+ else
+ {
+ r_size[i] = tmp_size / n + 1;
+ r_stride[i] = n;
+ }
+ }
+ else if (c2_stride[i] == 1)
+ {
+ /* Similar to the previous case */
+ n = c1_stride[i];
+ tmp_base = max(c1_base[i], c2_base[i]);
+ r_base[i] = tmp_base + (n -((tmp_base - c1_base[i])%n))%n;
+ tmp_size = min(c1_base[i]+(c1_size[i]-1)*n, c2_base[i]+(c2_size[i]-1));
+ if (tmp_size < 0)
+ {
+ /* no overlap */
+ r_size[i] = 0;
+ r_stride[i] = 1;
+ }
+ else
+ {
+ r_size[i] = tmp_size / n + 1;
+ r_stride[i] = n;
+ }
+ }
+ else if (c1_stride[i] = c2_stride[i])
+ {
+ /* Can do this one later */
+ }
+ else
+ {
+ /* I give up */
+ }
+ }
+#ifdef DEBUG
+ /* Debugging output */
+ printf ("In determine overlap rank= %d\n", rank);
+ int k;
+ for(k=0;k<rank;k++)
+ printf("%d %d %d %d %d %d %d %d %d\n", c1_base[k], c1_size[k], c1_stride[k],
+ c2_base[k], c2_size[k], c2_stride[k],
+ r_base[k], r_size[k], r_stride[k]);
+#endif
+ return;
+}
+
+
+void Chunk::compute_overlap(Chunk *compute_chunk, int *overlap_base,
+ int *overlap_size, int *overlap_stride)
+{
+ determine_overlap(rank_, base_, size_, stride_,
+ compute_chunk->base(),
+ compute_chunk->size(),
+ compute_chunk->stride(),
+ overlap_base,
+ overlap_size,
+ overlap_stride);
+}
+
+
+int* Chunk::base(){return base_;}
+int* Chunk::size(){return size_;}
+int* Chunk::stride(){return stride_;}
+
+int Chunk::element_size() { return element_size_; }
/* This function needs to be verified when the stride is not 1 */
/* Translate a global coordinate "base" into an address inside this
 * chunk's data buffer (row-major, innermost dimension last) and return
 * it via *ptr.  The chunk's own base_ is subtracted first, then the
 * flat element offset is scaled by element_size_. */
void Chunk::base_offset(int *base, void **ptr)
{
  int base_offset = 0;
  int offset=1;

  for(int i=rank_ - 1; i>= 0; i--)
  {
    base_offset += ((base[i]-base_[i]) / stride_[i])*offset;
    offset *= size_[i];   /* running product of trailing dimensions */
  }
  base_offset *= element_size_;
  *ptr = (char *)data_ptr_ + base_offset;
}
+
+void Chunk::convert_from_number_to_index(int num, int *result)
+{
+ int i,j, product=1;
+
+ for(i=0;i<rank_;i++)
+ {
+ product=1;
+ for(j=i+1; j< rank_;j++) product *= size_[j];
+ result[i] = num / product;
+ num -= num/product * product;
+ }
+}
+
+
/* This method calculates the rank, base, stride of the chunk *
 * (subchunk), given the dimensions of the array (chunk) and its *
 * layout, distribution and the chunk (subchunk index) */
/* Allocates and fills the base_/size_/stride_ members (with malloc —
 * the destructor must free(), not delete).  "id" is the linear chunk
 * index within the layout grid; chunk_index is its per-dimension form
 * and is owned (and freed) by this function. */
void Chunk::calculate_base_size_stride(int rank, int* old_base,
                                       int* old_size, int* old_stride,
                                       ArrayLayout *layout, Distribution *dist,
                                       Block_Distribution block_dist, int id)
{
  int *chunk_index=NULL;
  int idx=0, layout_idx=0;
  int default_size, rem;

  chunk_index = layout->convert_from_number_to_index(id);
  rank_ = rank;
  size_ = (int *) malloc(sizeof(int)*rank);
  base_ = (int *) malloc(sizeof(int)*rank);
  stride_ = (int *) malloc(sizeof(int)*rank);


  /* Verify if it is possible to distribute the array (subchunk) */
  if (!(layout->valid_index(chunk_index)))
  {
    printf("Invalid chunk index %d in compute_base_size_stride\n", id);
    exit(1);
  }
  if (!(layout->valid_distribution(rank, dist)))
  {
    printf("Unable to distribute array in compute_base_size_stride\n");
    exit(2);
  }

  /* layout_idx advances only over distributed dimensions (dist != NONE). */
  for(idx=0; idx < rank; idx++)
  {
    switch(dist[idx])
    {
      case NONE:
        /* Dimension is not split: inherit the parent extent. */
        base_[idx] = old_base[idx];
        size_[idx] = old_size[idx];
        stride_[idx] = old_stride[idx]*1;
        break;

      case CYCLIC:
        /* Round-robin: every layout->size(layout_idx)-th element. */
        base_[idx] = old_base[idx] + chunk_index[layout_idx]*old_stride[idx];
        size_[idx] = (old_size[idx] - chunk_index[layout_idx]
                      + layout->size(layout_idx)-1)/ layout->size(layout_idx);
        stride_[idx] = layout->size(layout_idx) * old_stride[idx];
        layout_idx++;
        break;

      case BLOCK:
        switch(block_dist)
        {
          case HPF:
            /* HPF rule: ceiling-sized blocks; the last one may be short. */
            default_size = (old_size[idx] + layout->size(layout_idx)-1)
                           /layout->size(layout_idx);
            base_[idx] = old_base[idx] + default_size *
                         chunk_index[layout_idx] *old_stride[idx];
            size_[idx] = default_size;
            stride_[idx] = old_stride[idx]*1;
            /* The last chunk may be smaller */
            if (chunk_index[layout_idx] ==(layout->size(layout_idx)-1))
            {
              size_[idx] = old_size[idx] -
                           (default_size * chunk_index[layout_idx]);
            }
            break;

          case NAS:
            /* NAS rule: the first "rem" blocks carry one extra element. */
            default_size = old_size[idx] / layout->size(layout_idx);
            rem = old_size[idx] % layout->size(layout_idx);
            if (chunk_index[layout_idx] < rem)
            {
              base_[idx] = old_base[idx] + (chunk_index[layout_idx] +
                                            chunk_index[layout_idx]*default_size)
                           *old_stride[idx];
              size_[idx] = default_size + 1;
            }
            else
            {
              base_[idx] = old_base[idx] + (rem +
                                            chunk_index[layout_idx]*default_size)
                           *old_stride[idx];
              size_[idx] = default_size;
            }
            stride_[idx] = old_stride[idx] * 1;
            break;


          default:
            printf("Unsupported Block Distribution specified\n");
            exit(3);
            break;
        }
        layout_idx++;
        break;

      default:
        printf("Unsupported Distribution specified\n");
        exit(3);
        break;
    }
  }

  free(chunk_index);
  return;
}
+
+Array* Chunk::array(){return array_;}
+
+Boolean Chunk::am_subchunk(){return am_subchunk_;}
+
+void Chunk::copy_base_size_stride(int *base, int *size, int *stride)
+{
+ for(int i=0; i< rank_; i++){
+ base[i] = base_[i];
+ size[i] = size_[i];
+ stride[i] = stride_[i];
+ }
+ }
+
+
+
/* This assumes that all the strides are 1 - i.e no cyclic */
/* Build a nested MPI datatype describing the overlap region
 * (overlap_base/size/stride) inside this chunk's buffer, and return
 * the address of the region's first element via *ptr.
 * When stencil_width_ > 0 the buffer is assumed to include a ghost
 * border, so the effective base/size are widened (clamped to the
 * array bounds) before computing strides and offsets.
 * NOTE(review): writes tmp_types[rank_-2], so rank_ >= 2 appears to be
 * required — confirm callers never pass rank-1 chunks. */
void Chunk::make_datatype(int *overlap_base, int *overlap_size,
                          int *overlap_stride, void **ptr,
                          MPI_Datatype *return_data_type)
{

  MPI_Datatype *tmp_types = (MPI_Datatype *) malloc(sizeof(MPI_Datatype) * rank_);
  int i,j , offset = 1;
  int base_offset = 0;
  int *size, *base;
  Boolean allocate;

  // If there is a ghost region
  int *array_size = array_->size();
  int bound;
  if (stencil_width_ > 0) {
    /* Widen base/size by the stencil, clamped to the array extent. */
    size = (int *)malloc(sizeof(int) * rank_);
    base = (int *)malloc(sizeof(int) * rank_);
    for (i=0; i<rank_; i++) {
      bound = base_[i] + size_[i];
      base[i] = max(base_[i] - stencil_width_, 0);
      bound = min(bound + stencil_width_, array_size[i]);
      size[i] = bound - base[i];
    }
    allocate = YES;
    //printf("##### stencil %d base %d %d %d size %d %d %d\n", stencil_width_, base[0], base[1], base[2], size[0], size[1], size[2]);
  } else {
    size = size_;
    base = base_;
    allocate = NO;
  }

  /* Innermost dimension: one element, then a contiguous run. */
  MPI_Type_contiguous(element_size_, MPI_CHAR, &tmp_types[rank_-1]);
  if (overlap_stride[rank_ -1] != 1)
  {
    printf("error - stride is %d", overlap_stride[rank_ -1]);
    exit(10);
  }
  MPI_Type_vector(overlap_size[rank_-1], 1, 1, tmp_types[rank_-1], &tmp_types[rank_-2]);
  /* Wrap outward one dimension at a time with byte-strided hvectors. */
  for(i=rank_-1; i > 0; i--)
  {
    offset=1;
    for(j=i;j <rank_; j++) offset *= size[j];
    if (overlap_stride[i-1] != 1)
    {
      printf("error - stride is %d\n", overlap_stride[i-1]);
      exit(10);
    }
    if (i != 1){

      MPI_Type_hvector(overlap_size[i-1],1,offset*element_size_,
                       tmp_types[i-1],
                       &tmp_types[i-2]);
    }
    else
      MPI_Type_hvector(overlap_size[i-1],1,offset*element_size_,
                       tmp_types[i-1],
                       return_data_type);
  }
  MPI_Type_commit(return_data_type);
  /* Flat offset of the overlap's first element within the buffer. */
  offset=1;
  for(i=rank_-1;i >= 0; i--)
  {
    base_offset += (overlap_base[i] - base[i])*offset;
    offset *= size[i];
  }

  *ptr = data_ptr_ + base_offset*element_size_;
  free (tmp_types);
  if (allocate) {
    free(size);
    free(base);
  }
}
+
+
+/* Old data buffer should be freed by someother function */
+void Chunk::set_data_ptr(char *data_ptr){
+ data_ptr_ = data_ptr;
+}
+
+void Chunk::set_stencil_width(int stencil_width){
+ stencil_width_ = stencil_width;
+}
+
+Chunk::Chunk(Array *array, int *base, int *size)
+{
+ array_ = array;
+ rank_ = array->rank();
+ element_size_ = array->element_size();
+ chunk_id_ = 0;
+ am_subchunk_ = NO;
+
+ base_ = copy_int_list(rank_, base);
+ size_ = copy_int_list(rank_, size);
+ stride_ = (int *)malloc(sizeof(int) * rank_);
+ for (int i=0; i<rank_; i++) stride_[i] = 1;
+ data_status_ = NO_ALLOC; data_ptr_ = NULL;
+}
diff --git a/src/Panda/Chunk.h b/src/Panda/Chunk.h
new file mode 100644
index 0000000..523a7d1
--- /dev/null
+++ b/src/Panda/Chunk.h
@@ -0,0 +1,68 @@
#ifndef Chunk_dot_h
#define Chunk_dot_h

#include "mpi.h"
#include "List.h"
#include "ArrayLayout.h"

class Array;


/* A Chunk describes one rectangular (possibly strided) section of an
 * Array: per-dimension base, size and stride plus an optional data
 * buffer.  A Chunk can itself be subdivided into subchunks for I/O.
 * rank_ and size_ come from the Template base class. */
class Chunk : public Template, public Linkable {
 protected:
  int *base_;              /* per-dimension global start index (malloc'ed) */
  int *stride_;            /* per-dimension stride; 1 = dense (malloc'ed) */
  int chunk_id_; /* This should be unique */
  int element_size_;       /* bytes per array element */
  Array* array_;           /* owning array (not owned by the chunk) */
  Chunk* chunk_;           /* parent chunk when this is a subchunk */
  char *data_ptr_;         /* data buffer; owned only when data_status_==ALLOC */
  int stencil_width_;      /* ghost-region width used by make_datatype() */
  DataStatus data_status_; /* ALLOC if data_ptr_ was allocated here */
  Boolean am_subchunk_;    /* YES when built from a parent chunk */


  void compute_first_last_chunk(int, int*,
                                ArrayLayout*,Distribution*,
                                Block_Distribution, int*,
                                int*);
  void do_init(Array*,int,int, DataStatus);
  void do_init(Chunk*,int,DataStatus);
  void clear();

 public:
  Chunk();
  Chunk(Array*,int*,int*);               /* dense region of an array */
  Chunk(Array*,int,int,DataStatus);      /* chunk #id of an array layout */
  Chunk(Chunk*, int, DataStatus);        /* subchunk #id of a parent chunk */
  void init(Array*,int,int,DataStatus);  /* re-init variants of the above */
  void init(Chunk*,int,DataStatus);
  virtual ~Chunk();
  void chunk_overlaps(Array *, int*, int*, int);
  int total_size_in_bytes();
  int total_size_in_elements();
  int chunk_id();
  void *data_ptr();
  void set_data_ptr(char *);
  void set_stencil_width(int);
  int* base();
  int* stride();
  int* size();
  int element_size();
  void base_offset(int*, void**);
  void compute_overlap(Chunk*,int*,int*,int*);
  void convert_from_number_to_index(int,int*);
  void calculate_base_size_stride(int, int*, int*, int*,
                                  ArrayLayout*, Distribution*,
                                  Block_Distribution, int);
  Array* array();
  Boolean am_subchunk();
  void copy_base_size_stride(int*,int*, int*);
  void make_datatype(int*,int*,int*,void**,MPI_Datatype*);

};

#endif
+
+
+
diff --git a/src/Panda/Collective_IO.C b/src/Panda/Collective_IO.C
new file mode 100644
index 0000000..118afe6
--- /dev/null
+++ b/src/Panda/Collective_IO.C
@@ -0,0 +1,25 @@
+#include "definitions.h"
+#include "Collective_IO.h"
+
+Collective_IO::Collective_IO(){}
+
+Collective_IO::~Collective_IO()
+{
+}
+
+Boolean Collective_IO::continue_io()
+{
+ printf("This function should not be executed\n");
+ return YES;
+}
+
+void Collective_IO::start_to_finish(Boolean part_time_io, Array *array)
+{
+ printf("This function should not be executed\n");
+}
+
+void Collective_IO::compute_node_io_loop(Array *array)
+{
+ printf("This function should not be executed\n");
+}
+
diff --git a/src/Panda/Collective_IO.h b/src/Panda/Collective_IO.h
new file mode 100644
index 0000000..aa351a7
--- /dev/null
+++ b/src/Panda/Collective_IO.h
@@ -0,0 +1,18 @@
#ifndef Collective_IO_dot_h
#define Collective_IO_dot_h

#include "List.h"
class Array;

/* Abstract base class for collective-I/O strategies (see Simple_IO,
 * CSDIO, Shared_IO).  The base implementations are stubs that print an
 * error; subclasses override the virtuals below. */
class Collective_IO : public Linkable{
  public:
    Collective_IO();
    virtual ~Collective_IO();
    virtual Boolean continue_io();                 /* one step; YES when done */
    virtual void start_to_finish(Boolean, Array*); /* run a complete I/O op */
    virtual void compute_node_io_loop(Array*);     /* compute-node side loop */
};

#endif
+
+
diff --git a/src/Panda/List.C b/src/Panda/List.C
new file mode 100644
index 0000000..8861a6f
--- /dev/null
+++ b/src/Panda/List.C
@@ -0,0 +1,175 @@
+#include "definitions.h"
+#include "List.h"
+
+Cell::Cell()
+{
+ item_ = NULL;
+ next_ = NULL;
+ prev_ = NULL;
+}
+
+Cell::Cell(Linkable *new_item)
+{
+ item_ = new_item;
+ next_ = NULL;
+ prev_ = NULL;
+}
+
+Cell::Cell(Linkable *new_item, Cell *prev)
+{
+ item_ = new_item;
+ prev_ = prev;
+ next_ = NULL;
+}
+
+Cell::Cell(Linkable *new_item, Cell *next, Cell *prev)
+{
+ item_ = new_item;
+ next_ = next;
+ prev_ = prev;
+}
+
+
+Cell::~Cell()
+{
+ next_ = NULL;
+ prev_ = NULL;
+ item_ = NULL;
+}
+
+
+Linkable* Cell::item(){return item_;}
+
+Cell* Cell::next(){return next_;}
+
+Cell* Cell::prev(){return prev_;}
+
+void Cell::set_next(Cell *next) {next_ = next;}
+
+void Cell::set_prev(Cell *prev) {next_ = prev;}
+
+/*----------------------------------------------------*/
+
+List::List()
+{
+ head_ = NULL;
+ tail_ = NULL;
+ old_search_val_ = NULL;
+}
+
+List::~List()
+{
+ Cell* ptr = head_;
+ Cell* tmp;
+
+ while(ptr != NULL)
+ {
+ tmp = ptr->next();
+ delete ptr;
+ ptr = tmp;
+ }
+ head_ = NULL;
+ tail_ = NULL;
+ old_search_val_ = NULL;
+}
+
+
+/* Simply add to the beginning of the list */
+void List::insert(Linkable* new_item)
+{
+ add_first(new_item);
+}
+
+
+/* Add to the end of the list */
+void List::add_last(Linkable *new_item)
+{
+ Cell* tmp;
+
+ if ((tail_ == NULL) && (head_ == NULL))
+ {
+ /* The list is empty */
+ tmp = new Cell(new_item);
+ tail_ =tmp;
+ head_ = tmp;
+ return;
+ }
+ else if ((tail_ != NULL) && (head_ != NULL))
+ {
+ tmp = new Cell(new_item , tail_);
+ tail_->set_next(tmp);
+ tail_ = tmp;
+ return;
+ }
+ else
+ {
+ printf("Error in List obj\n");
+ }
+
+}
+
+/* Add to the beginning of the list */
+void List::add_first(Linkable *new_item)
+{
+ Cell *tmp;
+ if ((tail_ == NULL) && (head_ == NULL))
+ {
+ /* The list is empty */
+ tmp = new Cell(new_item);
+ head_ = tmp;
+ tail_ = tmp;
+ return;
+ }
+ else if ((tail_ != NULL) && (head_ != NULL))
+ {
+ tmp = new Cell(new_item, head_);
+ head_ = tmp;
+ return;
+ }
+ else
+ {
+ printf("Error in List obj\n");
+ }
+}
+
+
/* Remove the item from the List */
/* Unlinks and deletes the first cell whose payload pointer equals
 * new_item; the payload itself is not freed.  No-op when the item is
 * not present.  The four branches cover: interior cell, tail cell,
 * head cell, and sole cell.
 * NOTE(review): relies on Cell::set_prev() actually updating prev_ —
 * verify against the Cell implementation. */
void List::remove(Linkable *new_item)
{
  Cell *ptr = head_, *prev, *next;
  while (ptr)
  {
    if (ptr->item()==new_item)
    {
      prev = ptr->prev();
      next = ptr->next();
      if ((prev != NULL) && (next != NULL))
      {
        /* interior cell: splice neighbours together */
        prev->set_next(next);
        next->set_prev(prev);
        delete ptr;
      }
      else if (prev != NULL)
      {
        /* ptr must be the last item */
        prev->set_next(NULL);
        tail_ = prev;
        delete ptr;
      }
      else if (next != NULL)
      {
        /* ptr must be the first item */
        next->set_prev(NULL);
        head_ = next;
        delete ptr;
      }
      else
      {
        /* sole element: list becomes empty */
        head_ = tail_ = NULL;
        delete ptr;
      }
      return;
    }
    else ptr = ptr->next();
  }
}
diff --git a/src/Panda/List.h b/src/Panda/List.h
new file mode 100644
index 0000000..1d162d4
--- /dev/null
+++ b/src/Panda/List.h
@@ -0,0 +1,61 @@
#ifndef Link_dot_h
#define Link_dot_h


/* This is the dummy base class for all items *
 * to be placed in a linked list. It would have *
 * been cleaner to use Templates but support for *
 * templates varies with different compilers and *
 * the problem of code blow up etc exists. *
 */
class Linkable
{
 public:
  Linkable(){};
  virtual ~Linkable(){};

};


/* The Cells contains a Linkable element and ptrs *
 * to the next and previos cells *
 */
class Cell {
  Linkable *item_;   /* payload pointer — never owned by the cell */
  Cell *next_;       /* successor, NULL at the tail */
  Cell *prev_;       /* predecessor, NULL at the head */
 public:
  Cell();
  Cell(Linkable*);                 /* item only */
  Cell(Linkable*,Cell*);           /* item + prev link */
  Cell(Linkable*,Cell*,Cell*);     /* item + next + prev links */
  ~Cell();
  Linkable* item();
  Cell* next();
  Cell* prev();
  void set_next(Cell*);
  void set_prev(Cell*);
};


/* The List class provides support for creating a *
 * list and provides operations like inserting, *
 * deleting elements to the beginning and the end *
 * of the list *
 */
class List {
 public:
  Cell *head_;           /* first cell, NULL when empty */
  Cell *tail_;           /* last cell, NULL when empty */
  Cell *old_search_val_; /* result of the previous search */
  List();
  ~List();
  void insert(Linkable*);      /* alias for add_first() */
  void add_last(Linkable*);
  void add_first(Linkable*);
  void remove(Linkable*);      /* removes first match; item not freed */
};

#endif
+
+
diff --git a/src/Panda/MPIFS.C b/src/Panda/MPIFS.C
new file mode 100644
index 0000000..e8b56ae
--- /dev/null
+++ b/src/Panda/MPIFS.C
@@ -0,0 +1,971 @@
+#include "definitions.h"
+#include "MPIFS.h"
+#include "Collective_IO.h"
+#include "Simple_IO.h"
+#include "Array.h"
+#include "Chunk.h"
+#include "message.h"
+#define Max_Open_Files 1000
+
+
+#include "external/IEEEIO/src/Arch.h"
+
+extern "C" {
+ IOFile IEEEopen(char *,char *);
+ int IOclose(IOFile);
+}
+
+int BRANCHING_FACTOR=8;
+int SUBCHUNK_SIZE = 1048576;
+MPIFS* MPIFS_global_obj;
+
+/* Notes,Hacks,Assumptions:
+ * - io_app_info_ and app_info_ point to the same object on the I/O
+ * for the regular. This hack is to allow for code re-use in the
+ * part-time I/O case.
+ */
+
+/* Constructor for the normal case - i.e no part-time I/O nodes */
+MPIFS::MPIFS(int node_type, int app_num, int relative_rank, int app_size,
+ int *world_ranks)
+{
+#ifdef DEBUG
+ int abs_rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &abs_rank);
+ printf("%d:node_type=%d, app_num=%d , relative_rank=%d, app_size=%d\n",
+ abs_rank, node_type, app_num, relative_rank, app_size);
+#endif
+ do_init(node_type, app_num, relative_rank, app_size, world_ranks);
+}
+
+/* Constructor for the normal case - i.e no part-time I/O nodes */
+MPIFS::MPIFS(int node_type, int app_num, int relative_rank, int app_size,
+ int *world_ranks, Boolean shared_flag)
+{
+ do_init(node_type, app_num, relative_rank, app_size, world_ranks);
+}
+
+
+/* Constructor for part-time I/O nodes */
+MPIFS::MPIFS(int node_type, int comp_rank, int comp_size, int *comp_world_ranks,
+ int io_rank, int io_size, int *io_world_ranks)
+{
+#ifdef DEBUG
+ int abs_rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &abs_rank);
+ printf("%d:node_type=%d, comp_rank=%d, comp_size=%d io_rank=%d io_size=%d\n",
+ abs_rank,node_type, comp_rank, comp_size, io_rank, io_size);
+#endif
+ do_init(node_type, comp_rank, comp_size, comp_world_ranks, io_rank, io_size,
+ io_world_ranks);
+}
+
/* Initialize the file system object for the regular case (i.e no part-time I/O nodes) */
/* Sets up the per-application communicator, discovers/broadcasts the
 * master I/O node's world rank, then either enters the I/O service
 * loop (I/O nodes — this call does not return until QUIT) or registers
 * the compute application with the master I/O node and builds the
 * compute/I-O intra-communicator (compute nodes). */
void MPIFS::do_init(int node_type, int app_num, int relative_rank, int app_size,
                    int *world_ranks)
{
  MPI_Status status;
  int tag, tmp;

  MPIFS_global_obj = this;

  if ((node_type != IO_NODE) && (node_type != COMPUTE_NODE))
  {
    printf("Invalid node type in MPIFS::do_init(int,int,int,int,int*)\n");
    exit(10);
  }

  /* Initialize the state */
  node_type_ = node_type;

  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_);

  app_num_ = app_num;
  app_rank_ = relative_rank;
  app_size_ = app_size;
  app_info_ = new App_Info(app_num, app_size, world_ranks);
  comm_ = (MPI_Comm *) malloc(sizeof(MPI_Comm));
  MPI_Comm_split(MPI_COMM_WORLD, app_num_, app_rank_, comm_);
  num_apps_ = num_apps_alive_ = global_barrier_count_ = 0;
  current_max_app_num_ = -1;
  compute_apps_info_ = NULL;
  mem_buf_ = NULL;
  num_open_files_ = 0;
  for (int i=0; i<Max_Open_Files; i++) open_file_names_[i] = NULL;

  /* Part-time i/o stuff - unneeded in this case */
  io_app_num_ = io_app_rank_ = io_app_size_ = -1;
  io_comm_ = NULL;
  io_app_info_ = NULL;

  /* Broadcast the rank of the master I/O node. The strategy is *
   * to send the info to node 0 and have it broadcast it */
  master_io_node_ = -1;
  if (node_type_ == IO_NODE)
    master_io_node_ = app_info_->get_master();

  if (world_rank_ == 0)
  {
    /* World rank 0 may itself be the master; only receive otherwise. */
    if (master_io_node_ != 0)
      receive_message((void *)&master_io_node_, 1, MPI_INT, MPI_ANY_SOURCE,
                      1000, MPI_COMM_WORLD, &status);
  }
  else if (world_rank_ == master_io_node_)
  {
    send_message((void *)&master_io_node_, 1 , MPI_INT, 0, 1000,
                 MPI_COMM_WORLD);
  }
  MPI_Bcast((void *)&master_io_node_, 1, MPI_INT, 0, MPI_COMM_WORLD);
  /* Now all nodes know who the master I/O node is */

  if (node_type_ == IO_NODE)
  {
    mem_buf_size_ = 2*SUBCHUNK_SIZE; /* Factor of 2 - just to be safe */
    mem_buf_ = (char *) malloc(sizeof(char)*mem_buf_size_);

    /* On dedicated I/O nodes the I/O app IS the local app (shared
     * object — see the note at the top of this file). */
    io_app_info_ = app_info_;
    io_node_main_loop();
  }
  else if (node_type_ == COMPUTE_NODE)
  {
    /* Send the info about the compute application to the *
     * master I/O node (only master I/O node has to do it) */
    if (am_master_compute_node())
    {
      tag = app_num_*100+APP_INFO*10+SPECIAL;
      send_message((void *)world_ranks, app_size, MPI_INT,
                   master_io_node_, tag, MPI_COMM_WORLD);
      receive_message((void *)&tmp, 1, MPI_INT, master_io_node_,
                      tag, MPI_COMM_WORLD, &status);
    }
    MPI_Barrier(*comm_);

    /* Create an intra-comm with the I/O nodes. This stuff is *
     * used only for implemneting barriers etc */
    MPI_Comm *inter_comm = (MPI_Comm *) malloc(sizeof(MPI_Comm));
    MPI_Comm *intra_com = (MPI_Comm *) malloc(sizeof(MPI_Comm));
    MPI_Intercomm_create(*comm_, 0, MPI_COMM_WORLD,
                         master_io_node_, app_num, inter_comm);
    MPI_Intercomm_merge(*inter_comm, 1, intra_com);
    app_info_->set_intra_comm(intra_com);
  }
  else
  {
    printf("Unsupported node type\n");
    exit(1);
  }
}
+
/* Part-time initialization: every node is simultaneously a member of
 * the compute application (app_num 1) and, for PART_TIME_IO nodes, of
 * the logical I/O application (app_num 0).  Both MPI_Comm_create calls
 * must be executed by every rank in MPI_COMM_WORLD. */
void MPIFS::do_init(int node_type, int comp_rank, int comp_size, int *comp_world_ranks,
                    int io_rank, int io_size, int *io_world_ranks)
{
  MPI_Group global_group, comp_group, io_group;


  MPIFS_global_obj = this;
  if ((node_type != PART_TIME_COMPUTE) && (node_type != PART_TIME_IO))
  {
    printf("Incorrect initialization for node_type %d\n", node_type);
    exit(10);
  }

  /* Convention that logical I/O app gets app_num=0 and compute app get app_num=1 */
  node_type_ = node_type;

  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_);

  app_num_ = 1;
  app_rank_ = comp_rank;
  app_size_ = comp_size;
  master_io_node_ = io_world_ranks[0];   /* first I/O rank is the master */
  comm_ = (MPI_Comm *)malloc(sizeof(MPI_Comm));
  MPI_Comm_group(MPI_COMM_WORLD, &global_group);
  MPI_Group_incl(global_group, comp_size, comp_world_ranks, &comp_group);
  MPI_Comm_create(MPI_COMM_WORLD, comp_group, comm_);
  app_info_ = new App_Info(1, app_size_, comp_world_ranks);

  num_apps_ = 1;
  num_apps_alive_ =1;
  current_max_app_num_=-1;
  global_barrier_count_ =0;
  compute_apps_info_ = NULL;
  mem_buf_ = NULL;

  io_app_num_ = 0; /* By convention */
  io_app_rank_ = io_rank;
  io_app_size_ = io_size;
  io_app_info_ = NULL;

  /* Everyone in MPI_COMM_WORLD must make this call */
  io_comm_ = (MPI_Comm*) malloc(sizeof(MPI_Comm));
  MPI_Comm_group(MPI_COMM_WORLD, &global_group);
  MPI_Group_incl(global_group, io_size, io_world_ranks, &io_group);
  MPI_Comm_create(MPI_COMM_WORLD, io_group, io_comm_);

  if (node_type_ == PART_TIME_IO)
  {
    /* Only part-time I/O nodes need the staging buffer and the
     * compute-application registry. */
    mem_buf_size_ = 2*SUBCHUNK_SIZE; /* Factor of 2 - just to be safe */
    mem_buf_ = (char *) malloc(sizeof(char)*mem_buf_size_);
    App_Info *app = new App_Info(1, comp_size, comp_world_ranks);
    io_app_info_ = new App_Info(0, io_app_size_, io_world_ranks);
    insert_compute_app(1, app);
  }

}
+
/* Tear down the file system object: the master compute node tells the
 * master I/O node to quit, open IEEEIO files are closed, and the
 * communicator and buffers are released.
 * NOTE(review): compute_apps_info_ is released with scalar delete —
 * if it is allocated as an array (new App_Info*[...]), this should be
 * delete[]; confirm against insert_compute_app(). */
MPIFS::~MPIFS()
{
  if (node_type_ == COMPUTE_NODE)
  {
    if (am_master_compute_node())
      send_message((void *)&app_num_, 1, MPI_INT, master_io_node_,
                   QUIT, MPI_COMM_WORLD);
  }


  if (app_info_) delete app_info_;
  if (mem_buf_) free(mem_buf_);

  /* Close every file opened through this object. */
  for (int i=0; i<num_open_files_; i++) {
    free(open_file_names_[i]);
    IOclose((IOFile)open_file_ptrs_[i]);
  }

  if (compute_apps_info_){
    for(int i=0; i<=current_max_app_num_; i++)
      if (compute_apps_info_[i]) delete compute_apps_info_[i];
    delete compute_apps_info_;
  }

  app_info_ = io_app_info_ = NULL;
  compute_apps_info_ =NULL;
  if (comm_)
  {
    MPI_Comm_free(comm_);
    free(comm_);
    comm_= NULL;
  }
}
+
+
+/* A node counts as a compute node unless it is a dedicated I/O node. */
+Boolean MPIFS::am_compute_node()
+{
+  return (node_type_ == IO_NODE) ? NO : YES;
+}
+
+
+/* Dedicated and part-time I/O nodes both count as I/O nodes. */
+Boolean MPIFS::am_io_node()
+{
+  return ((node_type_ == IO_NODE) || (node_type_ == PART_TIME_IO)) ? YES : NO;
+}
+
+/* True on the single I/O node designated master of the I/O app. */
+Boolean MPIFS::am_master_io_node()
+{
+  return (am_io_node() && (world_rank_ == io_app_info_->get_master()))
+    ? YES : NO;
+}
+
+/* True on the single compute node designated master of this app. */
+Boolean MPIFS::am_master_compute_node()
+{
+  return (am_compute_node() && (world_rank_ == app_info_->get_master()))
+    ? YES : NO;
+}
+
+
+
+
+
+/* This is a highly restricted version of a broadcast function.  The
+ * broadcast is performed as tree-structured communication with fan-out
+ * BRANCHING_FACTOR, rooted at relative rank 0 of the selected
+ * application (compute or I/O).  Only the sends to this node's children
+ * are issued here; each child receives the message elsewhere and then
+ * calls Broadcast itself to continue down the tree.
+ */
+void MPIFS::Broadcast(int node_type, void *buf, int count,
+		      MPI_Datatype datatype, int tag)
+{
+  App_Info *app;
+  int my_rank = app_rank_, size;
+  int low, high, i, dest;
+
+  /* Pick the relative-rank -> world-rank mapping for the target app. */
+  if (node_type == COMPUTE_NODE) app = app_info_;
+  else app = io_app_info_;
+  size = app->app_size();
+
+  /* Children of relative rank r are ranks r*B+1 .. (r+1)*B (high is
+   * exclusive below). */
+  low = my_rank*BRANCHING_FACTOR+1;
+  high = (my_rank+1)*BRANCHING_FACTOR+1;
+  i = low;
+
+#ifdef DEBUG
+  printf("%d: Bcast low %d high %d size=%d\n", world_rank_,low, high,size);
+#endif
+  /* Can use asynchronous sends */
+  while ((i<size) && (i<high)) {
+    dest = app->world_rank(i);
+    send_message(buf, count, datatype, dest, tag, MPI_COMM_WORLD);
+    i++;
+  }
+}
+
+
+/* Main event loop of a dedicated I/O node: block for the next message
+ * and dispatch on the code carried in the tag's low decimal digit.
+ * Returns only once a QUIT has been received for every live compute
+ * application (see received_quit_message). */
+void MPIFS::io_node_main_loop()
+{
+  MPI_Status msg_status;
+  int msg_tag, msg_code, msg_src;
+
+  while(1){
+    wait_for_next_message(&msg_code, &msg_src, &msg_tag, &msg_status);
+    switch(msg_code){
+
+    case SPECIAL:
+      /* Control traffic: app registration, barriers, file management. */
+      process_io_special_message(msg_src, msg_tag, &msg_status);
+      break;
+
+    case ARRAYGROUP_SCHEMA:
+      start_collective_io(msg_src, msg_tag, &msg_status);
+      break;
+
+    case ATTRIBUTE_SCHEMA:
+      start_attribute_io(msg_src, msg_tag, &msg_status);
+      break;
+
+    case QUIT:
+      if (received_quit_message(msg_src, msg_tag, &msg_status))
+	return;
+      break;
+
+    default:
+      printf("Error - did not understand message code %d from %d with tag %d\n",
+	     msg_code, msg_src, msg_tag);
+      break;
+    }
+  }
+}
+
+/* Block until any message arrives (MPI_Probe on MPI_COMM_WORLD) and
+ * report its source, tag, and the message code encoded in the tag's
+ * least-significant decimal digit. */
+void MPIFS::wait_for_next_message(int *msg_code, int *msg_src, int *msg_tag,
+				  MPI_Status *msg_status)
+{
+  MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, msg_status);
+  *msg_src = msg_status->MPI_SOURCE;
+  *msg_tag = msg_status->MPI_TAG;
+  *msg_code = (*msg_tag) % 10;
+}
+
+/* Dispatch a SPECIAL (control) message on an I/O node.  The second
+ * decimal digit of the tag selects the operation.  Every case first
+ * receives the payload from the sender, then re-broadcasts it down the
+ * I/O-node tree so that all I/O nodes act on it. */
+void MPIFS::process_io_special_message(int msg_src, int msg_tag,
+				       MPI_Status *status)
+{
+  int msg_code = (msg_tag / 10) % 10 ; /* Extract the second digit */
+  int msg_len, app_num, *world_ranks;
+  MPI_Comm *inter_comm, *intra_com;
+  App_Info *app;
+
+  switch (msg_code)
+  {
+  case APP_INFO:
+    /* A new compute application registers: the payload is its list of
+     * MPI_COMM_WORLD ranks; its app number is encoded as tag/100.
+     * The master I/O node acknowledges by echoing the app number. */
+    MPI_Get_count(status, MPI_INT, &msg_len);
+    world_ranks = (int *) malloc(sizeof(int)*msg_len);
+    receive_message((void *)world_ranks, msg_len, MPI_INT, msg_src,
+		    msg_tag, MPI_COMM_WORLD, status);
+    Broadcast(IO_NODE, world_ranks, msg_len, MPI_INT, msg_tag);
+    app_num = msg_tag / 100;
+    if (am_master_io_node())
+      send_message((void *) &app_num, 1, MPI_INT, msg_src,
+		   msg_tag, MPI_COMM_WORLD);
+    app = new App_Info(app_num, msg_len, world_ranks);
+    insert_compute_app(app_num, app);
+    /* Create the Intra communicator */
+    /* NOTE(review): inter_comm is malloc'd but never freed after the
+     * merge -- confirm whether it is needed beyond this point. */
+    inter_comm = (MPI_Comm *) malloc(sizeof(MPI_Comm));
+    intra_com = (MPI_Comm *) malloc(sizeof(MPI_Comm));
+    MPI_Intercomm_create(*comm_, 0, MPI_COMM_WORLD,
+			 world_ranks[0], (msg_tag/100), inter_comm);
+    MPI_Intercomm_merge(*inter_comm, 0, intra_com);
+    app->set_intra_comm(intra_com);
+    free(world_ranks);
+    break;
+
+  case APP_BARRIER:
+    /* This should be used very carefully when there are more than
+     * 1 compute application running
+     */
+    receive_message((void *)&app_num,1, MPI_INT, msg_src, msg_tag,
+		    MPI_COMM_WORLD, status);
+    Broadcast(IO_NODE, (void *) &app_num, 1, MPI_INT, msg_tag);
+    MPI_Barrier(*(find_compute_app(app_num)->intra_comm()));
+    break;
+
+  case GLOBAL_BARRIER:
+    /* Enter the world barrier only after every live app has asked. */
+    receive_message((void *)&app_num,1, MPI_INT, msg_src, msg_tag,
+		    MPI_COMM_WORLD, status);
+    Broadcast(IO_NODE, (void *) &app_num, 1, MPI_INT, msg_tag);
+    global_barrier_count_++;
+    if (global_barrier_count_ == num_apps_alive_)
+    {
+      MPI_Barrier(MPI_COMM_WORLD);
+      global_barrier_count_ = 0;
+    }
+    break;
+
+  case CLEANFILES:
+    receive_message((void *)&app_num,1, MPI_INT, msg_src, msg_tag,
+		    MPI_COMM_WORLD,status);
+    Broadcast(IO_NODE, (void *) &app_num, 1, MPI_INT, msg_tag);
+    cleanfiles(app_num);
+    break;
+
+  case FLUSHFILES:
+    receive_message((void *)&app_num,1, MPI_INT, msg_src, msg_tag,
+		    MPI_COMM_WORLD, status);
+    Broadcast(IO_NODE, (void *) &app_num, 1, MPI_INT, msg_tag);
+    if (world_rank_ == 0) flushfiles(app_num);
+
+    break;
+
+
+  case CREATEFILES:
+    receive_message((void *)&app_num,1, MPI_INT, msg_src, msg_tag,
+		    MPI_COMM_WORLD, status);
+    Broadcast(IO_NODE, (void *) &app_num, 1, MPI_INT, msg_tag);
+    createfiles(app_num);
+    break;
+
+  default:
+    printf("Unknown message code %d in proces_io_special\n", msg_code);
+    break;
+  }
+}
+
+
+/* Remove the directory holding the files of application <app_num>.
+ * Only world rank 0 runs the shell command, so it executes once. */
+void MPIFS::cleanfiles(int app_num)
+{
+  char buf[64];
+  /* snprintf bounds the command to the buffer (sprintf could overflow
+   * if FILEPREFIX is long). */
+  snprintf(buf, sizeof(buf), "rm -rf %s%d\n", FILEPREFIX, app_num);
+  if (world_rank_ == 0) system(buf);
+}
+
+/* Create the directory for the files of application <app_num>. */
+void MPIFS::createfiles(int app_num)
+{
+  char buf[64];
+  snprintf(buf, sizeof(buf), "mkdir %s%d\n", FILEPREFIX, app_num);
+  if (world_rank_ == 0) system(buf);
+}
+
+/* Defeat file-system caching on this I/O node: write a large scratch
+ * file, fsync it, then read it back, so timing runs start cold. */
+void MPIFS::flushfiles(int app_num)
+{
+  FILE *flushfp;
+  char filename[64];
+
+  sprintf(filename, "%s%d/%s.%d", FILEPREFIX, app_num,"flushfile",world_rank_);
+  if ((flushfp = fopen(filename,"wb+"))==NULL)
+    {
+      printf("Cannot open flush file on io node\n");
+      exit(1);
+    };
+
+#ifdef TARGETAIX
+  int size = 4*1024*1024; // on SP2
+#else
+  int size = 1*1024*1024; // on bunny
+#endif
+  char * buffer = (char*) malloc(sizeof(char) * size);
+  if (buffer == NULL)
+    {
+      /* Bug fix: the malloc result was previously used unchecked. */
+      printf("Cannot allocate flush buffer on io node\n");
+      exit(1);
+    }
+#ifdef TARGETAIX
+  for(int i=0; i < 32; i++){
+    fwrite(buffer, sizeof(char), size, flushfp);
+  }
+#else
+  fwrite (buffer, sizeof(char), size, flushfp);
+#endif
+  int filedesc;
+  filedesc = fileno(flushfp);
+  fsync(filedesc);
+  fclose(flushfp);
+
+  if ((flushfp = fopen(filename,"r"))==NULL)
+    {
+      printf("Cannot open flush file on io node\n");
+      exit(1);
+    };
+
+#ifdef TARGETAIX
+  /* Bug fix: "i" was used here without a declaration -- the index of
+   * the earlier for loop is not in scope under standard C++. */
+  for(int i=0; i < 32; i++){
+    fread(buffer, sizeof(char), size, flushfp);
+  }
+#else
+  fread(buffer, sizeof(char), size, flushfp);
+#endif
+
+  fclose(flushfp);
+  free(buffer);
+}
+
+
+/* Register <app> under <app_num>, growing the pointer table if needed.
+ * Newly created slots between the old and new maximum are NULLed. */
+void MPIFS::insert_compute_app(int app_num, App_Info *app)
+{
+  if (app_num > current_max_app_num_)
+  {
+    /* Grow via a temporary so the old table is not lost (and leaked)
+     * if the allocation fails. */
+    App_Info **tmp;
+    if (compute_apps_info_)
+      tmp = (App_Info **)realloc(compute_apps_info_,
+				 sizeof(App_Info*)*(app_num+1));
+    else
+      tmp = (App_Info **)malloc(sizeof(App_Info*)*(app_num+1));
+    if (tmp == NULL) {
+      printf("Error in MPIFS::insert_compute_app - out of memory\n");
+      exit(1);
+    }
+    compute_apps_info_ = tmp;
+
+    for(int i=current_max_app_num_+1; i <= app_num ;i++){
+      compute_apps_info_[i] = NULL;
+    }
+    current_max_app_num_ = app_num;
+  }
+  compute_apps_info_[app_num] = app;
+  num_apps_++;
+  num_apps_alive_++;
+}
+
+/* Look up the App_Info registered for <app_num>.  Returns NULL when the
+ * number is out of range (previously an out-of-bounds read) or when no
+ * application was registered under it. */
+App_Info* MPIFS::find_compute_app(int app_num)
+{
+  if ((app_num < 0) || (app_num > current_max_app_num_) ||
+      (compute_apps_info_ == NULL))
+    return NULL;
+  return compute_apps_info_[app_num];
+}
+
+
+/* Consume a QUIT message, forward it down the I/O tree, and report
+ * whether all compute applications have now quit. */
+Boolean MPIFS::received_quit_message(int msg_src, int msg_tag,
+				     MPI_Status *status)
+{
+  int quitting_app;
+  receive_message((void *) &quitting_app, 1, MPI_INT, msg_src, msg_tag,
+		  MPI_COMM_WORLD, status);
+  Broadcast(IO_NODE, (void *) &quitting_app, 1, MPI_INT, msg_tag);
+  num_apps_alive_--;
+  return (num_apps_alive_ == 0) ? YES : NO;
+}
+
+/* Return an IEEEIO handle for <name>, reusing a cached handle when the
+ * file was opened before.  On a cache hit this function frees <name>;
+ * on a miss it takes ownership of <name> and stores it in the table. */
+IOFile MPIFS::open_file(char *name, int op_type)
+{
+  IOFile fp = 0;   /* Bug fix: was uninitialized for unknown op_type */
+  int i;           /* Bug fix: must outlive the loop -- it indexes the
+                    * new slot below (standard C++ for-scope rules). */
+
+  for (i=0; i<num_open_files_; i++)
+    if (!strcmp(name, open_file_names_[i])) {
+      fp = open_file_ptrs_[i];
+      free(name);
+      break;
+    }
+
+  if (i == num_open_files_) {
+    char name1[1000], fpfx[100];
+    FILE *fp1;
+
+    /* The file "FILEPREFIX" holds the directory prefix for all I/O. */
+    fp1 = fopen("FILEPREFIX", "r");
+    if (fp1 == NULL) {
+      printf("Cannot open FILEPREFIX file\n");
+      exit(1);
+    }
+    fscanf(fp1, "%s", fpfx);
+    fclose(fp1);
+    sprintf(name1, "%s/%s", fpfx, name);
+
+    open_file_names_[num_open_files_] = name;
+    if ((op_type == RESTART) || (op_type == READ_TIMESTEP))
+      fp = open_file_ptrs_[num_open_files_++] = IEEEopen(name1, "r");
+    else if ((op_type == TIMESTEP) || (op_type == CHECKPOINT))
+      fp = open_file_ptrs_[num_open_files_++] = IEEEopen(name1, "w");
+    is_new_file_[i] = YES;
+
+  }
+
+  return fp;
+}
+
+/* Report (once per file) whether the per-I/O-node file <name> was newly
+ * created; the flag is cleared on first query.  Compute nodes keep no
+ * file table, so for them the answer is always NO. */
+Boolean MPIFS::is_new_file(char *name) {
+  char name1[100];
+  /* Bug fix: this was a bare "return;" in a value-returning function,
+   * which is undefined behavior. */
+  if (node_type() == PART_TIME_COMPUTE || node_type() == COMPUTE_NODE)
+    return NO;
+  sprintf(name1, "%s.%d", name, my_rank(IO_NODE));
+
+  for (int i=0; i<num_open_files_; i++)
+    if (!strcmp(name1, open_file_names_[i])) {
+      if (is_new_file_[i] == YES) {
+	is_new_file_[i] = NO;
+	return YES;
+      } else return NO;
+    }
+  printf("Can't find the file\n");
+  exit(0);
+  return NO;
+}
+
+
+/* Handle an attribute read/write request on an I/O node: receive and
+ * re-broadcast the packed schema, open the target file, and perform
+ * the attribute I/O.  For reads, the master I/O node sends the data
+ * back to the master compute node. */
+void MPIFS::start_attribute_io(int msg_src, int msg_tag, MPI_Status *status)
+{
+  char *schema_buf;
+  int msg_len, i;
+  IOFile fp = 0;
+
+  mpi_get_count(status, MPI_CHAR, &msg_len);
+  schema_buf = (char *) malloc(sizeof(char)*msg_len);
+  receive_message((void *)schema_buf, msg_len, MPI_CHAR, msg_src,
+		  msg_tag, MPI_COMM_WORLD,status);
+  Broadcast(IO_NODE, (void *)schema_buf, msg_len, MPI_CHAR, msg_tag);
+
+  /* Schema layout: op_type (4 bytes), name length (4 bytes), name,
+   * then the packed Attribute itself. */
+  char *ptr = schema_buf;
+  union int_to_char tmp;
+  for (i=0; i<4; i++) tmp.c[i] = *ptr++;
+  int op_type = tmp.i;
+  for (i=0; i<4; i++) tmp.c[i] = *ptr++;
+  int len = tmp.i;
+  char *fname = (char *)malloc(sizeof(char) * (len + 1));
+  char *name1 = (char *)malloc(sizeof(char) * (len + 6));
+  for (i=0; i<len; i++) fname[i] = *ptr++;
+  fname[i] = '\0';
+  sprintf(name1, "%s.%d", fname, world_rank_);
+  /* open_file takes ownership of name1 (it stores or frees it). */
+  fp = open_file(name1, op_type);
+  /* Bug fix: fname was never freed -- leaked on every attribute op. */
+  free(fname);
+  Attribute *attr = new Attribute(ptr, op_type);
+  if (op_type == TIMESTEP) attr->write_data(fp);
+  else if (op_type == READ_TIMESTEP) {
+    attr->read_data(fp);
+    if (am_master_io_node()) send_attr_data(attr);
+  }
+  delete attr;
+  free(schema_buf);
+}
+
+/* Handle an array-group I/O request on an I/O node: receive and
+ * re-broadcast the packed schema, open the target file, and run the
+ * collective-I/O strategy selected by schema_buf[0]. */
+void MPIFS::start_collective_io(int msg_src, int msg_tag, MPI_Status *status)
+{
+  int *schema_buf, msg_len, comp_app_num = (msg_tag / 10);
+  Collective_IO *new_io;
+  IOFile fp = 0;
+  int i;   /* Bug fix: declared outside the loop -- it is used after the
+            * loop below, which is ill-formed with for-scope under
+            * standard C++. */
+
+  mpi_get_count(status, MPI_INT, &msg_len);
+  schema_buf = (int *) malloc(sizeof(int)*msg_len);
+  receive_message((void *)schema_buf, msg_len, MPI_INT, msg_src,
+		  msg_tag, MPI_COMM_WORLD,status);
+  Broadcast(IO_NODE, (void *)schema_buf, msg_len, MPI_INT, msg_tag);
+
+  int len = schema_buf[2];
+  char *name = (char *)malloc(sizeof(char) * (len + 1));
+  char *name1 = (char *)malloc(sizeof(char) * (len + 6));
+  for (i=0; i<len; i++) name[i] = schema_buf[3+i];
+  name[i] = '\0';
+  sprintf(name1, "%s.%d", name, world_rank_);
+  fp = open_file(name1, schema_buf[1]);
+  free(name);
+
+  switch(schema_buf[0]){
+  case SIMPLE_IO:
+    new_io = new Simple_IO(schema_buf, msg_len,world_rank_, comp_app_num,
+			   compute_apps_info_[comp_app_num]->app_size(),
+			   compute_apps_info_[comp_app_num], fp);
+    break;
+
+  default:
+    printf("Error in start_collective_io - undefined strategy\n");
+    exit(1);
+    break;
+  }
+
+#ifdef DEBUG
+  printf("Starting the collective IO for compute app %d\n",comp_app_num);
+#endif
+  new_io->start_to_finish(NO, NULL);
+#ifdef DEBUG
+  printf("Finished the collective I/O for compute app %d\n", comp_app_num);
+#endif
+  release_compute_nodes(comp_app_num);
+  /* NOTE(review): new_io (and the schema buffer it holds) is never
+   * deleted here -- confirm whether Collective_IO cleanup happens
+   * elsewhere, otherwise this leaks per operation. */
+}
+
+
+/* Part-time I/O node variant of start_collective_io: the schema is
+ * already available locally (no receive/broadcast), and the node also
+ * contributes its own <array> data to the operation. */
+void MPIFS::part_time_io_node_loop(int *schema_buf, int msg_len,
+				   Array *array)
+{
+  Collective_IO *new_io;
+  IOFile fp = 0;
+  int i;   /* Bug fix: declared outside the loop -- used after the loop
+            * to NUL-terminate the name (C++ for-scope rules). */
+
+  int len = schema_buf[2];
+  char *name = (char *)malloc(sizeof(char) * (len + 1));
+  char *name1 = (char *)malloc(sizeof(char) * (len + 6));
+  for (i=0; i<len; i++) name[i] = schema_buf[3+i];
+  name[i] = '\0';
+  sprintf(name1, "%s.%d", name, world_rank_);
+  fp = open_file(name1, schema_buf[1]);
+  free(name);
+
+  switch(schema_buf[0]){
+  case SIMPLE_IO:
+    new_io = new Simple_IO(schema_buf, msg_len, world_rank_,
+			   1, app_size_, app_info_, fp);
+    break;
+
+  default:
+    printf("Error in part_time_io_node_loop - undefined strategy\n");
+    exit(1);
+    break;
+  }
+  new_io->start_to_finish(YES, array);
+  release_compute_nodes(1);
+  /* NOTE(review): new_io is never deleted -- see start_collective_io. */
+}
+
+/* Compute-node side of a collective operation when dedicated I/O nodes
+ * exist: run the strategy-specific compute loop, then synchronize with
+ * the I/O side via compute_side_io_done().
+ * NOTE(review): the dummy Simple_IO object is never deleted -- likely
+ * a per-operation leak; confirm before adding a delete (its destructor
+ * is not visible here). */
+void MPIFS::compute_node_io_loop(Array *array)
+{
+  Simple_IO *simple;
+
+  switch(array->io_strategy()){
+  case SIMPLE_IO:
+    simple = new Simple_IO();
+    simple->compute_node_io_loop(array);
+    break;
+
+  default:
+    printf("Error in MPIFS::compute_node_io_loop - Undefined i/o strategy\n");
+    exit(1);
+    break;
+  }
+  compute_side_io_done();
+}
+
+
+/* Size of this node's application, viewed either as a compute app or as
+ * an I/O app.  Any inconsistent node_type combination prints the same
+ * diagnostic and exits with code 10. */
+int MPIFS::app_size(int node_type)
+{
+  if (node_type == COMPUTE_NODE)
+  {
+    if (node_type_ != IO_NODE) return app_size_;
+  }
+  else if (node_type == IO_NODE)
+  {
+    if (node_type_ == IO_NODE) return app_size_;
+    if (node_type_ == PART_TIME_IO) return io_app_size_;
+  }
+  printf("Error in MPIFS::app_size - wrong node_type\n");
+  exit(10);
+  return -1;
+}
+
+/* Rank of this node within its application, viewed either as a compute
+ * app or as an I/O app.  Inconsistent combinations print the same
+ * diagnostic and exit with code 10 (mirrors app_size above). */
+int MPIFS::my_rank(int node_type)
+{
+  if (node_type == COMPUTE_NODE)
+  {
+    if (node_type_ != IO_NODE) return app_rank_;
+  }
+  else if (node_type == IO_NODE)
+  {
+    if (node_type_ == IO_NODE) return app_rank_;
+    if (node_type_ == PART_TIME_IO) return io_app_rank_;
+  }
+  printf("Error in MPIFS::my_rank - wrong node_type\n");
+  exit(10);
+  return -1;
+}
+
+/* Accessor for this node's role (compute, I/O, part-time, ...). */
+int MPIFS::node_type()
+{
+  return node_type_;
+}
+
+/* Master compute node packs the array schema and ships it to the master
+ * I/O node to kick off a collective operation; other nodes do nothing.
+ * NOTE(review): the buffer produced by pack() is not freed after the
+ * send -- confirm pack()'s allocation semantics (possible leak). */
+void MPIFS::send_array_schema(Array *array)
+{
+  int *schema, schema_size;
+  if (am_master_compute_node()){
+    array->pack(&schema, &schema_size);
+    send_message((void *)schema, schema_size, MPI_INT, master_io_node_,
+		 app_num_*10+ARRAYGROUP_SCHEMA, MPI_COMM_WORLD);
+  }
+}
+
+/* Compute-node side of an attribute read: receive the packed data from
+ * the I/O side, forward it down the compute broadcast tree, and unpack
+ * the header (esize, count) into <attr>.  Note that attr ends up
+ * pointing INTO data_buf, so the buffer must not be freed here --
+ * ownership transfers to the Attribute via set_data_ptr. */
+void MPIFS::receive_attr_data(Attribute *attr)
+{
+  int msg_len, i;
+  MPI_Status status;
+  char *ptr;
+
+  MPI_Probe(MPI_ANY_SOURCE, ATTRIBUTE_DATA, MPI_COMM_WORLD, &status);
+  mpi_get_count(&status, MPI_CHAR, &msg_len);
+  void *data_buf = (void *) malloc(msg_len);
+  receive_message((void *)data_buf, msg_len, MPI_CHAR, status.MPI_SOURCE,
+		  ATTRIBUTE_DATA, MPI_COMM_WORLD, &status);
+  Broadcast(COMPUTE_NODE, data_buf, msg_len, MPI_CHAR, ATTRIBUTE_DATA);
+
+  /* Wire format: esize (4 bytes), count (4 bytes), raw data. */
+  ptr = (char *)data_buf;
+  union int_to_char tmp;
+  for (i=0; i<4; i++) tmp.c[i] = *ptr++;
+  attr->set_esize(tmp.i);
+  for (i=0; i<4; i++) tmp.c[i] = *ptr++;
+  attr->set_count(tmp.i);
+  attr->set_data_ptr(ptr);
+}
+
+/* I/O-side counterpart of receive_attr_data: prefix the attribute data
+ * with its esize and count (4 bytes each) and send the packet to the
+ * master compute node of app 1.  The local buffer is freed after the
+ * (blocking) send. */
+void MPIFS::send_attr_data(Attribute *attr)
+{
+  int i;
+  void *data_buf = (void *)malloc((attr->data_size() + 8));
+  char *ptr = (char *)data_buf;
+  union int_to_char tmp;
+  tmp.i = attr->esize();
+  for (i=0; i<4; i++) *ptr++ = tmp.c[i];
+  tmp.i = attr->count();
+  for (i=0; i<4; i++) *ptr++ = tmp.c[i];
+  memcpy(ptr, attr->get_data_ptr(), attr->data_size());
+
+  /* Hard-wired to compute app number 1 -- the single-app case. */
+  int master_comp_node = compute_apps_info_[1]->get_master();
+  send_message(data_buf, attr->data_size()+8, MPI_CHAR,
+	       master_comp_node, ATTRIBUTE_DATA, MPI_COMM_WORLD);
+  free(data_buf);
+}
+
+/* Relay an attribute schema down the I/O broadcast tree.  The schema
+ * contents are not interpreted here; the buffer is freed immediately
+ * after forwarding. */
+void MPIFS::receive_attr_schema()
+{
+  MPI_Status status;
+  int msg_len;
+
+  MPI_Probe(MPI_ANY_SOURCE, ATTRIBUTE_SCHEMA, MPI_COMM_WORLD, &status);
+  mpi_get_count(&status, MPI_CHAR, &msg_len);
+  char *schema_buf = (char *) malloc(sizeof(char) * msg_len);
+  receive_message((void *)schema_buf, msg_len, MPI_CHAR, status.MPI_SOURCE,
+		  ATTRIBUTE_SCHEMA, MPI_COMM_WORLD, &status);
+  Broadcast(IO_NODE, (void *)schema_buf, msg_len, MPI_CHAR, ATTRIBUTE_SCHEMA);
+  free(schema_buf);
+}
+
+/* Pack <attr> (plus file name and op type) and send it to the master
+ * I/O node.  Only the master compute node packs and sends. */
+void MPIFS::send_attr_schema(Attribute *attr, char *fname, int op_type)
+{
+  char *schema;
+  int schema_size;
+
+  if (am_master_compute_node()){
+    attr->pack(schema_size, schema, fname, op_type);
+    send_message((void *)schema, schema_size, MPI_CHAR, master_io_node_,
+		 ATTRIBUTE_SCHEMA, MPI_COMM_WORLD);
+    /* Bug fix: free() was previously outside this if-block, so every
+     * non-master node freed an UNINITIALIZED pointer (undefined
+     * behavior).  Only the master ever allocates schema. */
+    free(schema);
+  }
+}
+
+/* Called from the compute node side.  Dedicated compute nodes forward
+ * the command (encoded in the tag as cmd*10+SPECIAL) to the master I/O
+ * node and then take part in the matching barrier; part-time nodes
+ * execute file-management commands directly. */
+void MPIFS::user_commands(int cmd)
+{
+  if (node_type_ == COMPUTE_NODE)
+  {
+    int tag = cmd*10+SPECIAL;
+    if (am_master_compute_node()){
+      send_message((void *)&app_num_, 1, MPI_INT, master_io_node_,
+		   tag, MPI_COMM_WORLD);
+    }
+
+    if (cmd == APP_BARRIER){
+      MPI_Barrier(*(app_info_->intra_comm()));
+    } else if (cmd == GLOBAL_BARRIER){
+      MPI_Barrier(MPI_COMM_WORLD);
+    }
+  }
+  else {
+    /* Must be the part_time I/O case */
+    switch(cmd){
+    case APP_BARRIER:
+      MPI_Barrier(*comm_);
+      break;
+    case GLOBAL_BARRIER:
+      MPI_Barrier(MPI_COMM_WORLD);
+      break;
+    case CLEANFILES:
+      if (node_type_ == PART_TIME_IO)
+	cleanfiles(app_num_);
+      break;
+    case FLUSHFILES:
+      if (node_type_ == PART_TIME_IO)
+	flushfiles(app_num_);
+      break;
+    case CREATEFILES:
+      if (node_type_ == PART_TIME_IO)
+	createfiles(app_num_);
+      break;
+    default:
+      printf("Unknown message code %d\n", cmd);
+      break;
+    }
+  }
+}
+
+/* This function is called by io nodes or part-time io nodes when a
+ * collective operation finishes.  Part-time nodes synchronize through
+ * the world barrier; dedicated I/O nodes barrier among themselves and
+ * the master then sends COMP_QUIT to wake the compute-side master
+ * (matched in compute_side_io_done).  Only the single-application case
+ * is supported on the dedicated-I/O path. */
+void MPIFS::release_compute_nodes(int app_num)
+{
+  App_Info *app;
+
+  if (node_type_ == PART_TIME_IO){
+    MPI_Barrier(MPI_COMM_WORLD);
+  } else if ((node_type_ == IO_NODE) && (num_apps_ == 1)){
+    MPI_Barrier(*comm_);
+    if (am_master_io_node()){
+      app = find_compute_app(app_num);
+      send_message(&app_num, 1, MPI_INT, app->get_master(), COMP_QUIT,
+		   MPI_COMM_WORLD);
+    }
+  } else {
+    printf("Error in release_compute_nodes - wrong node type \n");
+    exit(11);
+  }
+}
+
+/* This function is called by the compute nodes after they have done
+ * their part of a collective operation; it blocks until the I/O side
+ * releases them (see release_compute_nodes). */
+void MPIFS::compute_side_io_done()
+{
+  int app_num;
+  MPI_Status status;
+
+  if (node_type_ == PART_TIME_COMPUTE){
+    MPI_Barrier(MPI_COMM_WORLD);
+  } else if (node_type_ == COMPUTE_NODE){
+    /* The master waits for COMP_QUIT from the master I/O node, then
+     * everyone joins the app barrier. */
+    if (am_master_compute_node())
+      receive_message((void *)&app_num, 1, MPI_INT, master_io_node_, COMP_QUIT,
+		      MPI_COMM_WORLD, &status);
+    MPI_Barrier(*comm_);
+  } else {
+    /* Bug fix: corrected typo in diagnostic ("io_doen" -> "io_done"). */
+    printf("Error in compute_side_io_done - wrong node type\n");
+    exit(11);
+  }
+}
+
+/* Trivial accessors and mutators for the I/O-app descriptor, the master
+ * I/O node's rank, and the staging memory buffer. */
+App_Info* MPIFS::io_app_info() { return io_app_info_; }
+
+int MPIFS::master_io_node() { return master_io_node_; }
+
+int MPIFS::mem_buf_size() { return mem_buf_size_; }
+
+char *MPIFS::mem_buf() { return mem_buf_; }
+
+void MPIFS::set_mem_buf_size(int size) { mem_buf_size_ = size; }
+
+void MPIFS::set_mem_buf(char *buf) { mem_buf_ = buf; }
diff --git a/src/Panda/MPIFS.h b/src/Panda/MPIFS.h
new file mode 100644
index 0000000..4fbedfe
--- /dev/null
+++ b/src/Panda/MPIFS.h
@@ -0,0 +1,95 @@
+#ifndef MPIFS_dot_h
+#define MPIFS_dot_h
+
+#include "definitions.h"
+#include "VirtFS.h"
+#include "mpi.h"
+#include "App_Info.h"
+#include "List.h"
+#include "Attribute.h"
+
+
+class Collective_IO;
+class Array;
+
+
+/* MPI-based parallel file system object.  One instance lives on each
+ * node; node_type_ decides whether it acts as a compute client, a
+ * dedicated I/O server, or a part-time mixture of both.  I/O nodes run
+ * io_node_main_loop() and serve schema/attribute requests arriving over
+ * MPI_COMM_WORLD. */
+class MPIFS : public VirtFS {
+  int node_type_;	 /* compute,io,part_time .. */
+  int world_rank_;	 /* rank in MPI_COMM_WORLD */
+  int app_num_;          /* for io-nodes this should be 0 */
+  int app_rank_;         /* rank within the applications */
+  int app_size_;         /* size of the application */
+  int master_io_node_;
+  MPI_Comm *comm_;       /* owned: freed and released in ~MPIFS */
+  App_Info *app_info_;   /* rank --> world mapping */
+
+  /* Information used by the IO nodes */
+  int num_apps_;         /* # of compute apps */
+  int num_apps_alive_;
+  int current_max_app_num_;
+  int global_barrier_count_;
+  App_Info **compute_apps_info_;  /* malloc'd table indexed by app num */
+  char *mem_buf_;        /* staging buffer for subchunk transfers */
+  int mem_buf_size_;
+  int num_open_files_;   /* entries used in the three tables below */
+  IOFile open_file_ptrs_[1000];
+  char *open_file_names_[1000];
+  Boolean is_new_file_[1000];
+
+  /* Information required for part-time nodes */
+  int io_app_num_;
+  int io_app_rank_;
+  int io_app_size_;
+  MPI_Comm *io_comm_;
+  App_Info *io_app_info_;
+
+  void do_init(int,int,int,int,int*);
+  void do_init(int,int,int,int*, int,int,int*);
+  void wait_for_next_message(int*,int*,int*, MPI_Status*);
+  void process_io_special_message(int,int, MPI_Status*);
+  void cleanfiles(int);
+  void createfiles(int);
+  void flushfiles(int);
+  void insert_compute_app(int , App_Info*);
+  App_Info* find_compute_app(int);
+  Boolean received_quit_message(int,int,MPI_Status*);
+
+
+ public:
+  MPIFS(int,int,int, int,int*);
+  MPIFS(int,int,int,int,int*, Boolean);
+  MPIFS(int,int,int,int*,int,int,int*);   /* part-time I/O variant */
+  virtual ~MPIFS();
+  Boolean am_master_compute_node();
+  Boolean am_compute_node();
+  Boolean am_master_io_node();
+  Boolean am_io_node();
+  /* Tree-structured send-to-children broadcast within one app. */
+  void Broadcast(int,void*, int,MPI_Datatype,int);
+  void io_node_main_loop();
+  void start_collective_io(int,int, MPI_Status*);
+  void start_attribute_io(int,int,MPI_Status*);
+  void part_time_io_node_loop(int*,int, Array*);
+  void compute_node_io_loop(Array*);
+
+  int app_size(int);
+  int my_rank(int);
+  void send_array_schema(Array*);
+  void send_attr_schema(Attribute*, char*, int);
+  void receive_attr_schema();
+  void send_attr_data(Attribute *);
+  void receive_attr_data(Attribute *);
+  int node_type();
+  void user_commands(int);
+  void release_compute_nodes(int);
+  void compute_side_io_done();
+  App_Info* io_app_info();
+  int master_io_node();
+  int mem_buf_size();
+  void set_mem_buf_size(int);
+  char* mem_buf();
+  void set_mem_buf(char *);
+  /* open_file takes ownership of the name string (stores or frees). */
+  IOFile open_file(char *, int);
+  Boolean is_new_file(char *);
+};
+
+#endif
diff --git a/src/Panda/Panda.C b/src/Panda/Panda.C
new file mode 100644
index 0000000..6b933e6
--- /dev/null
+++ b/src/Panda/Panda.C
@@ -0,0 +1,153 @@
+#include "definitions.h"
+#include "Panda.h"
+
+
+/* Currently the sequential case is unsupported */
+
+extern MPIFS *MPIFS_global_obj;
+int global_system_type_;
+
+/* Default constructor: plain Unix file system, no MPI back end. */
+Panda::Panda()
+{
+  file_system_type_ = UNIX_SYSTEM;
+  file_system_ = NULL;
+}
+
+/* This is the interface for regular Panda (i.e no part-time io nodes) */
+Panda::Panda(int node_type, int app_num, int relative_rank, int app_size,
+             int *world_ranks)
+{
+  global_system_type_ = file_system_type_ = MPI_SYSTEM;
+  file_system_ = new MPIFS(node_type, app_num, relative_rank, app_size,
+                           world_ranks);
+}
+
+/* Regular Panda with an explicit shared-file flag. */
+Panda::Panda(int node_type, int app_num, int relative_rank, int app_size,
+             int *world_ranks, Boolean shared_flag)
+{
+  global_system_type_ = file_system_type_ = MPI_SYSTEM;
+  file_system_ = new MPIFS(node_type, app_num, relative_rank, app_size,
+                           world_ranks, shared_flag);
+}
+
+/* This is the interface for part-time io nodes */
+Panda::Panda(int node_type, int comp_rank, int comp_size, int *comp_world_ranks,
+             int io_rank, int io_size, int *io_world_ranks)
+{
+  global_system_type_ = file_system_type_ = MPI_SYSTEM;
+  file_system_ = new MPIFS(node_type, comp_rank, comp_size, comp_world_ranks,
+                           io_rank, io_size, io_world_ranks);
+}
+
+/* This is the simplest interface for initializing the I/O and compute nodes.
+ * It can be used only for regular and shared I/O (i.e. it cannot be used for
+ * part-time I/O nodes).  The constructor assumes that the MPIRUN library has
+ * been installed and that you have distinct applications at the mpirun level.
+ */
+
+/*
+Panda::Panda(int node_type)
+{
+ int app_size, app_rank, *world_ranks, leader;
+
+ file_system_type_ = MPI_SYSTEM;
+ if ((node_type == COMPUTE_NODE) || (node_type == IO_NODE)){
+ MPI_Comm_size(MPIRUN_APP_COMM, &app_size);
+ MPI_Comm_rank(MPIRUN_APP_COMM, &app_rank);
+ leader = MPIRUN_APP_LEADERS[MPIRUN_APP_ID];
+ world_ranks = (int *) malloc(sizeof(int)*app_size);
+ for(int i=0; i<app_size;i++)
+ world_ranks[i] = leader + i;
+ file_system_ = new MPIFS(node_type, MPIRUN_APP_ID, app_rank, app_size,
+ world_ranks);
+ free(world_ranks);
+ world_ranks = NULL;
+ } else {
+ printf("Error: Invalid constructor for this node_type %d\n", node_type);
+ exit(1);
+ }
+}
+*/
+
+/* This is the simplest interface for the part-time i/o nodes. Here the number
+ * of i/o nodes is specified. The first <num_io_nodes> are designated as part
+ * time i/o nodes and the remaining as part-time compute. This requires mpirun
+ * library to be initialized and there should be only one mpirun application.
+ */
+/*
+Panda::Panda(int node_type, int num_io_nodes)
+{
+ int app_rank, app_size, *io_ranks, *world_ranks;
+
+ file_system_type_ = MPI_SYSTEM;
+ if ((node_type == COMPUTE_NODE) || (node_type == IO_NODE)){
+ printf("Error: Invalid constructor\n");
+ exit(1);
+ } else if (MPIRUN_NUM_APPS == 1){
+ MPI_Comm_size(MPIRUN_APP_COMM, &app_size);
+ MPI_Comm_rank(MPIRUN_APP_COMM, &app_rank);
+ world_ranks = (int *)malloc(sizeof(int)*app_size);
+ io_ranks = (int *) malloc(sizeof(int)*num_io_nodes);
+ for(int i=0;i<app_size; i++) world_ranks[i] = i;
+ for(i=0;i<num_io_nodes;i++) io_ranks[i] =i;
+ if (app_rank < num_io_nodes){
+ file_system_ = new MPIFS(PART_TIME_IO, app_rank, app_size, world_ranks,
+ app_rank, num_io_nodes, io_ranks);
+ } else {
+ file_system_ = new MPIFS(PART_TIME_COMPUTE, app_rank, app_size, world_ranks,
+ -1, num_io_nodes, io_ranks);
+ }
+ } else {
+ printf("Error: Part-time I/O nodes - More than one mpirun app running\n");
+ exit(1);
+ }
+}
+*/
+
+/* Release the underlying file system object (deleting a null pointer
+ * is a no-op, so no guard is required). */
+Panda::~Panda()
+{
+  delete file_system_;
+  file_system_ = NULL;
+}
+
+/* Each of the following forwards a user command to the MPI file-system
+ * layer; on any other system type they are no-ops. */
+void Panda::global_barrier()
+{
+  if (file_system_type_ != MPI_SYSTEM) return;
+  ((MPIFS *) file_system_)->user_commands(GLOBAL_BARRIER);
+}
+
+void Panda::app_barrier()
+{
+  if (file_system_type_ != MPI_SYSTEM) return;
+  ((MPIFS *) file_system_)->user_commands(APP_BARRIER);
+}
+
+void Panda::flushfiles()
+{
+  if (file_system_type_ != MPI_SYSTEM) return;
+  ((MPIFS *) file_system_)->user_commands(FLUSHFILES);
+}
+
+void Panda::cleanfiles()
+{
+  if (file_system_type_ != MPI_SYSTEM) return;
+  ((MPIFS *) file_system_)->user_commands(CLEANFILES);
+}
+
+void Panda::createfiles()
+{
+  if (file_system_type_ != MPI_SYSTEM) return;
+  ((MPIFS *) file_system_)->user_commands(CREATEFILES);
+}
diff --git a/src/Panda/Panda.h b/src/Panda/Panda.h
new file mode 100644
index 0000000..c2515b3
--- /dev/null
+++ b/src/Panda/Panda.h
@@ -0,0 +1,31 @@
+#ifndef Panda_dot_h
+#define Panda_dot_h
+
+#include "VirtFS.h"
+#include "MPIFS.h"
+
+
+/* User-visible facade over the underlying virtual file system.  The
+ * constructor chosen determines this node's role; all operations are
+ * driven through the MPIFS object it creates. */
+class Panda {
+  int file_system_type_;   /* UNIX_SYSTEM or MPI_SYSTEM */
+  VirtFS *file_system_;    /* owned; deleted in ~Panda */
+
+public:
+  Panda();
+  Panda(int, int, int,int, int*);            /* regular Panda */
+  Panda(int, int, int,int, int*, Boolean);   /* regular + shared flag */
+  Panda(int,int,int,int*,int,int,int*);      /* part-time I/O nodes */
+  Panda(int);
+  Panda(int, int);
+  ~Panda();
+
+  /* stuff required only for testing purposes */
+  void global_barrier();
+  void app_barrier();
+  void cleanfiles();
+  void flushfiles();
+  void createfiles();
+};
+
+#endif
+
+
diff --git a/src/Panda/Shared_IO.C b/src/Panda/Shared_IO.C
new file mode 100644
index 0000000..8b5a5cd
--- /dev/null
+++ b/src/Panda/Shared_IO.C
@@ -0,0 +1,237 @@
+#include "definitions.h"
+#include "ArrayGroup.h"
+#include "MPIFS.h"
+#include "Chunk.h"
+#include "App_Info.h"
+#include "Array.h"
+#include "message.h"
+#include "Shared_IO.h"
+
+
+extern MPIFS* MPIFS_global_obj;
+extern int SUBCHUNK_SIZE;
+
+/* Build a shared-I/O driver from a packed schema and immediately kick
+ * off the I/O of the first subchunk via continue_io().  (continue_io
+ * is virtual; called from this constructor it resolves to Shared_IO's
+ * own override, which is the intent.) */
+Shared_IO::Shared_IO(int *schema_string, int schema_size, int world_rank,
+		     int comp_app_num,int comp_app_size , App_Info *app_info)
+: Simple_IO(schema_string, schema_size, world_rank, comp_app_num,
+	    comp_app_size, app_info)
+{
+
+  compute_chunk_ = new Chunk();
+  current_chunk_ = new Chunk();
+  subchunk_ = new Chunk();
+  if ((op_type_ == RESTART)||(op_type_ == GENERAL_READ)||
+      (op_type_ == READ_TIMESTEP))
+    read_op_ = YES;
+  else
+    read_op_ = NO;
+
+  /* We need to set the following variables so that continue_io()*
+   * would start the I/O of the first subchunk automatically */
+  contiguous_ = NO;
+  current_array_id_ = -1;  /* (was redundantly assigned twice before) */
+  current_chunk_id_ = 0;
+  num_of_chunks_ = -1; /* This will cause get_next_chunk() to fail */
+  current_subchunk_id_ = 0;
+  num_of_subchunks_ = -1; /* Causes get_next_subchunk() to fail */
+  status_flag_ = START;
+  continue_io();
+}
+
+/* Release the chunk descriptors owned by this object.
+ * NOTE(review): current_chunk_ is allocated in the constructor but not
+ * deleted here -- presumably the Simple_IO base destructor releases it;
+ * confirm, otherwise it leaks. */
+Shared_IO::~Shared_IO()
+{
+  if (subchunk_) delete subchunk_;
+  if (compute_chunk_) delete compute_chunk_;
+  subchunk_ = compute_chunk_ = NULL;
+}
+
+/* Advance to the next array in the group and reset per-array state:
+ * chunking mode, chunk counters, and whether chunk data can be moved as
+ * one contiguous byte stream (natural chunking with no subchunking) or
+ * requires derived datatypes.  Returns NO when all arrays are done. */
+Boolean Shared_IO::get_next_array(){
+  current_array_id_++;
+  if (current_array_id_ < num_of_arrays_){
+    make_subchunks_ = -1;
+    current_array_ = find_array(current_array_id_);
+    nat_chunked_ = current_array_->nat_chunked();
+    sub_chunked_ = current_array_->sub_chunked();
+    array_rank_ = current_array_->rank();
+
+    /* Grow the per-dimension schema buffers if this array has more
+     * dimensions than any seen so far. */
+    if (array_rank_ > max_rank_){
+      realloc_schema_bufs(array_rank_);
+    }
+    num_of_chunks_ = current_array_->layout(IO_NODE)->total_elements();
+    current_chunk_id_ = -1;
+    if (nat_chunked_ && !sub_chunked_)
+      contiguous_ = YES; /* No need to use derived datatypes */
+    else
+      contiguous_ = NO; /* Have to use derived datatypes */
+
+    bytes_to_go_ = 0;
+    current_subchunk_id_ = -1;
+    return YES;
+  } else
+    return NO;
+}
+
+
+/* Advance to the next chunk of the current array that this I/O node is
+ * responsible for (chunks are strided across I/O nodes).  In the
+ * contiguous case the transfer schema for the whole chunk is prepared
+ * here; otherwise subchunk state is (re)initialized, building the
+ * subchunk layout on first use.  Returns NO when the array's chunks are
+ * exhausted. */
+Boolean Shared_IO::get_next_chunk()
+{
+  int *ptr;
+
+  if (!current_array_) return NO;
+  current_chunk_id_ = current_array_->get_next_index(current_chunk_id_,
+						     my_io_rank_,
+						     num_io_nodes_);
+  if (current_chunk_id_ < num_of_chunks_){
+    current_chunk_->set_data_ptr(NULL);
+    current_chunk_->init(current_array_, current_chunk_id_,
+			 IO_NODE, NO_ALLOC);
+    if (contiguous_){
+      /* Whole chunk streams through mem_buf_; schema_bufs_[0] describes
+       * it: {array id, chunk id, nat, contig, op, offset, length}. */
+      bytes_to_go_ = current_chunk_->total_size_in_bytes();
+      current_chunk_->set_data_ptr(mem_buf_);
+      ptr = schema_bufs_[0];
+      *ptr++ = current_array_id_;
+      *ptr++ = current_chunk_id_;
+      *ptr++ = (int) nat_chunked_;
+      *ptr++ = (int) contiguous_;
+      *ptr++ = op_type_;
+      *ptr++ = 0;
+      *ptr++ = 0;
+      compute_chunk_overlaps(current_array_, current_chunk_);
+    }
+    else {
+      if (!sub_chunked_ && (make_subchunks_ == -1)){
+	current_array_->make_sub_chunks(current_chunk_);
+	make_subchunks_ = 1;
+      }
+      num_of_subchunks_ = current_array_->layout(SUB_CHUNK)->total_elements();
+      current_subchunk_id_ = -1;
+    }
+    return YES;
+  }
+  else
+    return NO;
+}
+
+
+/* This should not be called for the contiguous_ case */
+Boolean Shared_IO::get_next_subchunk()
+{
+ current_subchunk_id_++;
+ if (current_subchunk_id_ < num_of_subchunks_){
+ subchunk_->set_data_ptr(NULL);
+ subchunk_->init(current_chunk_, current_subchunk_id_, NO_ALLOC);
+ bytes_to_go_ = subchunk_->total_size_in_bytes();
+
+ if (bytes_to_go_ < mem_buf_size_)
+ realloc_mem_bufs(bytes_to_go_);
+
+ subchunk_->set_data_ptr(mem_buf_);
+ return YES;
+ }
+ else
+ return NO;
+}
+
+
+/* Begin the (non-blocking) transfer of the next piece of data and move
+ * the state machine to WAITING.  Contiguous chunks stream through
+ * mem_buf_ in SUBCHUNK_SIZE slices described by schema_bufs_[0];
+ * non-contiguous subchunks go through the derived-datatype path. */
+void Shared_IO::start_subchunk_io()
+{
+  int *ptr;
+
+  if (contiguous_){
+    ptr = schema_bufs_[0];
+    /* ptr[6] = bytes in this slice; ptr[5] = running offset. */
+    ptr[6] = min(SUBCHUNK_SIZE, bytes_to_go_);
+
+    nb_send_message((void *)ptr, 7, MPI_INT, dest_ids_[0],
+		    CHUNK_SCHEMA, MPI_COMM_WORLD, &schema_requests_[0]);
+    if (read_op_){
+      read_data(mem_buf_, ptr[6]);
+      nb_send_message((void *)mem_buf_, ptr[6], MPI_CHAR, dest_ids_[0],
+		      CHUNK_DATA_FROM_IO, MPI_COMM_WORLD, &requests_[0]);
+    }
+    else
+      nb_receive_message((void *)mem_buf_, ptr[6], MPI_CHAR, dest_ids_[0],
+			 CHUNK_DATA_TO_IO, MPI_COMM_WORLD, &requests_[0]);
+    ptr[5] += ptr[6]; /* Offset of the next subchunk */
+    bytes_to_go_ -= ptr[6];
+    status_flag_ = WAITING;
+
+  } else {
+    compute_chunk_overlaps(current_array_, subchunk_);
+
+    compute_schemas(current_array_, subchunk_, compute_chunk_,
+		    current_array_id_);
+    if (read_op_){
+      read_data(subchunk_);
+      send_data_to_compute_nodes(subchunk_, NULL, NULL);
+    }
+    else
+      receive_data_from_compute_nodes(subchunk_, NULL, NULL);
+    status_flag_ = WAITING;
+  }
+}
+
+
+/* Poll the outstanding transfers.  When they have all completed: for a
+ * write, the staged data is flushed to disk; derived datatypes are
+ * released; the state machine returns to START.  Returns NO while any
+ * transfer is still pending. */
+Boolean Shared_IO::test_subchunk_io()
+{
+  int flag;
+  MPI_Testall(num_overlaps_, requests_, &flag, statuses_);
+  if (flag) {
+    /* Free schema request objects - Do we need to do this */
+    MPI_Waitall(num_overlaps_, schema_requests_,statuses_);
+    status_flag_ = START;
+    if (!read_op_)
+      if (contiguous_)
+	write_data(mem_buf_, schema_bufs_[0][6], 1);
+      else
+	write_data(subchunk_);
+
+    if (!contiguous_) free_datatypes();
+    return YES;
+  }
+  return NO;
+}
+
+
+/* Return YES, if I/O is complete */
+Boolean Shared_IO::continue_io()
+{
+ if (status_flag_ == START){
+ if (!start_next_subchunk_io()) return YES; /* IO completed */
+ } else if (status_flag_ == WAITING){
+ if (test_subchunk_io())
+ if (!start_next_subchunk_io()) return YES; /* IO done */
+ } else {
+ printf("Error - Invalid status_flag value \n");
+ exit(11);
+ }
+ return NO;
+}
+
+/* Return yes if you can start the io of another subchunk */
+Boolean Shared_IO::start_next_subchunk_io()
+{
+ if (contiguous_){
+ if (bytes_to_go_ <= 0){
+ while(!get_next_chunk()){
+ if (!get_next_array()) return NO;
+ }
+ /* Since we might be looking at another array */
+ if (!contiguous_) get_next_subchunk();
+ }
+
+ start_subchunk_io();
+ } else {
+
+ if (!get_next_subchunk()){
+ /* We have finished this chunk */
+ while(!get_next_chunk()){
+ if (!get_next_array()) return NO;
+ }
+ if (!contiguous_) get_next_subchunk();
+ }
+
+ start_subchunk_io();
+ }
+ return YES;
+}
+
diff --git a/src/Panda/Shared_IO.h b/src/Panda/Shared_IO.h
new file mode 100644
index 0000000..8f29eca
--- /dev/null
+++ b/src/Panda/Shared_IO.h
@@ -0,0 +1,32 @@
+#ifndef Shared_IO_dot_h
+#define Shared_IO_dot_h
+
+#include "Simple_IO.h"
+class Chunk;
+
+/* Incremental variant of Simple_IO: the I/O node advances one subchunk
+ * at a time through repeated continue_io() calls, so I/O can be
+ * interleaved with other work (shared / part-time operation). */
+class Shared_IO : public Simple_IO
+{
+ protected:
+  int current_array_id_;   /* index into the array group */
+  int status_flag_;        /* START or WAITING (state machine) */
+  Chunk *subchunk_;
+  Chunk *compute_chunk_;
+  Boolean read_op_;        /* YES for restart/read ops */
+  int bytes_to_go_;        /* bytes left in current chunk/subchunk */
+  int make_subchunks_;     /* -1 until subchunk layout is built */
+
+  Boolean get_next_chunk();
+  Boolean get_next_array();
+  Boolean get_next_subchunk();
+  Boolean start_next_subchunk_io();
+  void start_subchunk_io();
+  Boolean test_subchunk_io();
+
+ public:
+  Shared_IO(int*,int,int,int,int, App_Info*);
+  virtual ~Shared_IO();
+  virtual Boolean continue_io();
+};
+
+#endif
+
diff --git a/src/Panda/Simple_IO.C b/src/Panda/Simple_IO.C
new file mode 100644
index 0000000..a3c7bc0
--- /dev/null
+++ b/src/Panda/Simple_IO.C
@@ -0,0 +1,846 @@
+#include "definitions.h"
+#include "MPIFS.h"
+#include "Chunk.h"
+#include "App_Info.h"
+#include "Simple_IO.h"
+#include "Array.h"
+#include "message.h"
+
+#include "external/IEEEIO/src/Arch.h"
+#include "external/IEEEIO/src/IOProtos.h"
+
+extern MPIFS* MPIFS_global_obj;
+extern int SUBCHUNK_SIZE;
+
+extern "C" {
+ int IOreserveChunk(IOFile,int,int,int*);
+ int IOwriteStream(IOFile,void*,int);
+ int IOreadStream(IOFile,void*,int);
+ int IOwriteAttribute(IOFile, char *, int, int, void *);
+ int IOreadInfo(IOFile,int *,int *,int *,int);
+ int IOreadAttributeInfo(IOFile, char *,int *, int *);
+ int IOreadAttribute(IOFile,int,void*);
+}
+
+/* This constructor is needed by the compute node to create a dummy object.
+ * The dummy object is needed so that the compute node can execute the
+ * specialized compute node io loop
+ */
+/* NOTE(review): only the pointer/rank members are reset here; counters such
+ * as schema_size_, num_overlaps_ and max_overlaps_ stay uninitialized --
+ * presumably safe because dummy_ == YES guards all uses; verify callers. */
+Simple_IO::Simple_IO()
+{
+  dummy_ = YES;
+  schema_string_ = current_schema_ptr_ = NULL;
+  current_array_ =NULL;
+  current_chunk_ = NULL;
+  num_io_nodes_ = -1;
+  my_io_rank_ = -1;
+  compute_app_num_ = -1;
+  app_info_ = NULL;
+  part_time_io_ = NO;
+  compute_node_array_ =NULL;
+  overlap_chunk_ids_ = dest_ids_ = NULL;
+  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_);
+  schema_requests_ = NULL;
+  requests_ =NULL;
+  statuses_ =NULL;
+  datatypes_ = NULL;
+  schema_bufs_ = NULL;
+  data_ptrs_ = NULL;
+  overlap_base_ = overlap_size_ =overlap_stride_ =NULL;
+  mem_buf_ = NULL;
+}
+
+/* Full constructor used on the I/O nodes.
+ * schema_string: serialized array schema (ownership taken; freed in dtor).
+ * comp_app_size: number of compute nodes -- initial overlap-buffer capacity.
+ * fp: already-open IEEEIO file handle for this operation. */
+Simple_IO::Simple_IO(int *schema_string, int schema_size, int world_rank,
+                     int comp_app_num, int comp_app_size , App_Info *app_info,
+                     IOFile fp)
+{
+  int schema_buf_size;
+
+  dummy_ = NO;
+  schema_string_ = schema_string;
+  schema_size_ = schema_size;
+  current_schema_ptr_ = schema_string;
+  num_io_nodes_ = MPIFS_global_obj->app_size(IO_NODE);
+  my_io_rank_ = MPIFS_global_obj->my_rank(IO_NODE);
+  compute_app_num_ = comp_app_num;
+  app_info_ = app_info;
+  world_rank_ = world_rank;
+
+  /* One potential overlap per compute node to start with; grown on demand
+   * by realloc_buffers(). */
+  num_overlaps_ = 0;
+  max_overlaps_ = comp_app_size;
+  overlap_chunk_ids_ = (int *) malloc(sizeof(int)*max_overlaps_);
+  dest_ids_ = (int *) malloc(sizeof(int)*max_overlaps_);
+  schema_bufs_ = (int **) malloc(sizeof(int *) *max_overlaps_);
+  requests_ = (MPI_Request*)malloc(sizeof(MPI_Request)*max_overlaps_);
+  schema_requests_ = (MPI_Request*)malloc(sizeof(MPI_Request)*max_overlaps_);
+  statuses_ = (MPI_Status*) malloc(sizeof(MPI_Status)*max_overlaps_);
+  datatypes_ = (MPI_Datatype*)malloc(sizeof(MPI_Datatype)*max_overlaps_);
+  max_rank_ = 10;  /* initial max array rank; grown by realloc_schema_bufs() */
+  overlap_base_ = (int *) malloc(sizeof(int)*max_rank_);
+  overlap_stride_ = (int *) malloc(sizeof(int)*max_rank_);
+  overlap_size_ = (int *) malloc(sizeof(int)*max_rank_);
+  data_ptrs_ = (char **) malloc(sizeof(char*)*max_overlaps_);
+  part_time_io_ = NO;
+  compute_node_array_ = NULL;
+  /* Staging buffer is shared with (and owned by) the global MPIFS object */
+  mem_buf_size_ = MPIFS_global_obj->mem_buf_size();
+  mem_buf_ = MPIFS_global_obj->mem_buf();
+
+  /* Per-overlap schema: id, nat, contig, rank, op + 3 vectors of max_rank_ */
+  schema_buf_size = 6+ max_rank_*3;
+  for(int i=0; i < max_overlaps_; i++){
+    data_ptrs_[i] = NULL;
+    schema_bufs_[i] = (int *) malloc(sizeof(int)*schema_buf_size);
+  }
+
+  /* NOTE(review): passes the address of the local parameter copy, so any
+   * pointer advancement by Array's parser is not reflected in
+   * current_schema_ptr_ -- confirm Array's contract. */
+  current_array_ = new Array(&schema_string);
+
+  current_chunk_ = NULL;
+  num_of_chunks_ = 0;
+  num_of_subchunks_ = 0;
+  current_chunk_id_ = -1;
+  current_subchunk_id_ = -1;
+  file_ptr_ = NULL;
+  schema_file_ptr_ = NULL;
+  file_ptr_ = fp;
+}
+
+/* Release all buffers owned by an I/O-node object.  A dummy_ object owns
+ * nothing and is a no-op.  mem_buf_ is deliberately NOT freed here (it is
+ * owned by the global MPIFS object), and file_ptr_ is closed elsewhere. */
+Simple_IO::~Simple_IO()
+{
+  if (dummy_){
+  } else {
+
+    /* This is the object created for the I/O nodes */
+
+    if (current_array_) delete current_array_;
+    if (schema_string_) free(schema_string_);
+    if (overlap_chunk_ids_) free(overlap_chunk_ids_);
+    if (dest_ids_) free(dest_ids_);
+    if (requests_) free(requests_);
+    if (schema_requests_) free(schema_requests_);
+    if (statuses_) free(statuses_);
+    if (datatypes_) free(datatypes_);
+    if (overlap_base_) free(overlap_base_);
+    if (overlap_size_) free(overlap_size_);
+    if (overlap_stride_) free(overlap_stride_);
+
+    if (schema_bufs_){
+      for(int i=0;i < max_overlaps_; i++){
+        if (schema_bufs_[i]) free(schema_bufs_[i]);
+        schema_bufs_[i] = NULL;
+      }
+      free(schema_bufs_);
+    }
+
+    /* data_ptrs_[i] point into chunk/staging buffers owned elsewhere,
+     * so only the pointer array itself is freed. */
+    if (data_ptrs_) free(data_ptrs_);
+
+    schema_bufs_ = NULL;
+    data_ptrs_ = NULL;
+    overlap_base_ = overlap_size_ = overlap_stride_ = NULL;
+    overlap_chunk_ids_ = dest_ids_ = NULL;
+    requests_ = NULL;
+    schema_requests_ = NULL;
+    statuses_ = NULL;
+    datatypes_ =NULL;
+    schema_string_ = NULL;
+  }
+}
+
+/* Grow all per-overlap bookkeeping arrays to hold new_size entries.
+ * Entries [0, max_overlaps_) are preserved; the new tail of schema_bufs_
+ * and data_ptrs_ is initialised.  NOTE(review): realloc/malloc returns are
+ * unchecked throughout this file; kept consistent with that policy. */
+void Simple_IO::realloc_buffers(int new_size)
+{
+  int schema_buf_size = 6+max_rank_*3;
+
+
+  overlap_chunk_ids_=(int *) realloc(overlap_chunk_ids_, new_size*sizeof(int));
+  schema_bufs_ = (int **) realloc(schema_bufs_, new_size*sizeof(int*));
+  /* BUG FIX: previously realloc'ed overlap_chunk_ids_ a second time here,
+   * which left overlap_chunk_ids_ dangling and leaked the old dest_ids_. */
+  dest_ids_ = (int *) realloc(dest_ids_, new_size*sizeof(int));
+  requests_ = (MPI_Request*)realloc(requests_, new_size*sizeof(MPI_Request));
+  schema_requests_ = (MPI_Request*)realloc(schema_requests_,
+                                           new_size*sizeof(MPI_Request));
+  statuses_ = (MPI_Status*)realloc(statuses_, new_size*sizeof(MPI_Status));
+  datatypes_ = (MPI_Datatype*)realloc(datatypes_,
+                                      new_size*sizeof(MPI_Datatype));
+  data_ptrs_ = (char **) realloc(data_ptrs_, new_size*sizeof(char*));
+  for(int i=max_overlaps_;i<new_size;i++){
+    schema_bufs_[i] = (int *)malloc(sizeof(int)*schema_buf_size);
+    data_ptrs_[i] = NULL;
+  }
+  max_overlaps_ = new_size;
+}
+
+/* This is called only for the following cases                        *
+ *   - natural chunking with user-specified subchunking               *
+ *   - reorganization (with or without user-specified chunking)       */
+/* Fill num_overlaps_ / overlap_chunk_ids_ with the compute-node chunks
+ * that intersect `subchunk`, and resolve each to its owner's world rank
+ * in dest_ids_. */
+void Simple_IO::compute_chunk_overlaps(Array *array, Chunk *subchunk)
+{
+  int num_compute_chunks;
+
+  if (nat_chunked_){
+    /* Natural chunking: the subchunk maps 1:1 onto its own chunk */
+    num_overlaps_ = 1;
+    overlap_chunk_ids_[0] = current_chunk_id_;
+  }
+  else{
+    num_compute_chunks = array->layout(COMPUTE_NODE)->total_elements();
+    if (num_compute_chunks > max_overlaps_) realloc_buffers(num_compute_chunks);
+    subchunk->chunk_overlaps(array, &num_overlaps_,
+                             overlap_chunk_ids_, COMPUTE_NODE);
+  }
+
+  for(int i=0; i < num_overlaps_;i++) {
+    dest_ids_[i]=app_info_->world_rank(array->which_node(overlap_chunk_ids_[i],
+                                                         COMPUTE_NODE));
+  }
+
+#ifdef DEBUG
+  printf("For subchunk_id %d of chunk %d\n", current_subchunk_id_,
+         current_chunk_id_);
+  printf("The overlapping compute chunk ids are \n");
+  for(int k =0; k < num_overlaps_; k++) printf("%d ", overlap_chunk_ids_[k]);
+  printf("\n");
+#endif
+}
+
+
+/* This is called only for the following cases                        *
+ *   - natural chunking with user-specified subchunking               *
+ *   - reorganization (with or without user-specified chunking)       */
+/* For each overlap, compute the base/size/stride of the intersection,
+ * post the schema message to the owning compute node, and build the MPI
+ * datatype used for the subsequent data transfer. */
+void Simple_IO::compute_schemas(Array *array, Chunk *subchunk ,
+                                Chunk *compute_chunk)
+{
+  if (nat_chunked_ && !contiguous_ && !overlaped_){
+    /* Single overlap covering the whole subchunk */
+    subchunk->copy_base_size_stride(overlap_base_, overlap_size_,
+                                    overlap_stride_);
+    send_schema_message(0);
+    make_datatype(subchunk, 0);
+  }
+  else if (!nat_chunked_) {
+    for (int i=0; i< num_overlaps_; i++){
+      /* compute_chunk is a reusable scratch object (NO_ALLOC: no data) */
+      compute_chunk->init(array, overlap_chunk_ids_[i], COMPUTE_NODE, NO_ALLOC);
+      subchunk->compute_overlap(compute_chunk, overlap_base_, overlap_size_,
+                                overlap_stride_);
+      send_schema_message(i);
+      make_datatype(subchunk, i);
+    }
+  } else {
+    printf("Error - In Simple_IO::compute_schemas\n");
+    exit(1);
+  }
+}
+
+
+/* The chunk_id is in overlap_chunk_ids_[index], the dest is in       *
+ * in dest_ids_[index].  The rank,base,stride and size info is in     *
+ * overlap_base, overlap_size, overlap_stride, array_rank_            */
+/* Serialize the overlap schema into schema_bufs_[index] and post a
+ * non-blocking send to the destination compute node.  When this node is
+ * itself the destination (part-time I/O), no message is needed.
+ * PORTABILITY FIX: the loop index is declared once before the loops;
+ * the old `for(int i...)` followed by `for(i...)` relied on pre-ISO
+ * (ARM) for-scope rules and is rejected by standard C++ compilers. */
+void Simple_IO::send_schema_message(int index)
+{
+  int *ptr = schema_bufs_[index];
+  int schema_size = 5+array_rank_*3;
+  int i;
+
+  *ptr++ = overlap_chunk_ids_[index];
+  *ptr++ = (int) nat_chunked_;
+  *ptr++ = (int) contiguous_;
+  *ptr++ = array_rank_;
+  *ptr++ = op_type_;
+
+  for(i=0; i < array_rank_; i++) *ptr++ = overlap_base_[i];
+  for(i=0; i < array_rank_; i++) *ptr++ = overlap_size_[i];
+  for(i=0; i < array_rank_; i++) *ptr++ = overlap_stride_[i];
+
+  if (part_time_io_ && (dest_ids_[index] == world_rank_))
+    /* No need to send the message */
+    schema_requests_[index] = MPI_REQUEST_NULL;
+  else
+    nb_send_message((void *)schema_bufs_[index], schema_size, MPI_INT,
+                    dest_ids_[index], index*10+CHUNK_SCHEMA, MPI_COMM_WORLD,
+                    &schema_requests_[index]);
+}
+
+/* Build the MPI derived datatype describing the overlap region held in
+ * overlap_base_/overlap_size_/overlap_stride_ within `subchunk`, storing
+ * the datatype in datatypes_[index] and its base address in
+ * data_ptrs_[index]. */
+void Simple_IO::make_datatype(Chunk *subchunk, int index)
+{
+  void *base_addr;
+  subchunk->make_datatype(overlap_base_, overlap_size_, overlap_stride_,
+                          &base_addr, &datatypes_[index]);
+  data_ptrs_[index] = (char *) base_addr;
+}
+
+/* Again this function is called only for the following cases         *
+ *  - natural chunking with user-specified subchunking                *
+ *  - re-organization with/without user-specified chunking            *
+ *  The case of natural chunking (with no user-specified              *
+ *  subchunking) is handled seperately                                */
+/* Receive overlap `index` of `subchunk` from its compute node.  A local
+ * (same-node, part-time) overlap is satisfied by a memory copy instead
+ * of an MPI message. */
+void Simple_IO::receive_data(Chunk *subchunk, int index, int &array_bytes_to_go)
+{
+
+  if (part_time_io_ && (dest_ids_[index] == world_rank_)){
+    /* Perform a mem copy of the required chunk */
+    copy_data(subchunk, index, NO, array_bytes_to_go);
+    requests_[index] = MPI_REQUEST_NULL;
+  } else
+    nb_receive_message((void *)data_ptrs_[index], 1, datatypes_[index],
+                       dest_ids_[index], index*10+CHUNK_DATA_TO_IO,
+                       MPI_COMM_WORLD, &requests_[index]);
+}
+
+/* Again this function is called only for the following cases         *
+ *  - natural chunking with user-specified subchunking                *
+ *  - re-organization with/without user-specified chunking            *
+ *  The case of natural chunking (with no user-specified              *
+ *  subchunking) is handled seperately                                */
+/* Send overlap `index` of `subchunk` to its compute node; mirror image
+ * of receive_data(). */
+void Simple_IO::send_data(Chunk *subchunk, int index, int &array_bytes_to_go)
+{
+  if (part_time_io_ && (dest_ids_[index] == world_rank_)){
+    /* Perform a memory copy of the required chunk */
+    copy_data(subchunk, index, YES, array_bytes_to_go);
+    requests_[index] =MPI_REQUEST_NULL;
+  } else {
+    /* Send the required datatype using a non-blocking send */
+    nb_send_message((void *)data_ptrs_[index], 1, datatypes_[index],
+                    dest_ids_[index], index*10+CHUNK_DATA_FROM_IO,
+                    MPI_COMM_WORLD, &requests_[index]);
+  }
+}
+
+/* Read an entire subchunk from the file into its data buffer. */
+void Simple_IO::read_data(Chunk *subchunk)
+{
+  read_data((char *)(subchunk->data_ptr()),
+            subchunk->total_size_in_bytes(),
+            subchunk->element_size());
+}
+
+/* Read `size` bytes into `buf` in SUBCHUNK_SIZE pieces.  `esize` is the
+ * element size; IOreadStream is given an element count (buf_size/esize).
+ * NOTE(review): a short read is reported but deliberately not fatal (the
+ * exit() was commented out in the original) -- behaviour preserved. */
+void Simple_IO::read_data(char *buf, int size, int esize)
+{
+  int n,bytes_to_go=size,buf_size;
+  char *tmp_buf = buf;
+
+  while(bytes_to_go > 0){
+    buf_size = min(bytes_to_go, SUBCHUNK_SIZE);
+    n = IOreadStream(file_ptr_, (void *)tmp_buf, buf_size/esize);
+    if (n != buf_size){
+      /* BUG FIX: the message in this read path wrongly said "write only" */
+      printf("Error reading data - read only %d instead of %d bytes\n",
+             n, buf_size);
+//      exit(1);
+    }
+    bytes_to_go -= buf_size;
+    tmp_buf += buf_size;
+  }
+}
+
+
+/* Write `size` bytes from `buf` in SUBCHUNK_SIZE pieces.  `esize` is the
+ * element size; IOwriteStream is given an element count (buf_size/esize).
+ * Unlike the read path, a short write is fatal. */
+void Simple_IO::write_data(char *buf, int size, int esize)
+{
+  int n, bytes_to_go = size, buf_size;
+  char *tmp_buf = buf;
+
+  while(bytes_to_go > 0){
+    buf_size = min(bytes_to_go, SUBCHUNK_SIZE);
+    n = IOwriteStream(file_ptr_, (void *)tmp_buf, buf_size/esize);
+    if (n != buf_size){
+      printf("Error writing data - write only %d instead of %d bytes\n",
+             n, buf_size);
+      exit(1);
+    }
+    tmp_buf += buf_size;
+    bytes_to_go -= buf_size;
+  }
+}
+
+/* Write an entire subchunk's data buffer out to the file. */
+void Simple_IO::write_data(Chunk* subchunk)
+{
+  write_data((char *)(subchunk->data_ptr()),
+             subchunk->total_size_in_bytes(),
+             subchunk->element_size());
+}
+
+/* Release the MPI derived datatypes built for the current overlap set. */
+void Simple_IO::free_datatypes()
+{
+  for(int i = 0; i < num_overlaps_; i++)
+    MPI_Type_free(&datatypes_[i]);
+}
+
+/* Post the data send for every overlap of `subchunk` (read path). */
+void Simple_IO::send_data_to_compute_nodes(Chunk *subchunk,
+                                           int &array_bytes_to_go)
+{
+  for(int i=0; i< num_overlaps_; i++)
+    send_data(subchunk, i, array_bytes_to_go);
+}
+
+/* Post the data receive for every overlap of `subchunk` (write path). */
+void Simple_IO::receive_data_from_compute_nodes(Chunk *subchunk,
+                                                int &array_bytes_to_go)
+{
+  for (int i=0; i< num_overlaps_; i++)
+    receive_data(subchunk, i, array_bytes_to_go);
+}
+
+/* Block until all posted data transfers for the current subchunk finish.
+ * In part-time mode we must keep servicing compute-side messages while
+ * polling, otherwise two part-time nodes waiting on each other deadlock. */
+void Simple_IO::wait_for_completion(int &array_bytes_to_go,
+                                    Array *compute_array)
+{
+  int flag=0;
+
+  if (part_time_io_){
+    /* This is to avoid deadlocks */
+    while (!flag){
+      MPI_Testall(num_overlaps_, requests_, &flag, statuses_);
+      if (array_bytes_to_go > 0)
+        process_compute_message(array_bytes_to_go, compute_array);
+    }
+  } else {
+    MPI_Waitall(num_overlaps_, requests_, statuses_);
+  }
+  /* Free the schema request objects - Do we need this*/
+  MPI_Waitall(num_overlaps_, schema_requests_, statuses_);
+}
+
+/* For part-io nodes, get the data using memory copy if the           *
+ * data resides on the same node.                                     */
+/* flag == YES: I/O buffer -> compute chunk (read path);
+ * flag == NO : compute chunk -> I/O buffer (write path).
+ * The copy goes through MPI_Pack/MPI_Unpack so that the two (possibly
+ * differently strided) derived datatypes are translated correctly.
+ * The overlap geometry is decoded from schema_bufs_[index], which was
+ * laid out by send_schema_message(). */
+void Simple_IO::copy_data(Chunk *subchunk, int index, Boolean flag,
+                          int &array_bytes_to_go)
+{
+  void *comp_data_ptr;
+  MPI_Datatype comp_datatype;
+  int position=0, buf_size;
+  void *buf=NULL;
+  int *schema = schema_bufs_[index];
+  int comp_chunk_id = schema[0];
+  int comp_array_rank = schema[3];
+  int *base = &schema[5];
+  int *size = &schema[5+comp_array_rank*1];
+  int *stride = &schema[5+comp_array_rank*2];
+  int bytes_copied = num_elements(comp_array_rank, size)*
+    subchunk->element_size();
+  Array *comp_array = compute_node_array_;
+  Chunk *comp_chunk = comp_array->find_chunk(comp_chunk_id);
+  comp_chunk->make_datatype(base, size,stride, &comp_data_ptr,
+                            &comp_datatype);
+  if (array_bytes_to_go > 0) array_bytes_to_go -= bytes_copied;
+
+  if (flag){
+    /* Pack from the I/O-side datatype, unpack into the compute chunk */
+    MPI_Pack_size(1, datatypes_[index], MPI_COMM_WORLD, &buf_size);
+    buf = (void *) malloc(buf_size);
+    MPI_Pack(data_ptrs_[index], 1, datatypes_[index], buf, buf_size,
+             &position, MPI_COMM_WORLD);
+    position =0;
+    MPI_Unpack(buf, buf_size, &position, comp_data_ptr, 1, comp_datatype,
+               MPI_COMM_WORLD);
+    free(buf);
+  } else {
+    /* Pack from the compute chunk, unpack into the I/O-side datatype */
+    MPI_Pack_size(1, comp_datatype, MPI_COMM_WORLD, &buf_size);
+    buf = (void *) malloc(buf_size);
+    MPI_Pack(comp_data_ptr, 1, comp_datatype, buf, buf_size,
+             &position, MPI_COMM_WORLD);
+    position = 0;
+    MPI_Unpack(buf, buf_size, &position, data_ptrs_[index], 1,
+               datatypes_[index], MPI_COMM_WORLD);
+    free(buf);
+  }
+  MPI_Type_free(&comp_datatype);
+}
+
+/* For nat chunking with no user defined subchunking, read/write
+ * data directly from compute chunk (i.e if it is on same node) */
+/* flag == YES means read (file -> chunk); NO means write. */
+void Simple_IO::direct_io(int chunk_id, Boolean flag, int &array_bytes_to_go)
+{
+  Array *comp_array = compute_node_array_;
+  Chunk *comp_chunk = comp_array->find_chunk(chunk_id);
+  if (flag) read_data(comp_chunk);
+  else write_data(comp_chunk);
+  if (array_bytes_to_go > 0)
+    array_bytes_to_go -= comp_chunk->total_size_in_bytes();
+}
+
+/* Grow max_rank_ to new_size and resize every rank-dependent buffer:
+ * the three overlap vectors and each per-overlap schema buffer
+ * (layout: 6 header ints + 3 vectors of max_rank_ ints). */
+void Simple_IO::realloc_schema_bufs(int new_size)
+{
+  int schema_buf_size = sizeof(int)*(6+new_size*3);
+
+  max_rank_ = new_size;
+  overlap_base_ = (int *) realloc(overlap_base_, max_rank_*sizeof(int));
+  overlap_size_ = (int *) realloc(overlap_size_, max_rank_*sizeof(int));
+  overlap_stride_ = (int *) realloc(overlap_stride_, max_rank_*sizeof(int));
+  for(int i=0; i < max_overlaps_; i++){
+    schema_bufs_[i] = (int *) realloc(schema_bufs_[i], schema_buf_size);
+  }
+}
+
+/* Grow the shared staging buffer and publish the new pointer/size back
+ * to the global MPIFS object (which owns the buffer). */
+void Simple_IO::realloc_mem_bufs(int new_size)
+{
+  mem_buf_size_ = new_size;
+  mem_buf_ = (char *) realloc(mem_buf_, sizeof(char)*mem_buf_size_);
+  MPIFS_global_obj->set_mem_buf_size(new_size);
+  MPIFS_global_obj->set_mem_buf(mem_buf_);
+}
+
+/* Main I/O-node driver: performs the complete read or write of
+ * current_array_, streaming data between the IEEEIO file and the compute
+ * nodes chunk by chunk.  part_time == YES means this node doubles as a
+ * compute node and must interleave compute-side message processing to
+ * avoid deadlock.  Two regimes: contiguous (natural chunking, plain byte
+ * streaming) and non-contiguous (MPI derived datatypes per subchunk). */
+void Simple_IO::start_to_finish(Boolean part_time, Array *compute_array)
+{
+  int make_subchunks, bytes_to_go;
+  int array_bytes_to_go,*ptr;
+  Boolean read_op;
+  Chunk *chunk=NULL, *subchunk=NULL, *compute_chunk=NULL, *tmp_chunk;
+
+  /* Read-type operations: RESTART / GENERAL_READ / READ_TIMESTEP */
+  op_type_ = current_array_->op_type();
+  if ((op_type_ == RESTART)||(op_type_ == GENERAL_READ)||
+      (op_type_ == READ_TIMESTEP))
+    read_op = YES;
+  else
+    read_op = NO;
+
+  part_time_io_ = part_time;
+  compute_node_array_ = compute_array;
+
+  if (read_op) {
+    /* Recover rank, numbertype and global size from the file, then
+     * forward that schema to the compute application (master I/O node
+     * sends; part-time nodes receive and broadcast). */
+    int numbertype, rank, index, datatype, length;
+    int *dims = (int *)malloc(sizeof(int) * 10);
+    IOreadInfo(file_ptr_, &numbertype, &rank, dims, 10);
+    int *size = (int *)malloc(sizeof(int) * rank);
+
+    index = IOreadAttributeInfo(file_ptr_, "global_size", &datatype, &length);
+    if (index >=0 ) { // the attribute exists
+      IOreadAttribute(file_ptr_, index, size);
+      current_array_->init(rank, numbertype, size, IO_NODE);
+    } else { printf("Error: no attribute, global_size\n"); exit(0); }
+    free(dims);
+
+/* NOTE(review): debug print indexes size[0..2]; assumes rank >= 3 */
+printf("%d: read rank %d, numbertype %d, size (%d %d %d)\n", world_rank_,
+       rank, numbertype, size[0], size[1], size[2]);
+
+    int schema_size = 2 + rank;
+    int *schema = (int *)malloc(sizeof(int) * schema_size);
+    if (MPIFS_global_obj->am_master_io_node()) {
+      schema[0] = rank; schema[1] = numbertype;
+      for (int i=0; i<rank; i++) schema[2+i] = size[i];
+      send_message((void *)schema, schema_size, MPI_INT,
+                   app_info_->get_master(),
+                   ARRAYGROUP_SCHEMA, MPI_COMM_WORLD);
+    }
+    if (part_time_io_) {
+      MPI_Status status;
+      receive_message(schema, schema_size, MPI_INT, MPI_ANY_SOURCE,
+                      ARRAYGROUP_SCHEMA, MPI_COMM_WORLD, &status);
+      MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *)schema,
+                                  schema_size, MPI_INT, ARRAYGROUP_SCHEMA);
+
+      compute_array->init(rank, numbertype, size, COMPUTE_NODE);
+    }
+    free(schema);
+  }
+
+  if (part_time_io_) array_bytes_to_go = compute_node_array_->array_info();
+
+  /* To reduce costs associated with object creation and deletion, we  *
+   * will create a dummy chunk,subchunk and compute chunk object and   *
+   * re-initialize them whenever necessary.                            */
+  tmp_chunk = chunk = new Chunk();
+  current_chunk_ = chunk;
+  subchunk = new Chunk();
+  compute_chunk = new Chunk();
+
+  make_subchunks = -1;
+
+  /* Decide the transfer regime: contiguous byte streaming is possible
+   * only for natural chunking without user subchunking or overlap. */
+  nat_chunked_ = current_array_->nat_chunked();
+  sub_chunked_ = current_array_->sub_chunked();
+  overlaped_ = current_array_->overlaped();
+  if (overlaped_) { contiguous_ = NO; nat_chunked_ = NO; }
+  else {
+    if (nat_chunked_ && !sub_chunked_)
+      contiguous_ = YES;    /* No need to use derived datatypes */
+    else contiguous_ = NO;  /* Have to use derived datatypes */
+  }
+
+  array_rank_ = current_array_->rank();
+  if (array_rank_ > max_rank_) realloc_schema_bufs(array_rank_);
+
+  if (read_op) current_array_->read_schema_file(file_ptr_);
+
+  /* Iterate over the chunks assigned to this I/O node */
+  num_of_chunks_ = current_array_->layout(IO_NODE)->total_elements();
+  current_chunk_id_ = current_array_->get_next_index(chunk, -1, my_io_rank_,
+                                                     num_io_nodes_,
+                                                     num_of_chunks_);
+
+#ifdef DEBUG
+  printf("%d: current_chunk_id_=%d my_io_rank=%d num_io_nodes=%d\n",
+         world_rank_, current_chunk_id_, my_io_rank_, num_io_nodes_);
+#endif
+  if (contiguous_){
+    /* Natural chunked and no user-specified subchunking. Therefore we don't
+     * need to used mpi-derived datatypes. */
+
+    while (current_chunk_id_ < num_of_chunks_) {
+      if (!read_op) {
+        /* IEEEIO wants the size vector in reversed dimension order */
+        int *tmp_size = (int *)malloc(sizeof(int) * array_rank_);
+        for (int cnt = 0; cnt < array_rank_; cnt++)
+          tmp_size[cnt] = chunk->size()[array_rank_ - cnt - 1];
+        IOreserveChunk(file_ptr_, current_array_->ieee_size(),
+                       array_rank_, tmp_size);
+        //printf("##### called IOreserveChunk for n.c. %d %d %d %d %d\n", current_array_->ieee_size(), array_rank_, tmp_size[0], tmp_size[1], tmp_size[2]);
+
+        free(tmp_size);
+        if (num_of_chunks_ > 1) {
+          /* NOTE(review): attribute length hard-coded to 3 -- assumes
+           * rank-3 arrays; confirm for other ranks */
+          IOwriteAttribute(file_ptr_,"chunk_origin", INT32, 3, chunk->base());
+          IOwriteAttribute(file_ptr_, "chunk_size", INT32, 3, chunk->size());
+        }
+      }
+
+      /* for part-time io case, if chunk resides on same node, perform the *
+       * read/write operation directly.                                    */
+      num_overlaps_ = 1;
+      overlap_chunk_ids_[0] = current_chunk_id_;
+      dest_ids_[0] = app_info_->world_rank(current_array_->which_node(
+                                             current_chunk_id_, COMPUTE_NODE));
+
+      if (part_time_io_ && (world_rank_ == dest_ids_[0])){
+        direct_io(current_chunk_id_, read_op, array_bytes_to_go);
+      } else {
+        /* Stream the chunk through mem_buf_ in SUBCHUNK_SIZE slices,
+         * pairing each slice with a 6-int schema message. */
+        bytes_to_go = chunk->total_size_in_bytes();
+        chunk->set_data_ptr(mem_buf_);
+
+        /* Make the schema request */
+        ptr = schema_bufs_[0];
+        *ptr++ = current_chunk_id_;
+        *ptr++ = (int)nat_chunked_;
+        *ptr++ = (int)contiguous_;
+        *ptr++ = op_type_;
+        *ptr++ = 0;  /* This is the offset */
+        *ptr++ = 0;  /* Size of the data */
+
+        ptr = schema_bufs_[0];
+        while(bytes_to_go > 0){
+          ptr[5] = min(SUBCHUNK_SIZE, bytes_to_go);
+
+          nb_send_message((void *)ptr, 6, MPI_INT, dest_ids_[0],
+                          CHUNK_SCHEMA, MPI_COMM_WORLD, &schema_requests_[0]);
+          if (read_op){
+            read_data(mem_buf_, ptr[5], chunk->element_size());
+            nb_send_message((void *)mem_buf_, ptr[5], MPI_CHAR, dest_ids_[0],
+                            CHUNK_DATA_FROM_IO, MPI_COMM_WORLD, &requests_[0]);
+          } else
+            nb_receive_message((void *)mem_buf_, ptr[5], MPI_CHAR,
+                               dest_ids_[0], CHUNK_DATA_TO_IO,
+                               MPI_COMM_WORLD, &requests_[0]);
+          /* Have to watch for deadlock over here */
+          wait_for_completion(array_bytes_to_go, compute_node_array_);
+          if (!read_op) write_data(mem_buf_, ptr[5], chunk->element_size());
+          ptr[4] += ptr[5];      /* advance offset by this slice */
+          bytes_to_go -= ptr[5];
+        }
+        chunk->set_data_ptr(NULL);
+      }
+      current_chunk_id_ = current_array_->get_next_index(chunk,
+                                                         current_chunk_id_,
+                                                         my_io_rank_,
+                                                         num_io_nodes_,
+                                                         num_of_chunks_);
+    } /* End while */
+  } /* End if (contiguous_) */
+  else {
+    /* We have no choice but to use MPI-derived datatypes */
+    while(current_chunk_id_ < num_of_chunks_){
+      if (!read_op) {
+        int *tmp_size = (int *)malloc(sizeof(int) * array_rank_);
+        for (int cnt = 0; cnt < array_rank_; cnt++)
+          tmp_size[cnt] = chunk->size()[array_rank_ - cnt - 1];
+        IOreserveChunk(file_ptr_, current_array_->ieee_size(),
+                       array_rank_, tmp_size);
+        //printf("##### called IOreserveChunk for r.o. %d %d %d %d %d\n", current_array_->ieee_size(), array_rank_, tmp_size[0], tmp_size[1], tmp_size[2]);
+
+        free(tmp_size);
+        if (num_of_chunks_ > 1) {
+          IOwriteAttribute(file_ptr_,"chunk_origin", INT32, 3, chunk->base());
+          IOwriteAttribute(file_ptr_, "chunk_size", INT32, 3, chunk->size());
+        }
+      }
+
+      /* If the array is not subchunked, then subchunk the array into    *
+       * SUBCHUNK_SIZE chunks. This is to reduce the size of the         *
+       * messages and the memory requirements. The current version makes a *
+       * dumb assumption, that if the user specifies the subchunks,      *
+       * then the size of those subchunks is less than SUBCHUNK_SIZE.    *
+       * It's a dumb assumption and needs to be fixed.                   */
+
+      if (!sub_chunked_ && (make_subchunks == -1)){
+        current_array_->make_sub_chunks(chunk);
+        make_subchunks = 1;
+      }
+      num_of_subchunks_ =current_array_->layout(SUB_CHUNK)->total_elements();
+
+      for (current_subchunk_id_=0; current_subchunk_id_ < num_of_subchunks_;
+           current_subchunk_id_++){
+        subchunk->init(chunk, current_subchunk_id_, NO_ALLOC);
+        bytes_to_go  = subchunk->total_size_in_bytes();
+
+        if (bytes_to_go > mem_buf_size_) realloc_mem_bufs(bytes_to_go);
+        subchunk->set_data_ptr(mem_buf_);
+
+        /* Find overlapping compute chunks, exchange schemas, then move
+         * the data and (for writes) flush it to the file. */
+        compute_chunk_overlaps(current_array_, subchunk);
+        compute_schemas(current_array_, subchunk, compute_chunk);
+
+        if (read_op){
+          read_data(subchunk);
+          send_data_to_compute_nodes(subchunk, array_bytes_to_go);
+        } else receive_data_from_compute_nodes(subchunk, array_bytes_to_go);
+        wait_for_completion(array_bytes_to_go, compute_node_array_);
+        if (!read_op) write_data(subchunk);
+
+        free_datatypes();
+        subchunk->set_data_ptr(NULL);
+      }
+      current_chunk_id_ = current_array_->get_next_index(chunk,
+                                                         current_chunk_id_,
+                                                         my_io_rank_,
+                                                         num_io_nodes_,
+                                                         num_of_chunks_);
+    } /* End while loop */
+  } /* End if else */
+
+#ifdef DEBUG
+  printf("%d:Finished the I/O\n", world_rank_);
+#endif
+  if (part_time_io_){
+    /* Since the I/O side is finished jump into the compute loop */
+    while (array_bytes_to_go > 0)
+      process_compute_message(array_bytes_to_go, compute_node_array_);
+#ifdef DEBUG
+    printf("%d:Finished the compute side of the part-time io\n", world_rank_);
+#endif
+  }
+
+  /* Delete chunk, subchunk, compute_chunk */
+  if (tmp_chunk) delete tmp_chunk;
+  if (subchunk) delete subchunk;
+  if (compute_chunk) delete compute_chunk;
+  chunk=subchunk=compute_chunk=NULL;
+}
+
+/* This function should not access any of the instance variables of
+ * the Simple_IO object without setting them first
+ */
+/* Compute-node side of the protocol: for reads, first obtain the array
+ * schema from the master I/O node and broadcast it to all compute nodes,
+ * then service CHUNK_SCHEMA / CHUNK_DATA messages until every byte of
+ * this node's portion of `array` has been transferred. */
+void Simple_IO::compute_node_io_loop(Array *array)
+{
+  int op_type = array->op_type();
+  if ((op_type == RESTART) || (op_type == GENERAL_READ) ||
+      (op_type == READ_TIMESTEP)) {
+    MPI_Status status;
+    int *schema, schema_size;
+
+    MPI_Probe(MPI_ANY_SOURCE, ARRAYGROUP_SCHEMA, MPI_COMM_WORLD, &status);
+    mpi_get_count(&status, MPI_INT, &schema_size);
+    schema = (int *)malloc(sizeof(int) * schema_size);
+    receive_message((void *)schema, schema_size, MPI_INT, status.MPI_SOURCE,
+                    ARRAYGROUP_SCHEMA, MPI_COMM_WORLD, &status);
+    MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *)schema,
+                                schema_size, MPI_INT, ARRAYGROUP_SCHEMA);
+
+    /* schema layout: [0]=rank, [1]=numbertype, [2..]=global size */
+    int *size = (int *)malloc(sizeof(int) * schema[0]);
+    for (int i=0; i<schema[0]; i++) size[i] = schema[2+i];
+/* NOTE(review): debug print indexes size[0..2]; assumes rank >= 3 */
+printf("%d: read rank %d, numbertype %d, size (%d %d %d)\n", world_rank_,
+       schema[0], schema[1], size[0], size[1], size[2]);
+    array->init(schema[0], schema[1], size, COMPUTE_NODE);
+    free(schema);
+  }
+
+  int array_bytes_to_go = array->array_info();
+  while (array_bytes_to_go > 0)
+    process_compute_message(array_bytes_to_go, array);
+}
+
+/* Poll for and dispatch one message on the compute side, decrementing
+ * arrays_bytes_to_go as data arrives.  Handles the race where chunk data
+ * overtakes its schema message by probing for the matching schema first. */
+void Simple_IO::process_compute_message(int &arrays_bytes_to_go,
+                                        Array *array)
+{
+  int msg_code, msg_tag, msg_src;
+  MPI_Status status;
+  int data_size;
+
+  any_new_message(&msg_code, &msg_src, &msg_tag, &status);
+  switch(msg_code){
+  case CHUNK_SCHEMA:
+    /* Do something about it */
+    process_chunk_schema_request(msg_src,msg_tag, arrays_bytes_to_go,
+                                 &status, array);
+    break;
+
+  case CHUNK_DATA_FROM_IO:
+    /* Data arrived before its schema: wait for the schema (same tag
+     * decade, CHUNK_SCHEMA code) and process it; the data message is
+     * consumed inside process_chunk_schema_request(). */
+    MPI_Get_count(&status, MPI_CHAR, &data_size);
+    printf("Received chunk_data before chunk schema from %d of size %d\n",
+           msg_src, data_size);
+    MPI_Probe(msg_src, (msg_tag/10)*10+CHUNK_SCHEMA, MPI_COMM_WORLD, &status);
+    printf("Received the corressponding chunk schema message\n");
+    process_chunk_schema_request(msg_src, (msg_tag/10)*10+CHUNK_SCHEMA,
+                                 arrays_bytes_to_go,
+                                 &status, array);
+    break;
+
+  case NO_MESSAGE:
+    /* Do nothing */
+    break;
+  default:
+    /* This message is not for me */
+    printf("In process compute message - unknown code %d\n", msg_code);
+    break;
+  }
+}
+
+/* Receive and act on one CHUNK_SCHEMA message: decode the schema (layout
+ * written by the I/O node), locate the local chunk, then either receive
+ * incoming data (read ops) or send the requested data back (write ops).
+ * Contiguous schemas carry an (offset,size) byte range; non-contiguous
+ * ones carry base/size/stride vectors for an MPI derived datatype. */
+void Simple_IO::process_chunk_schema_request(int msg_src, int msg_tag,
+                                             int &array_bytes_to_go,
+                                             MPI_Status *status, Array *array)
+{
+  int *schema_buf, schema_size;
+  int chunk_id, op_type, array_rank, *base, *size, *stride, *ptr;
+  int data_size, elt_size, offset;
+  Boolean contiguous;
+  MPI_Datatype datatype;
+  Chunk *chunk;
+  void *data_ptr;
+
+  MPI_Get_count(status, MPI_INT, &schema_size);
+  schema_buf = (int *) malloc(sizeof(int)*schema_size);
+  receive_message((void *)schema_buf, schema_size, MPI_INT, msg_src,
+                  msg_tag, MPI_COMM_WORLD, status);
+
+  ptr = schema_buf;
+  chunk_id = *ptr++;
+  ptr++;  /* skip nat_chunked flag -- not needed here */
+  contiguous = (Boolean) *ptr++;
+  chunk = array->find_chunk(chunk_id);
+
+  if (contiguous){
+    /* Schema: [op_type, offset, size] -- raw byte range of the chunk */
+    op_type = *ptr++;
+    offset = *ptr++;
+    data_size = *ptr++;
+    data_ptr = chunk->data_ptr();
+    data_ptr = (char *)((char *) data_ptr + offset);
+
+    if ((op_type == RESTART) || (op_type == READ_TIMESTEP) ||
+        (op_type == GENERAL_READ))
+      receive_message((void *) data_ptr,
+                      data_size,
+                      MPI_CHAR, msg_src,
+                      (msg_tag/10*10)+CHUNK_DATA_FROM_IO,
+                      MPI_COMM_WORLD, status);
+    else
+      send_message((void *)data_ptr, data_size,MPI_CHAR, msg_src,
+                   (msg_tag/10)*10+CHUNK_DATA_TO_IO,
+                   MPI_COMM_WORLD);
+
+  }
+  else{
+    /* Schema: [rank, op_type, base[rank], size[rank], stride[rank]] */
+    array_rank = *ptr++;
+    op_type = *ptr++;
+    base = &ptr[0];
+    size = &ptr[array_rank*1];
+    stride = &ptr[array_rank*2];
+    elt_size = chunk->element_size();
+    data_size = num_elements(array_rank, size)*elt_size;
+
+    chunk->make_datatype(base,size,stride, &data_ptr, &datatype);
+    if ((op_type == RESTART) || (op_type == READ_TIMESTEP) ||
+        (op_type == GENERAL_READ))
+      receive_message(data_ptr, 1, datatype,msg_src,
+                      (msg_tag/10)*10+CHUNK_DATA_FROM_IO,
+                      MPI_COMM_WORLD, status);
+    else
+      send_message(data_ptr, 1, datatype, msg_src,
+                   (msg_tag/10)*10+CHUNK_DATA_TO_IO,
+                   MPI_COMM_WORLD);
+    MPI_Type_free(&datatype);
+  }
+
+  array_bytes_to_go -= data_size;
+  free(schema_buf);
+}
diff --git a/src/Panda/Simple_IO.h b/src/Panda/Simple_IO.h
new file mode 100644
index 0000000..4df4831
--- /dev/null
+++ b/src/Panda/Simple_IO.h
@@ -0,0 +1,91 @@
+#ifndef Simple_IO_dot_h
+#define Simple_IO_dot_h
+
+#include "Collective_IO.h"
+
+
+class ArrayGroup;
+class Array;
+class Chunk;
+class App_Info;
+
+//#include "../IEEEIO/IEEEIO.h"
+//#include "../IEEEIO/IOProtos.h"
+
+/* One-shot collective I/O strategy: an I/O node streams an entire array
+ * between disk (IEEEIO) and the compute nodes in start_to_finish();
+ * compute nodes run compute_node_io_loop().  Shared_IO derives from this
+ * to provide an incremental variant. */
+class Simple_IO : public Collective_IO
+{
+  protected:
+    Boolean dummy_;  /* Do the instance variables mean anything */
+    int *schema_string_;          /* serialized array schema (owned) */
+    int schema_size_;
+    int *current_schema_ptr_;
+    Array *current_array_;
+    Chunk *current_chunk_;
+    int num_of_subchunks_;
+    int current_subchunk_id_;
+    int num_of_chunks_;
+    int current_chunk_id_;
+    IOFile file_ptr_;             /* IEEEIO file handle */
+    FILE *schema_file_ptr_;
+    int num_io_nodes_;
+    int my_io_rank_;
+    int compute_app_num_;
+    App_Info *app_info_;
+    Boolean part_time_io_;        /* YES if this node is also a compute node */
+    Array *compute_node_array_;
+    int op_type_;
+    Boolean nat_chunked_;
+    Boolean sub_chunked_;
+    Boolean overlaped_;
+    Boolean contiguous_;          /* YES: plain byte streaming, no datatypes */
+    int world_rank_;
+
+    /* Per-overlap bookkeeping, sized max_overlaps_ (see realloc_buffers) */
+    int num_overlaps_;
+    int max_overlaps_;
+    int *overlap_chunk_ids_;
+    int *dest_ids_;
+    int **schema_bufs_;
+    MPI_Request *schema_requests_;
+    MPI_Request *requests_;
+    MPI_Status *statuses_;
+    MPI_Datatype *datatypes_;
+    int max_rank_;
+    int array_rank_;
+    int *overlap_base_;
+    int *overlap_stride_;
+    int *overlap_size_;
+    char **data_ptrs_;
+    char *mem_buf_;       /* staging buffer, owned by MPIFS_global_obj */
+    int mem_buf_size_;
+
+
+    void realloc_buffers(int);
+    void compute_chunk_overlaps(Array*,Chunk*);
+    void compute_schemas(Array*,Chunk*,Chunk*);
+    virtual void send_schema_message(int);
+    void make_datatype(Chunk*,int);
+    void receive_data(Chunk*,int, int&);
+    void send_data(Chunk*, int, int&);
+    void read_data(Chunk*);
+    void read_data(char*,int,int);
+    void write_data(char*,int,int);
+    void write_data(Chunk*);
+    void copy_data(Chunk*,int,Boolean,int&);
+    void direct_io(int,Boolean,int&);
+    void free_datatypes();
+    void wait_for_completion(int&,Array*);
+    void send_data_to_compute_nodes(Chunk*, int&);
+    void receive_data_from_compute_nodes(Chunk*, int&);
+    void realloc_schema_bufs(int);
+    void realloc_mem_bufs(int);
+    void process_compute_message(int&,Array*);
+    void process_chunk_schema_request(int,int,int&,MPI_Status*,Array*);
+  public:
+    Simple_IO();    /* dummy object for compute nodes */
+    Simple_IO(int*,int,int,int, int , App_Info*, IOFile);
+    virtual ~Simple_IO();
+    virtual void start_to_finish(Boolean part_time_io,Array*);
+    virtual void compute_node_io_loop(Array*);
+};
+
+#endif
diff --git a/src/Panda/StopWatch.h b/src/Panda/StopWatch.h
new file mode 100644
index 0000000..e38c5d7
--- /dev/null
+++ b/src/Panda/StopWatch.h
@@ -0,0 +1,34 @@
+#ifndef StopWatch_dot_h
+#define StopWatch_dot_h
+
+#include <stdio.h>
+#include <mpi.h>
+
+/* Simple wall-clock timer built on MPI_Wtime().  Usage: start(), then
+ * stop(desc); the formatted timing line is fetched afterwards with
+ * get_description(). */
+class StopWatch
+{
+  private:
+    double start_t,finish_t;   /* -1 marks "not running" */
+    char description[200];
+
+  public:
+    StopWatch () { start_t = finish_t = -1; }
+    ~StopWatch() { };
+    void start() { start_t = MPI_Wtime(); }
+    /* const char* is backward compatible for existing char* callers */
+    void stop (const char *desc)
+    {
+      finish_t = MPI_Wtime();
+      if (start_t == -1.0)
+        fprintf(stderr, "StopWatch: must start before stop\n");
+      else
+        /* BUG FIX: sprintf could overflow description[] for a long desc;
+         * snprintf bounds the write and always NUL-terminates. */
+        snprintf(description, sizeof(description),
+                 "%s elapsed time: %f, (%f, %f)\n",
+                 desc, finish_t-start_t, start_t, finish_t);
+      start_t = finish_t = -1;
+    }
+    char *get_description() { return description;}
+
+};
+
+
+#endif
diff --git a/src/Panda/Template.C b/src/Panda/Template.C
new file mode 100644
index 0000000..5600e2f
--- /dev/null
+++ b/src/Panda/Template.C
@@ -0,0 +1,40 @@
+#include "definitions.h"
+#include "Template.h"
+
+/* Construct a template of rank `Rank`; `sizearray` (which may be NULL)
+ * is deep-copied via copy_int_list(). */
+Template::Template(int Rank, int *sizearray)
+{
+  rank_ = Rank;
+  if (sizearray) size_ = copy_int_list(Rank, sizearray);
+  /* BUG FIX: size_ was left uninitialized when sizearray was NULL,
+   * so the destructor freed an indeterminate pointer. */
+  else size_ = NULL;
+}
+
+/* Default construct: an empty template with no size vector. */
+Template::Template()
+{
+  size_ = NULL;
+  rank_ = 0;
+}
+
+/* Release the size vector; free(NULL) is a no-op, so no guard needed. */
+Template::~Template()
+{
+  free(size_);
+  size_ = NULL;
+}
+
+/* Number of dimensions of this template. */
+int Template::rank()
+{
+  return rank_;
+}
+
+/* Borrowed pointer to the per-dimension size vector (may be NULL). */
+int* Template::size()
+{
+  return size_;
+}
+
+/* Product of all dimension sizes (via num_elements helper). */
+int Template::total_elements()
+{
+  return num_elements(rank_, size_);
+}
+
+/* Size of dimension i (no bounds checking). */
+int Template::size(int i)
+{
+  return size_[i];
+}
diff --git a/src/Panda/Template.h b/src/Panda/Template.h
new file mode 100644
index 0000000..ff9483a
--- /dev/null
+++ b/src/Panda/Template.h
@@ -0,0 +1,22 @@
+#ifndef template_dot_h
+#define template_dot_h
+
+#include<malloc.h>
+
+/* Base class holding a rank and a per-dimension size vector; used as the
+ * common shape description for arrays and layouts. */
+class Template {
+  protected:
+    int rank_;    /* number of dimensions */
+    int *size_;   /* owned array of rank_ sizes (may be NULL) */
+
+  public:
+    Template();
+    Template(int Rank, int *sizearray);  /* deep-copies sizearray */
+    virtual ~Template();
+    int rank();
+    int* size();            /* borrowed pointer */
+    int size(int);          /* size of one dimension */
+    int total_elements();   /* product of all sizes */
+};
+
+#endif
+
diff --git a/src/Panda/VirtFS.C b/src/Panda/VirtFS.C
new file mode 100644
index 0000000..f220392
--- /dev/null
+++ b/src/Panda/VirtFS.C
@@ -0,0 +1,4 @@
+#include "VirtFS.h"
+
+// jozwiak 030295
+// there should be nothing here of substance...since VirtFS is purely virtual
diff --git a/src/Panda/VirtFS.h b/src/Panda/VirtFS.h
new file mode 100644
index 0000000..8b13a14
--- /dev/null
+++ b/src/Panda/VirtFS.h
@@ -0,0 +1,14 @@
+#ifndef VirtFS_dot_h
+#define VirtFS_dot_h
+
+#include <stdio.h>
+
+
+class VirtFS
+{
+ public:
+ VirtFS(){} ;
+ virtual ~VirtFS(){} ;
+};
+
+#endif
diff --git a/src/Panda/c_interface.C b/src/Panda/c_interface.C
new file mode 100644
index 0000000..b6085d6
--- /dev/null
+++ b/src/Panda/c_interface.C
@@ -0,0 +1,172 @@
+/* This is a simple interface for c applications to use Panda */
+#include <stdio.h>
+#include <stdlib.h>
+#include "definitions.h"
+#include "Panda.h"
+#include "MPIFS.h"
+#include "ArrayLayout.h"
+#include "Array.h"
+
+
+extern "C" { int Panda_Create(int, int); }
+extern "C" { void Panda_Finalize(); }
+extern "C" { void PandaTimestep(struct ArrayInfo *); }
+extern "C" { void *PandaReadTimestep(struct ArrayInfo *); }
+extern "C" { void Panda_WriteAttribute(char *, char *, int, int, void *); }
+extern "C" { void *Panda_ReadAttribute(char *, char *, int *, int *); }
+extern "C" { Boolean PandaIsNewFile(char *); }
+
+typedef struct ArrayInfo {
+ char* name_; /* array name */
+ int rank_; /* rank */
+ int* size_; /* global size of the array */
+ int esize_; /* size of each element */
+ int mem_rank_; /* compute processor topology - rank */
+ int* mem_layout_; /* compute processor topology - mesh */
+ Distribution* mem_dist_; /* compute processor topology - dist */
+ int disk_rank_; /* io processor topology - rank */
+ int* disk_layout_; /* io processor topology - mesh */
+ Distribution* disk_dist_; /* io processor topology - dist */
+ char* data_; /* data pointer belonging to me */
+ int stencil_width_; /* stencil width */
+ struct ArrayInfo *next_; /* next element */
+} ArrayInfo;
+
+Panda *global_bear = NULL;
+extern MPIFS *MPIFS_global_obj;
+
+int Panda_Create(int ioproc_every, int is_part_time_mode)
+{
+ int i, my_app_size, my_rank, *world_ranks;
+ int io_nodes;
+
+/* if (io_nodes > 1) {
+ printf("Warning: Write Chunks instead of Write arrays.\n");
+ printf("There might be errors in Attributes write\n");
+ }*/
+
+ MPI_Comm_size(MPI_COMM_WORLD, &my_app_size);
+ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+ world_ranks = (int *) malloc(sizeof(int)*my_app_size);
+ for (i=0; i<my_app_size; i++) world_ranks[i] = i;
+ io_nodes = (my_app_size - 1) / ioproc_every + 1;
+
+ if (is_part_time_mode) {
+ if (my_rank < io_nodes) { /* part-time io nodes */
+ global_bear = new Panda(PART_TIME_IO, my_rank, my_app_size, world_ranks,
+ my_rank, io_nodes, world_ranks);
+ //printf("##### Panda proc %d/%d PART_TIME_IO\n", my_rank, my_app_size);
+ } else { /* part-time compute nodes */
+ global_bear = new Panda(PART_TIME_COMPUTE, my_rank, my_app_size,
+ world_ranks, -1, io_nodes, world_ranks);
+ //printf("##### Panda proc %d/%d PART_TIME_COMPUTE\n", my_rank, my_app_size);
+ }
+ } else {
+// printf("Warning: Full-time I/O nodes is not integrated with Cactus yet, ");
+// printf("due to the communicator problem. Panda's part is done, though\n");
+ if (my_rank < io_nodes) { /* full-time io nodes */
+ global_bear = new Panda(IO_NODE, 0, my_rank, io_nodes, world_ranks);
+ delete global_bear;
+ free(world_ranks);
+ return 1;
+ } else { /* compute nodes */
+ for (i=0; i<(my_app_size-io_nodes); i++) world_ranks[i] += io_nodes;
+ global_bear = new Panda(COMPUTE_NODE, 1, my_rank-io_nodes,
+ my_app_size-io_nodes, world_ranks);
+ }
+ }
+ free(world_ranks);
+ return 0;
+}
+
+void Panda_Finalize()
+{
+ if (global_bear) delete global_bear;
+}
+
+
+void PandaTimestep(ArrayInfo *ptr)
+{
+/* Test if Panda_Create() has been called */
+ if (global_bear == NULL) {
+ printf("Panda object is not created yet - Use Panda_Create(...)\n");
+ return;
+ }
+
+/* Create array information */
+ ArrayLayout *mem_layout, *disk_layout;
+ Array *array;
+
+ mem_layout = new ArrayLayout(ptr->mem_rank_, ptr->mem_layout_);
+ disk_layout = new ArrayLayout(ptr->disk_rank_, ptr->disk_layout_);
+
+ array = new Array(ptr->name_, ptr->rank_, ptr->size_, ptr->esize_,
+ mem_layout, ptr->mem_dist_,
+ disk_layout, ptr->disk_dist_,
+ ptr->data_, ptr->stencil_width_);
+
+ global_bear->app_barrier();
+ printf("---------------- Panda Timestep -------------------\n");
+ //printf("name %s rank %d size %d %d %d esize %d mem_layout %d %d %d disk_layout %d stencil_width_ %d\n", ptr->name_, ptr->rank_, ptr->size_[0], ptr->size_[1], ptr->size_[2], ptr->esize_, ptr->mem_layout_[0], ptr->mem_layout_[1], ptr->mem_layout_[2], ptr->disk_layout_[0], ptr->stencil_width_);
+
+ array->timestep();
+
+ delete mem_layout;
+ delete disk_layout;
+ delete array;
+}
+
+void *PandaReadTimestep(ArrayInfo *ptr)
+{
+/* Test if Panda_Create() has been called */
+ if (global_bear == NULL) {
+ printf("Panda object is not created yet - Use Panda_Create(...)\n");
+ return NULL;
+ }
+
+/* Create array information */
+ ArrayLayout *mem_layout, *disk_layout;
+ Array *array;
+
+ mem_layout = new ArrayLayout(ptr->mem_rank_, ptr->mem_layout_);
+ disk_layout = NULL;
+ array = new Array(ptr->name_, ptr->rank_, ptr->size_, ptr->esize_,
+ mem_layout, ptr->mem_dist_,
+ disk_layout, ptr->disk_dist_,
+ ptr->data_, ptr->stencil_width_);
+
+ printf("---------------- Panda ReadTimestep -------------------\n");
+ global_bear->app_barrier();
+ array->read_timestep();
+ void *data = (void *)array->get_data_ptr();
+ array->set_data_ptr(NULL);
+
+ delete mem_layout;
+ delete array;
+ return data;
+}
+
+void Panda_WriteAttribute(char *fname, char *name, int esize,
+ int count, void *data)
+{
+ Attribute *attr = new Attribute();
+ attr->write(fname, name, esize, count, data);
+ delete attr;
+}
+
+void *Panda_ReadAttribute(char *fname, char *name, int *type, int *count)
+{
+ Attribute *attr = new Attribute();
+ attr->read(fname, name);
+ void *data = attr->get_data_ptr();
+ attr->set_data_ptr(NULL);
+ *type = attr->esize();
+ *count = attr->count();
+ delete attr;
+ return data;
+}
+
+Boolean PandaIsNewFile(char *fname)
+{
+ return MPIFS_global_obj->is_new_file(fname);
+}
diff --git a/src/Panda/c_interface.h b/src/Panda/c_interface.h
new file mode 100644
index 0000000..b167f6f
--- /dev/null
+++ b/src/Panda/c_interface.h
@@ -0,0 +1,28 @@
+#ifndef _included_C_Interface_h
+#define _included_C_Interface_h
+
+#include "external/IEEEIO/src/IEEEIO.h"
+
+
+typedef enum { NONE,
+ BLOCK,
+ GENERAL,
+ CYCLIC
+ } Distribution;
+typedef struct ArrayInfo {
+ char* name_; /* array name */
+ int rank_; /* rank */
+ int* size_; /* global size of the array */
+ int esize_; /* size of each element */
+ int mem_rank_; /* compute processor topology - rank */
+ int* mem_layout_; /* compute processor topology - mesh */
+ Distribution* mem_dist_; /* compute processor topology - dist */
+ int disk_rank_; /* io processor topology - rank */
+ int* disk_layout_; /* io processor topology - mesh */
+ Distribution* disk_dist_; /* io processor topology - dist */
+ char* data_; /* data pointer belonging to me */
+ int stencil_width_; /* stencil width */
+ struct ArrayInfo *next_; /* next element */
+} ArrayInfo;
+
+#endif
diff --git a/src/Panda/compute_test.C b/src/Panda/compute_test.C
new file mode 100644
index 0000000..fc61f34
--- /dev/null
+++ b/src/Panda/compute_test.C
@@ -0,0 +1,350 @@
+/*****************************************************************
+ * This is a sample program that shows how the panda library *
+ * is going to be used by the application programs. *
+ * The example command line format is in test7.script. *
+ * This example shows the interface with only disk layout *
+ * info but no stride or subchunking schema. The value for *
+ * those schemas use the default ones. *
+ * The current test varies the size of arrays. However, the *
+ * wrapper function allows the number of the nodes to be *
+ * changed as well. *
+ * The first iteration loads all the code in memory. *
+ * The second run does the simulated disk simulation. *
+ * From the third run on, the values are the real writes. *
+ *****************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "definitions.h"
+#include "StopWatch.h"
+#include "ArrayGroup.h"
+#include "ArrayLayout.h"
+#include "Array.h"
+#include "Panda.h"
+
+int Num_of_Arrays = 1;
+int Num_Simulate_Read = 0;
+int Num_Read = 0;
+int Num_Simulate_Write = 2;
+int Num_Write = 2 ;
+int interleave = 0;
+Panda *global_bear;
+extern int SUBCHUNK_SIZE;
+int STRATEGY = 1;
+
+void test_timestep(ArrayGroup *t1, int arraysize, Array **arrays)
+{
+ StopWatch timer;
+ int i;
+ int flag=0;
+ char time_message[100];
+
+#ifdef VERIFYBF
+ for (int j=0; j<Num_of_Arrays; j++) arrays[j]->set_byte_pattern();
+ t1->set_verify();
+#endif
+
+
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+
+
+ for (i=0; i<Num_Simulate_Write+Num_Write; ++i) {
+ if (i < Num_Simulate_Write){
+ t1->set_simulate_mode();
+ flag=0;
+ }
+ else {
+ t1->reset_simulate_mode();
+ flag=1;
+ }
+
+
+ global_bear->app_barrier();
+ t1->set_io_strategy(STRATEGY);
+ timer.start();
+ t1->timestep();
+ timer.stop(":");
+ sprintf(time_message,"%s Write: SIZE: %d, Time %i %s",
+ (flag==0? "Simulated":"Real"),
+ arraysize, i, timer.get_description());
+ printf("%s", time_message);
+
+ if (Num_Read + Num_Simulate_Read == 0 || i < Num_Simulate_Write + Num_Write-1 ) {
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+
+ }
+ }
+}
+
+void test_readtimestep(ArrayGroup *r1, int arraysize, Array **arrays)
+{
+ StopWatch timer;
+ int i;
+ int flag;
+ char time_message[100];
+#ifdef VERIFYBF
+ for (int j=0; j<Num_of_Arrays; j++) arrays[j]->reset_byte_pattern();
+#endif
+
+ if (Num_Write + Num_Simulate_Write == 0) {
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+ }
+
+
+
+ for (i=0; i<Num_Simulate_Read+Num_Read; ++i) {
+ if (i < Num_Simulate_Read) { r1->set_simulate_mode(); flag=0; }
+ else {r1->reset_simulate_mode();
+ flag=1;
+ global_bear->flushfiles();
+ }
+
+
+ global_bear->app_barrier();
+ r1->set_io_strategy(STRATEGY);
+ timer.start();
+ r1->read_timestep();
+ timer.stop(":");
+
+ sprintf(time_message,"%s Read: SIZE: %d, Time %i %s ",
+ (flag==0? "Simulated":"Real"),
+ arraysize, i, timer.get_description());
+ printf("%s", time_message);
+ }
+#ifdef VERIFYBF
+ for(i=0;i<Num_of_Arrays;i++)
+ if (arrays[i]->verify_byte_pattern())
+ printf("Byte pattern verified for array %d\n", i);
+ else
+ printf("Byte pattern incorrect for array %d\n", i);
+#endif
+ global_bear->cleanfiles();
+}
+
+
+int gemein(Panda *bear, int io_nodes, int arrayrank, int *arraysize, int esize,
+ int mrank, int *mlayout, int drank, int *dlayout,
+ Distribution *mem_dist, Distribution *disk_dist, int cost_model)
+{
+ ArrayLayout *mem1; // Memory array layout
+ ArrayLayout *disk1; // Disk array layout
+ int i;
+ Array **arrays;
+ arrays = (Array **)malloc(sizeof(Array*)*Num_of_Arrays);
+
+// Set up memory and disk layouts
+ mem1 = new ArrayLayout (mrank,mlayout);
+ disk1 = new ArrayLayout(drank,dlayout);
+
+// Create an Array for computation.
+ char *name;
+ name = (char *)malloc(sizeof(char)*(strlen("z1Array")+5));
+ char temp[5];
+ for (i=0; i< Num_of_Arrays; i++) {
+ strcpy(name,"z1Array");
+ sprintf(temp, "%d", i);
+ strcat(name, temp);
+ arrays[i] = new Array(name,arrayrank,arraysize,esize,
+ mem1,mem_dist,disk1, disk_dist);
+ }
+ free(name);
+
+ if (Num_Simulate_Write + Num_Write > 0) {
+ ArrayGroup *t1 = new ArrayGroup("z4timestep");
+ for (i= 0; i<Num_of_Arrays; i++) t1->insert(arrays[i]);
+ test_timestep(t1, arraysize[arrayrank-1], arrays);
+ delete t1;
+ if (Num_Simulate_Read + Num_Read > 0) {
+ ArrayGroup *r1 = new ArrayGroup("z4timestep");
+ for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]);
+ test_readtimestep(r1, arraysize[arrayrank-1], arrays);
+ delete r1;
+ }
+ } else {
+
+ ArrayGroup *r1 = new ArrayGroup("z4timestep");
+ for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]);
+ test_readtimestep(r1, arraysize[arrayrank-1], arrays);
+ delete r1;
+ }
+
+ // delete all objects created
+
+ for (i=0; i<Num_of_Arrays; i++) delete arrays[i];
+ free(arrays);
+ delete disk1;
+ delete mem1;
+ return(0);
+}
+
+char my_getopt(char *str)
+{
+ char command[23][15];
+
+ strcpy(command[0], "-Total_nodes");
+ strcpy(command[1], "-Io_nodes");
+ strcpy(command[2], "-upper");
+ strcpy(command[3], "-Arraysize");
+ strcpy(command[4], "-Esize");
+ strcpy(command[5], "-Mlayout");
+ strcpy(command[6], "-Dlayout");
+ strcpy(command[7], "-mem_dist");
+ strcpy(command[8], "-disk_dist");
+ strcpy(command[9], "-num_arrays");
+ strcpy(command[10], "-read_simulate");
+ strcpy(command[11], "-Read");
+ strcpy(command[12], "-write_simulate");
+ strcpy(command[13], "-Write");
+ strcpy(command[14], "-interleave");
+ strcpy(command[15], "-Cost_model");
+ strcpy(command[16], "-size_message");
+ strcpy(command[17], "-Xfactor");
+
+ for (int i= 0; i< 18; i++)
+ if (!strncmp(str, command[i], 2)) return command[i][1];
+ printf("undefined input %s, quit!\n",str);
+ exit(0);
+}
+
+void parse_cl(int argc, char **argv, int &total_nodes, int &io_nodes,
+ int &upper_bound, int &lower_bound, int &arrayrank, int*& arraysize,
+ int &esize, int &mrank, int*& mlayout, int& drank, int*& dlayout,
+ Distribution*& mem_dist, Distribution*& disk_dist, int &cost_model_mode)
+{
+ char opt;
+ int k;
+
+ for (int i=1; i<argc; ) {
+ opt = my_getopt(argv[i++]);
+ switch(opt) {
+ case 'X':
+ STRATEGY = atoi(argv[i++]);
+ break;
+ case 'T':
+ total_nodes = atoi(argv[i++]);
+ break;
+ case 'I':
+ io_nodes = atoi(argv[i++]);
+ break;
+ case 'u':
+ upper_bound = atoi(argv[i++]);
+ break;
+ case 'A':
+ arrayrank = atoi(argv[i++]);
+ arraysize = (int *) malloc(sizeof(int)* arrayrank);
+ mem_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank);
+ disk_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank);
+ for (k = 0; k < arrayrank; k++) arraysize[k] = atoi(argv[i++]);
+ lower_bound = arraysize[k-1];
+ break;
+ case 'E':
+ esize = atoi(argv[i++]);
+ break;
+ case 'M':
+ mrank = atoi(argv[i++]);
+ mlayout = (int *) malloc(sizeof(int)* mrank);
+ for (k = 0; k < mrank; k++) mlayout[k] = atoi(argv[i++]);
+ break;
+ case 'D':
+ drank = atoi(argv[i++]);
+ dlayout = (int *) malloc(sizeof(int)* drank);
+ for (k = 0; k < drank; k++) dlayout[k] = atoi(argv[i++]);
+ break;
+ case 'm':
+ for (k = 0; k < arrayrank; k++) mem_dist[k] = (Distribution)atoi(argv[i++]);
+ break;
+ case 'd':
+ for (k = 0; k < arrayrank; k++) disk_dist[k] = (Distribution)atoi(argv[i++]);
+ break;
+ case 'n':
+ Num_of_Arrays = atoi(argv[i++]);
+ break;
+ case 'r':
+ Num_Simulate_Read = atoi(argv[i++]);
+ break;
+ case 'R':
+ Num_Read = atoi(argv[i++]);
+ break;
+ case 'w':
+ Num_Simulate_Write = atoi(argv[i++]);
+ break;
+ case 'W':
+ Num_Write = atoi(argv[i++]);
+ break;
+ case 'i':
+ interleave = atoi(argv[i++]);
+ break;
+ case 'C':
+ cost_model_mode = atoi(argv[i++]);
+ break;
+ case 's':
+ SUBCHUNK_SIZE = atoi(argv[i++]);
+ break;
+ /* For Panda internal library stuff */
+
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int total_nodes; // The number of total nodes (comp + io)
+ int io_nodes; // The number of io nodes
+ int upper_bound; // The upper bound of the last dimension of the array
+ int lower_bound; // The starting number of the last dimension of the array
+ int arrayrank ; // The array rank.
+ int *arraysize; // The number of elements along each array dimension
+ int esize ; // element size of each array element
+ int mrank ; // Compute node mesh rank
+ int *mlayout; // Compute node mesh layout
+ int drank ; // IO node mesh rank
+ int cost_model_mode; // Whether the cost model is included.
+ int *dlayout; // IO node mesh layout
+ Distribution *mem_dist; // The memory array distribution along each dimension
+ // There are three possible distributions (BLOCK,
+ // NONE, CYCLIC).
+ Distribution *disk_dist; // The disk array distribution along each dimension
+ int my_rank, my_app_size, *world_ranks, leader;
+
+
+ MPI_Init(&argc, &argv);
+
+// For Parallel architecture (IBM SP2 like),
+// Initialize the MPI environment. Only compute nodes will return from
+// this call, the io nodes will not return from the call. All the io nodes
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+ MPI_Comm_size(MPI_COMM_WORLD, &my_app_size);
+
+ parse_cl(argc, argv, total_nodes, io_nodes, upper_bound, lower_bound, arrayrank,
+ arraysize, esize, mrank, mlayout, drank, dlayout, mem_dist, disk_dist, cost_model_mode);
+
+ leader = io_nodes;
+ world_ranks = (int *) malloc(sizeof(int)*my_app_size);
+ for(int i=0;i< my_app_size; i++)
+ world_ranks[i] = leader+i;
+
+ Panda * bear = new Panda(COMPUTE_NODE, 1, my_rank, my_app_size,
+ world_ranks);
+ global_bear = bear;
+
+ for (int size=lower_bound; size <= upper_bound; size*=2) {
+ arraysize[arrayrank-1] = size;
+ gemein(bear,io_nodes, arrayrank, arraysize, esize,
+ mrank, mlayout, drank, dlayout, mem_dist, disk_dist, cost_model_mode);
+ }
+ free(arraysize);
+ free(mlayout);
+ free(dlayout);
+ free(mem_dist);
+ free(disk_dist);
+ free(world_ranks);
+ delete bear;
+ MPI_Finalize();
+ return(0);
+}
diff --git a/src/Panda/configure b/src/Panda/configure
new file mode 100644
index 0000000..f34cfdd
--- /dev/null
+++ b/src/Panda/configure
@@ -0,0 +1,75 @@
+#!/bin/sh
+# this is a script that is intended to guide the procession of
+# our makefiles in an independent way across multiple OS platforms
+# and multiple hardware platform...(first for sun)
+echo "checking out target machine:"
+X="os-detected"
+MY_OS=""
+
+if [ `uname -a | fgrep -i sun | wc -l` -ne 0 ] ; then
+ MY_OS="sunos"
+fi
+
+if [ `uname -a | fgrep -i aix | wc -l` -ne 0 ] ; then
+ MY_OS="aix"
+fi
+
+if [ `uname -a | fgrep -i hp-ux | wc -l` -ne 0 ] ; then
+ MY_OS="hp-ux"
+fi
+
+if [ `uname -a | fgrep -i irix | wc -l` -ne 0 ] ; then
+ MY_OS="irix"
+fi
+
+/bin/rm -fr makefile
+
+case $MY_OS in
+ "sunos")
+ echo " detected SunOS..."
+ echo $MY_OS > $X
+ echo "include makefile.sun.mpich" > makefile
+ ;;
+ "aix")
+ echo " detected AIX..."
+ echo $MY_OS > $X
+ echo "include makefile.ibm.mpif" > makefile
+ ;;
+ "irix")
+ echo " detected IRIX..."
+ echo $MY_OS > $X
+ echo "include makefile.sgi.mpich" > makefile
+ ;;
+ "hp-ux")
+ echo " detected HP-UX..."
+ echo $MY_OS > $X
+ echo "include makefile.hpux.mpich" > makefile
+ ;;
+ *)
+ echo "Hey, I don't know this operating system..."
+ echo $MY_OS > $X
+ echo "include makefile.unix.posix" > makefile
+ ;;
+esac
+
+cat makefile.proto >> makefile
+
+case $MY_OS in
+ "irix") # those folks busted "which"
+ FOUND_MPI=`which -f mpirun |wc |awk '{print $2}'`
+ ;;
+ *)
+ FOUND_MPI=`which mpirun |wc |awk '{print $2}'`
+ ;;
+esac
+
+if [ $FOUND_MPI -ne 0 ] ; then
+ echo " found MPI..."
+fi
+FP='/scratch-modi4/'`whoami`'/'
+mkdir $FP >/dev/null 2>&1
+echo " user temp directory $FP exists..."
+echo 'FILEPREFIXVAL=\"'$FP'\"' > fileprefix
+echo "the file \"makefile\" is now configured for target."
+
+exit 0
diff --git a/src/Panda/definitions.h b/src/Panda/definitions.h
new file mode 100644
index 0000000..32c4da0
--- /dev/null
+++ b/src/Panda/definitions.h
@@ -0,0 +1,186 @@
+#ifndef definitions_dot_h
+#define definitions_dot_h
+
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+
+#include "cctk.h"
+
+extern "C" {int fsync(int f);}
+
+
+
+#define START 0
+#define WAITING 1
+
+/* Different I/O strategies */
+#define SIMPLE_IO 1
+#define CSDIO_IO 2
+
+/* The different possible nodetypes */
+#define COMPUTE_NODE 0
+#define IO_NODE 1
+#define PART_TIME_COMPUTE 2
+#define PART_TIME_IO 3
+#define SUB_CHUNK 4
+#define PART_TIME 5
+
+
+/* Unix or MPI based file system */
+#define MPI_SYSTEM 0
+#define UNIX_SYSTEM 1
+
+/* Different kinds of collective I/O operations */
+#define RESTART 0
+#define READ_TIMESTEP 1
+#define GENERAL_READ 2
+#define CHECKPOINT 3
+#define TIMESTEP 4
+#define GENERAL_WRITE 5
+
+
+/* Tags to indicate the type of the message */
+
+/* #define NO_MESSAGE 10
+ #define SPECIAL 9
+ #define ARRAYGROUP_SCHEMA 8
+ #define CHUNK_DATA_TO_IO 7
+ #define APP_IO_DONE 6
+ #define QUIT 5
+ #define COMP_QUIT 4
+ #define CHUNK_SCHEMA 3
+ #define CHUNK_DATA_FROM_IO 2
+ #define CHUNK_SCHEMA_DATA 1
+*/
+/* Modified it to make it compatible with my thesis */
+#define CHUNK_SCHEMA 1
+#define CHUNK_DATA_FROM_IO 2
+#define CHUNK_DATA_TO_IO 3
+
+#define COMP_QUIT 4
+#define QUIT 5
+#define ATTRIBUTE_SCHEMA 6
+#define ATTRIBUTE_DATA 7
+
+#define ARRAYGROUP_SCHEMA 8
+#define SPECIAL 9
+#define NO_MESSAGE 10
+
+/* Tags to indicate the type of special operations required */
+#define APP_INFO 1
+#define APP_BARRIER 2
+#define GLOBAL_BARRIER 3
+#define CLEANFILES 4
+#define FLUSHFILES 5
+#define CREATEFILES 6
+
+typedef enum { UNSET,
+ Regular,
+ Irregular
+ } Distribution_Type;
+
+typedef enum { NONE,
+ BLOCK,
+ GENERAL,
+ CYCLIC
+ } Distribution;
+
+typedef enum { HPF,
+ NAS,
+ GENERAL_BLOCK
+ } Block_Distribution;
+
+typedef enum { ROUND_ROBIN,
+ REGULAR
+ } ChunkAllocPolicy;
+
+
+typedef enum { NO = 0,
+ YES = 1
+ } Boolean;
+
+
+typedef enum { ALLOC,
+ NO_ALLOC,
+ SHARED
+ } DataStatus;
+
+
+
+inline int max(int a, int b)
+{
+ if (a > b) return a;
+ else return b;
+}
+
+inline int min(int a, int b)
+{
+ if (a < b) return a;
+ else return b;
+}
+
+
+inline int* copy_int_list(int s, int *l)
+{
+ int *ret_list = (int *) malloc(sizeof(int)*s);
+ for(int i=0;i<s;i++)
+ ret_list[i] = l[i];
+ return ret_list;
+}
+
+
+
+inline Distribution* copy_distribution(int num, Distribution *ptr)
+{
+ Distribution *ret_list = (Distribution *)malloc(sizeof(Distribution)*num);
+
+ for(int i=0; i < num; i++)
+ ret_list[i] = ptr[i];
+
+ return ret_list;
+}
+
+
+inline Boolean equal_distribution(int size, Distribution* dist1, Distribution* dist2)
+{
+ for(int i=0; i < size; i++)
+ {
+ if (dist1[i] != dist2[i])
+ return NO;
+ }
+ return YES;
+}
+
+inline void pack_distribution(int **schema_buf, int rank, Distribution *in_dist)
+{
+ Distribution *dist = in_dist;
+ int* ptr = *schema_buf;
+
+ for(int i=0;i<rank;i++)
+ *ptr++ = (int) dist[i];
+ *schema_buf = ptr;
+}
+
+inline Distribution* new_distribution(int **schema_buf, int rank)
+{
+ Distribution *dist = (Distribution*) malloc(sizeof(Distribution)*rank);
+ int *ptr = *schema_buf;
+
+ for(int i=0;i<rank;i++)
+ dist[i] = (Distribution) *ptr++;
+
+ *schema_buf = ptr;
+ return dist;
+
+}
+
+inline int num_elements(int r, int *size)
+{
+ int total=1;
+ for(int i=0;i<r;i++) total *= size[i];
+ return total;
+}
+
+
+#endif
diff --git a/src/Panda/fulltime.C b/src/Panda/fulltime.C
new file mode 100644
index 0000000..dd195f0
--- /dev/null
+++ b/src/Panda/fulltime.C
@@ -0,0 +1,410 @@
+/*****************************************************************
+ * This is a sample program that shows how the panda library *
+ * is going to be used by the application programs. *
+ * The example command line format is in test7.script. *
+ * This example shows the interface with only disk layout *
+ * info but no stride or subchunking schema. The value for *
+ * those schemas use the default ones. *
+ * The current test varies the size of arrays. However, the *
+ * wrapper function allows the number of the nodes to be *
+ * changed as well. *
+ * The first iteration loads all the code in memory. *
+ * The second run does the simulated disk simulation. *
+ * From the third run on, the values are the real writes. *
+ *****************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "definitions.h"
+#include "StopWatch.h"
+#include "ArrayGroup.h"
+#include "ArrayLayout.h"
+#include "Array.h"
+#include "Panda.h"
+
+int Num_of_Arrays = 1;
+int Num_Simulate_Read = 0;
+int Num_Read = 0;
+int Num_Simulate_Write = 2;
+int Num_Write = 2 ;
+int interleave = 0;
+Panda *global_bear;
+extern int SUBCHUNK_SIZE;
+int STRATEGY = 1;
+int BLK;
+
+int CYCLIC_ON_MEM = 0;
+
+void test_timestep(ArrayGroup *t1, int arraysize, Array **arrays)
+{
+ StopWatch timer;
+ int i;
+ int flag=0;
+ char time_message[100];
+
+#ifdef VERIFYBF
+ for (int j=0; j<Num_of_Arrays; j++) arrays[j]->set_byte_pattern();
+ t1->set_verify();
+#endif
+
+
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+
+
+ for (i=0; i<Num_Simulate_Write+Num_Write; ++i) {
+ if (i < Num_Simulate_Write){
+ t1->set_simulate_mode();
+ flag=0;
+ }
+ else {
+ t1->reset_simulate_mode();
+ flag=1;
+ }
+
+
+ global_bear->app_barrier();
+ t1->set_io_strategy(STRATEGY);
+ timer.start();
+ t1->timestep();
+ timer.stop(":");
+ sprintf(time_message,"%s Write: SIZE: %d, BLK: %d, Time %i %s",
+ (flag==0? "Simulated":"Real"),
+ arraysize, BLK, i, timer.get_description());
+ printf("%s", time_message);
+
+ if (Num_Read + Num_Simulate_Read == 0 || i < Num_Simulate_Write + Num_Write-1 ) {
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+
+ }
+ }
+}
+
+void test_readtimestep(ArrayGroup *r1, int arraysize, Array **arrays)
+{
+ StopWatch timer;
+ int i;
+ int flag;
+ char time_message[100];
+#ifdef VERIFYBF
+ for (int j=0; j<Num_of_Arrays; j++) arrays[j]->reset_byte_pattern();
+#endif
+
+ if (Num_Write + Num_Simulate_Write == 0) {
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+ }
+
+
+
+ for (i=0; i<Num_Simulate_Read+Num_Read; ++i) {
+ if (i < Num_Simulate_Read) { r1->set_simulate_mode(); flag=0; }
+ else {r1->reset_simulate_mode();
+ flag=1;
+ global_bear->flushfiles();
+ }
+
+
+ global_bear->app_barrier();
+ r1->set_io_strategy(STRATEGY);
+ timer.start();
+ r1->read_timestep();
+ timer.stop(":");
+
+ sprintf(time_message,"%s Read: SIZE: %d, Time %i %s ",
+ (flag==0? "Simulated":"Real"),
+ arraysize, i, timer.get_description());
+ printf("%s", time_message);
+ }
+#ifdef VERIFYBF
+ for(i=0;i<Num_of_Arrays;i++)
+ if (arrays[i]->verify_byte_pattern())
+ printf("Byte pattern verified for array %d\n", i);
+ else
+ printf("Byte pattern incorrect for array %d\n", i);
+#endif
+ global_bear->cleanfiles();
+}
+
+
+int gemein(Panda *bear, int io_nodes, int arrayrank, int *arraysize, int esize,
+ int mrank, int *mlayout, int drank, int *dlayout,
+ Distribution *mem_dist, int* blk_size, Distribution *disk_dist, int cost_model)
+{
+ ArrayLayout *mem1; // Memory array layout
+ ArrayLayout *disk1; // Disk array layout
+ int i;
+ Array **arrays;
+ arrays = (Array **)malloc(sizeof(Array*)*Num_of_Arrays);
+
+// Set up memory and disk layouts
+ mem1 = new ArrayLayout (mrank,mlayout);
+ disk1 = new ArrayLayout(drank,dlayout);
+
+// Create an Array for computation.
+ char *name;
+ name = (char *)malloc(sizeof(char)*(strlen("z1Array")+5));
+ char temp[5];
+ for (i=0; i< Num_of_Arrays; i++) {
+ strcpy(name,"z1Array");
+ sprintf(temp, "%d", i);
+ strcat(name, temp);
+ arrays[i] = new Array(name,arrayrank,arraysize,esize,
+ mem1,mem_dist,disk1, disk_dist);
+ }
+ free(name);
+
+ if (Num_Simulate_Write + Num_Write > 0) {
+ ArrayGroup *t1 = new ArrayGroup("z4timestep");
+ for (i= 0; i<Num_of_Arrays; i++) t1->insert(arrays[i]);
+ test_timestep(t1, arraysize[arrayrank-1], arrays);
+ delete t1;
+ if (Num_Simulate_Read + Num_Read > 0) {
+ ArrayGroup *r1 = new ArrayGroup("z4timestep");
+ for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]);
+ test_readtimestep(r1, arraysize[arrayrank-1], arrays);
+ delete r1;
+ }
+ } else {
+
+ ArrayGroup *r1 = new ArrayGroup("z4timestep");
+ for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]);
+ test_readtimestep(r1, arraysize[arrayrank-1], arrays);
+ delete r1;
+ }
+
+ // delete all objects created
+
+ for (i=0; i<Num_of_Arrays; i++) delete arrays[i];
+ free(arrays);
+ delete disk1;
+ delete mem1;
+ return(0);
+}
+
+char my_getopt(char *str)
+{
+ char command[24][15];
+
+ strcpy(command[0], "-Total_nodes");
+ strcpy(command[1], "-Io_nodes");
+ strcpy(command[2], "-upper");
+ strcpy(command[3], "-Arraysize");
+ strcpy(command[4], "-Esize");
+ strcpy(command[5], "-Mlayout");
+ strcpy(command[6], "-Dlayout");
+ strcpy(command[7], "-mem_dist");
+ strcpy(command[8], "-disk_dist");
+ strcpy(command[9], "-num_arrays");
+ strcpy(command[10], "-read_simulate");
+ strcpy(command[11], "-Read");
+ strcpy(command[12], "-write_simulate");
+ strcpy(command[13], "-Write");
+ strcpy(command[14], "-interleave");
+ strcpy(command[15], "-Cost_model");
+ strcpy(command[16], "-size_message");
+ strcpy(command[17], "-Xfactor");
+ strcpy(command[18], "-K");
+
+ for (int i= 0; i< 24; i++)
+ if (!strncmp(str, command[i], 2)) return command[i][1];
+ printf("undefined input %s, quit!\n",str);
+ exit(0);
+}
+
+void parse_cl(int argc, char **argv, int &total_nodes, int &io_nodes,
+ int &upper_bound, int &lower_bound, int &arrayrank, int*& arraysize,
+ int &esize, int &mrank, int*& mlayout, int& drank, int*& dlayout,
+ Distribution*& mem_dist, int*& blk_size,
+ Distribution*& disk_dist, int &cost_model_mode, int &upper_blk)
+{
+ char opt;
+ int k;
+
+ for (int i=1; i<argc; ) {
+ opt = my_getopt(argv[i++]);
+ switch(opt) {
+ case 'X':
+ STRATEGY = atoi(argv[i++]);
+ break;
+ case 'T':
+ total_nodes = atoi(argv[i++]);
+ break;
+ case 'I':
+ io_nodes = atoi(argv[i++]);
+ break;
+ case 'u':
+ upper_bound = atoi(argv[i++]);
+ break;
+ case 'A':
+ arrayrank = atoi(argv[i++]);
+ arraysize = (int *) malloc(sizeof(int)* arrayrank);
+ mem_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank);
+ blk_size = (int *) malloc(sizeof(int)*arrayrank);
+ disk_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank);
+ for (k = 0; k < arrayrank; k++) arraysize[k] = atoi(argv[i++]);
+ lower_bound = arraysize[k-1];
+ break;
+ case 'E':
+ esize = atoi(argv[i++]);
+ break;
+ case 'M':
+ mrank = atoi(argv[i++]);
+ mlayout = (int *) malloc(sizeof(int)* mrank);
+ for (k = 0; k < mrank; k++) mlayout[k] = atoi(argv[i++]);
+ break;
+ case 'D':
+ drank = atoi(argv[i++]);
+ dlayout = (int *) malloc(sizeof(int)* drank);
+ for (k = 0; k < drank; k++) dlayout[k] = atoi(argv[i++]);
+ break;
+ case 'm':
+ for (k = 0; k < arrayrank; k++)
+ {
+ mem_dist[k] = (Distribution)atoi(argv[i++]);
+ }
+ break;
+ case 'd':
+ for (k = 0; k < arrayrank; k++)
+ {
+ disk_dist[k] = (Distribution)atoi(argv[i++]);
+ }
+ break;
+
+ case 'n':
+ Num_of_Arrays = atoi(argv[i++]);
+ break;
+ case 'r':
+ Num_Simulate_Read = atoi(argv[i++]);
+ break;
+ case 'R':
+ Num_Read = atoi(argv[i++]);
+ break;
+ case 'w':
+ Num_Simulate_Write = atoi(argv[i++]);
+ break;
+ case 'W':
+ Num_Write = atoi(argv[i++]);
+ break;
+ case 'i':
+ interleave = atoi(argv[i++]);
+ break;
+ case 'C':
+ cost_model_mode = atoi(argv[i++]);
+ break;
+ case 's':
+ SUBCHUNK_SIZE = atoi(argv[i++]);
+ break;
+
+ case 'K':
+ upper_blk = atoi(argv[i++]);
+ break;
+
+ /* For Panda internal library stuff */
+
+
+ }
+ }
+
+ printf("####### io nodes=%d \n", io_nodes);
+}
+
+int main(int argc, char **argv)
+{
+ int total_nodes; // The number of total nodes (comp + io)
+ int io_nodes; // The number of io nodes
+ int upper_bound; // The upper bound of the last dimension of the array
+ int lower_bound; // The starting number of the last dimension of the array
+ int arrayrank ; // The array rank.
+ int *arraysize; // The number of elements along each array dimension
+ int esize ; // element size of each array element
+ int mrank ; // Compute node mesh rank
+ int *mlayout; // Compute node mesh layout
+ int drank ; // IO node mesh rank
+ int cost_model_mode; // Whether the cost model is included.
+ int *dlayout; // IO node mesh layout
+ Distribution *mem_dist; // The memory array distribution along each dimension
+ // There are three possible distributions (BLOCK,
+ // NONE, CYCLIC).
+ int *blk_size;
+ Distribution *disk_dist; // The disk array distribution along each dimension
+ int my_rank, my_app_size, *world_ranks, leader;
+
+ int upper_blk; // upper bound of the block size
+ int lower_blk;
+ Panda *bear;
+
+
+ MPI_Init(&argc, &argv);
+
+// For Parallel architecture (IBM SP2 like),
+// Initialize the MPI environment. Only compute nodes will return from
+// this call, the io nodes will not return from the call. All the io nodes
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+ MPI_Comm_size(MPI_COMM_WORLD, &my_app_size);
+ leader = 0;
+ world_ranks = (int *) malloc(sizeof(int)*my_app_size);
+
+ parse_cl(argc, argv, total_nodes, io_nodes, upper_bound, lower_bound,
+ arrayrank, arraysize, esize, mrank, mlayout, drank, dlayout, mem_dist,
+ blk_size, disk_dist, cost_model_mode, upper_blk);
+
+ int q = total_nodes/io_nodes;
+
+ for (int i=0; i<io_nodes; i++)
+ world_ranks[i] = i*q;
+ for (int j=0; j<io_nodes; j++)
+ for (int k=1; k< q; k++)
+ world_ranks[i++] = j*q + k;
+
+/*
+ world_ranks[0] = 0;
+ world_ranks[1] = 3;
+ world_ranks[2] = 1;
+ world_ranks[3] = 2;
+ world_ranks[4] = 4;
+ world_ranks[5] = 5;
+
+ printf("myrank= %d, io_nodes=%d, total_nodes=%d \n",
+ my_rank, io_nodes, total_nodes);
+*/
+ printf("world ranks \n");
+ for (i=0; i<my_app_size; i++)
+ printf(" %d", world_ranks[i]);
+ printf("\n\n");
+
+ if (my_rank % q == 0)
+ { // io nodes
+ bear = new Panda(IO_NODE, 0, my_rank/q, io_nodes, world_ranks);
+ global_bear = bear;
+ }
+ else
+ { // compute nodes
+ bear = new Panda(COMPUTE_NODE, 1,
+ my_rank/q*(q-1)+(my_rank-1)%q,
+ my_app_size-io_nodes, world_ranks+io_nodes);
+ global_bear = bear;
+
+ for (int size=lower_bound; size <= upper_bound; size*=2)
+ {
+ arraysize[arrayrank-1] = size;
+ gemein(bear,io_nodes, arrayrank, arraysize, esize,
+ mrank, mlayout, drank, dlayout, mem_dist, blk_size,
+ disk_dist, cost_model_mode);
+ }
+
+ }
+
+ free(arraysize); free(mlayout); free(dlayout); free(mem_dist);
+ free(blk_size);
+ free(disk_dist);
+ free(world_ranks);
+ delete bear;
+
+ MPI_Finalize();
+ return(0);
+}
diff --git a/src/Panda/io_main.C b/src/Panda/io_main.C
new file mode 100644
index 0000000..69b0a63
--- /dev/null
+++ b/src/Panda/io_main.C
@@ -0,0 +1,83 @@
+#include "definitions.h"
+#include "StopWatch.h"
+#include "Panda.h"
+#include "ArrayGroup.h"
+
+extern MPIFS* MPIFS_global_obj;
+extern int BRANCHING_FACTOR;
+extern int SUBCHUNK_SIZE;
+Boolean shared_flag = NO;
+
+
+char my_getopt(char *str)
+{
+ char command[8][15];
+
+ strcpy(command[0], "-chunks");
+ strcpy(command[1], "-xmax_messages");
+ strcpy(command[2], "-tags");
+ strcpy(command[3], "-branching_factor");
+ strcpy(command[4], "-ymax_memory");
+ strcpy(command[5], "-flag");
+ strcpy(command[6], "-size_message");
+ strcpy(command[7], "-Shared");
+
+ for (int i= 0; i< 8; i++)
+ if (!strncmp(str, command[i], 2)) return command[i][1];
+ printf("undefined input %s, quit!\n",str);
+ exit(0);
+}
+
+void parse_cl(int argc, char **argv)
+{
+ char opt;
+
+ for(int i=1; i< argc; ){
+ opt = my_getopt(argv[i++]);
+ switch(opt) {
+ case 'b' :
+ BRANCHING_FACTOR = atoi(argv[i++]);
+ break;
+ case 's':
+ SUBCHUNK_SIZE = atoi(argv[i++]);
+ break;
+ case 'S':
+ shared_flag = (Boolean) atoi(argv[i++]);
+ break;
+ }
+ }
+}
+
+main(int argc, char **argv)
+{
+ int *world_ranks, my_rank, leader, app_size;
+ MPI_Init(&argc, &argv);
+ Panda *bear;
+ char cmd[100];
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+ sprintf(cmd , "rm -rf %s", FILEPREFIX);
+ //if (my_rank == 0)
+ system(cmd);
+ sprintf(cmd , "mkdir %s", FILEPREFIX);
+ //if (my_rank == 0)
+ system(cmd);
+ MPI_Comm_size(MPI_COMM_WORLD, &app_size);
+ world_ranks = (int *) malloc(sizeof(int)*app_size);
+ leader = 0;
+
+ for(int i=0;i< app_size; i++)
+ world_ranks[i] = leader+i;
+ parse_cl(argc, argv);
+ if (shared_flag){
+ bear = new Panda(IO_NODE, 0, my_rank, app_size,
+ world_ranks, YES);
+ }
+ else {
+ bear = new Panda(IO_NODE, 0, my_rank, app_size, world_ranks);
+ }
+ delete bear;
+// sprintf(cmd , "rm -rf %s", FILEPREFIX); if (my_rank == 0) system(cmd);
+ MPI_Finalize();
+}
+
diff --git a/src/Panda/make.code.defn b/src/Panda/make.code.defn
new file mode 100644
index 0000000..dd9edc3
--- /dev/null
+++ b/src/Panda/make.code.defn
@@ -0,0 +1,77 @@
+SRCS = App_Info.C Array.C ArrayDistribution.C ArrayLayout.C Attribute.C Chunk.C Collective_IO.C List.C MPIFS.C Panda.C Simple_IO.C Template.C VirtFS.C c_interface.C
+
+SUBDIRS =
+
+# The 9000 names of the cygwin tools and T3E...
+TMPUN := $(shell uname)
+ifeq ($(TMPUN), CYGWIN32_95)
+UNAME = CYGWIN
+else
+ifeq ($(TMPUN), CYGWIN32_NT)
+UNAME = CYGWIN
+else
+ifeq ($(TMPUN), CYGWIN_NT-4.0)
+UNAME = CYGWIN
+else
+UNAME := $(shell uname | perl -pe 's/(sn\d\d\d\d|jsimpson)/UNICOS\/mk/')
+endif
+endif
+endif
+
+# 64 Bit Irix
+ifeq ($(UNAME), IRIX64)
+
+CXXFLAGS += -DANSI -DFILEPREFIX -ptused
+
+endif
+
+# 32 Bit Irix
+ifeq ($(UNAME), IRIX)
+
+CXXFLAGS += -DANSI -ptused
+
+endif
+
+# HP
+ifeq ($(UNAME), HP-UX)
+
+CXXFLAGS += -DANSI -DHP
+
+endif
+
+# Alpha
+ifeq ($(UNAME), OSF1)
+
+CXXFLAGS += -DANSI
+
+endif
+
+# Linux
+ifeq ($(UNAME), Linux)
+
+CXXFLAGS += -DANSI
+
+endif
+
+# Macintosh /PowerMach-MachTen
+ifeq ($(UNAME), machten)
+
+CXXFLAGS += -DANSI
+
+endif
+
+# Cygwin / Win32
+ifeq ($(UNAME), CYGWIN)
+
+CFLAGS += -DANSI -DWIN32
+CXXFLAGS += -DANSI -DWIN32
+
+endif
+
+# T3E
+ifeq ($(UNAME), UNICOS/mk)
+
+CXXFLAGS += -DANSI -DT3E -hinstantiate=used
+
+endif
+
diff --git a/src/Panda/makefile.hpux.mpich b/src/Panda/makefile.hpux.mpich
new file mode 100644
index 0000000..6ad74c5
--- /dev/null
+++ b/src/Panda/makefile.hpux.mpich
@@ -0,0 +1,19 @@
+# makefile part for hpux (yong 8/3/95)
+include fileprefix
+MPICH_HOME = /extra/ying/mpich
+MPIRUN_HOME = /extra/ying/mpirun
+INCLUDE_DIR = -I$(MPICH_HOME)/include -I$(MPIRUN_HOME)/include
+WGEN_DIR = /extra/ying/mpich/profiling/wrappergen
+LIBS = -L$(MPIRUN_HOME)/lib -lmpirun -L$(MPICH_HOME)/lib/$(ARCH)/$(COMM) -lmpi -lm -lV3
+#LIBS = -L$(MPIRUN_HOME)/lib -lmpirun -L$(MPICH_HOME)/lib/$(ARCH)/$(COMM) -lmpi -lpmpi -lm -lV3
+#MPILIB = $(MPICH_HOME)/lib/$(ARCH)/$(COMM)/libmpi.a
+DEVICE = ch_p4
+COMM = ch_p4
+ARCH = hpux
+AR = /usr/gnu/bin/ar # for aix, but also pretty standard
+CC = gcc
+OPTFLAGS = -g -Wall
+CFLAGS = -DMPID_NO_FORTRAN -DHAS_XDR=1 \
+ -DHAVE_STDLIB_H=1 \
+ -DHAVE_SYSTEM=1 $(OPTFLAGS) $(INCLUDE_DIR) -DMPI_$(ARCH) \
+ -DTARGETHPUX -DFILEPREFIX=$(FILEPREFIXVAL)
diff --git a/src/Panda/makefile.ibm.mpif b/src/Panda/makefile.ibm.mpif
new file mode 100644
index 0000000..a51b052
--- /dev/null
+++ b/src/Panda/makefile.ibm.mpif
@@ -0,0 +1,11 @@
+# makefile part for aix with our MPIFS filesystem on MPIF (jozwiak 030795)
+include fileprefix
+INCLUDE_DIR = -I/usr/local/include/ibm-mpi
+LIBS = -lm -L/usr/local/lib/ibm-mpi -lmpirun
+AR = /bin/ar # for aix
+CC = mpCC
+#CC = ./mpifxlC
+OPTFLAGS = -g -DCOST_MODEL
+CFLAGS = $(OPTFLAGS) $(INCLUDE_DIR) \
+ -DTARGETAIX -DFILEPREFIX=$(FILEPREFIXVAL) \
+ -DWRAPPERTEST -DNAS_MPIF
diff --git a/src/Panda/makefile.proto b/src/Panda/makefile.proto
new file mode 100644
index 0000000..17805b3
--- /dev/null
+++ b/src/Panda/makefile.proto
@@ -0,0 +1,96 @@
+# makefile on 3-7-95 for C++ version of panda
+
+# REMOVE # for the intended build (NOTE: # is a comment, unlike for C)
+# include makefile.ibm.mpif
+# include makefile.ibm.mpich # this one is flakey, use mpif
+# include makefile.sun.mpich
+# include makefile.unix.posix
+
+ARCHIVE = libeegads.a
+OFILES = Array.o Chunk.o Simple_IO.o Panda.o \
+ ArrayLayout.o List.o Collective_IO.o \
+ MPIFS.o Attribute.o ArrayDistribution.o \
+ Template.o VirtFS.o App_Info.o c_interface.o
+CFILES =
+
+all: $(ARCHIVE)
+
+$(ARCHIVE): $(OFILES)
+ $(AR) crv $(ARCHIVE) $(OFILES)
+
+Array.o: Array.C Array.h Template.h List.h MPIFS.h ArrayLayout.h definitions.h
+ $(CC) $(CFLAGS) -c Array.C
+ArrayGroup.o: ArrayGroup.C ArrayGroup.h ArrayGroup.h MPIFS.h definitions.h
+ $(CC) $(CFLAGS) -c ArrayGroup.C
+List.o: List.C List.h definitions.h
+ $(CC) $(CFLAGS) -c List.C
+ArrayLayout.o: ArrayLayout.C ArrayLayout.h Template.h definitions.h
+ $(CC) $(CFLAGS) -c ArrayLayout.C
+Template.o: Template.C Template.h definitions.h
+ $(CC) $(CFLAGS) -c Template.C
+VirtFS.o: VirtFS.C VirtFS.h
+ $(CC) -c $(CFLAGS) VirtFS.C
+MPIFS.o: MPIFS.C MPIFS.h VirtFS.h Array.h Collective_IO.h Simple_IO.h definitions.h App_Info.h message.h
+ $(CC) -c $(CFLAGS) MPIFS.C
+Panda.o: Panda.C Panda.h VirtFS.h MPIFS.h definitions.h
+ $(CC) -c $(CFLAGS) Panda.C
+Chunk.o: Chunk.C Chunk.h ArrayLayout.h Array.h definitions.h
+ $(CC) -c $(CFLAGS) Chunk.C
+Collective_IO.o: Collective_IO.C Collective_IO.h definitions.h
+ $(CC) -c $(CFLAGS) Collective_IO.C
+Simple_IO.o: Simple_IO.C Simple_IO.h Collective_IO.h Array.h MPIFS.h definitions.h message.h
+ $(CC) -c $(CFLAGS) Simple_IO.C
+CSDIO.o: CSDIO.C CSDIO.h Simple_IO.h Collective_IO.h Array.h MPIFS.h definitions.h message.h
+ $(CC) -c $(CFLAGS) CSDIO.C
+Shared_IO.o: Shared_IO.C Shared_IO.h Simple_IO.h Collective_IO.h Array.h MPIFS.h definitions.h message.h
+ $(CC) -c $(CFLAGS) Shared_IO.C
+CSDIO_Shared.o: CSDIO_Shared.C CSDIO_Shared.h CSDIO.h Simple_IO.h Collective_IO.h ArrayGroup.h Array.h MPIFS.h definitions.h message.h
+ $(CC) -c $(CFLAGS) CSDIO_Shared.C
+App_Info.o: App_Info.C App_Info.h definitions.h
+ $(CC) -c $(CFLAGS) App_Info.C
+c_interface.o: c_interface.C c_interface.h
+ $(CC) -c $(CFLAGS) c_interface.C
+Attribute.o: Attribute.C Attribute.h
+ $(CC) -c $(CFLAGS) Attribute.C
+ArrayDistribution.o: ArrayDistribution.C ArrayDistribution.h
+ $(CC) -c $(CFLAGS) ArrayDistribution.C
+
+
+## Hey, Kent, how should we verify a build is indeed correct?
+## it seems that there is sort of a chicken and egg problem
+## here: we need a manually verified set of correct runs
+## against which to test later builds and test runs...
+## i set up the little bit below so that one can do a
+## `make test' to verify a correct build ...
+
+oneexe: oneexe.C $(ARCHIVE)
+ $(CC) $(CFLAGS) oneexe.C -o oneexe -L. -leegads $(LIBS)
+
+io_main: io_main.C $(ARCHIVE)
+ $(CC) $(CFLAGS) io_main.C -o io_main -L. -leegads $(LIBS)
+
+compute_test: compute_test.C $(ARCHIVE)
+ $(CC) $(CFLAGS) compute_test.C -o compute_test -L. -leegads $(LIBS)
+
+part_test: part_test.C $(ARCHIVE)
+ $(CC) $(CFLAGS) part_test.C -o part_test -L. -leegads $(LIBS)
+
+shared_test: shared_test.C $(ARCHIVE)
+ $(CC) $(CFLAGS) shared_test.C -o shared_test -L. -leegads $(LIBS)
+
+cleantests: ;
+ - /bin/rm -f core
+ - /bin/rm -f $(TESTDIR) io_main compute_test part_test shared_test oneexe
+ sync
+
+clean: cleantests
+ - /bin/rm -f $(OFILES) $(ARCHIVE)
+# - /bin/rm -f *~ PI* os-detected a.out mpi_test core *.o
+# - /bin/rm -f mputil.mp*.c makefile fileprefix mpirun.*
+ - /bin/rm -f makefile fileprefix
+ sync
+
+configure: ; @echo "already configured, or this makefile wouldn't be here"
+ @echo "to reconfigure, make clean, then sh configure"
+
+
diff --git a/src/Panda/makefile.sgi.mpich b/src/Panda/makefile.sgi.mpich
new file mode 100644
index 0000000..f9071df
--- /dev/null
+++ b/src/Panda/makefile.sgi.mpich
@@ -0,0 +1,10 @@
+# makefile part for sgi with our MPIFS filesystem (jozwiak 030795)
+include fileprefix
+Cactus_HOME = ../../..
+INCLUDE_DIR = -I/usr/include -I$(Cactus_HOME)/lib/IEEEIO
+LIBS = -lmpi -L$(Cactus_HOME)/irix6/obj -lieeeio
+AR = /usr/bin/ar # for aix
+CC = CC
+OPTFLAGS = -g
+CFLAGS = $(OPTFLAGS) $(INCLUDE_DIR) \
+ -DFILEPREFIX=$(FILEPREFIXVAL)
diff --git a/src/Panda/makefile.sun.mpich b/src/Panda/makefile.sun.mpich
new file mode 100644
index 0000000..4d00846
--- /dev/null
+++ b/src/Panda/makefile.sun.mpich
@@ -0,0 +1,18 @@
+# makefile part for bunny with our MPIFS filesystem on MPICH (jozwiak 030795)
+include fileprefix
+MPIR_HOME = /home2/panda/MPI/mpich
+INCLUDE_DIR = -I$(MPIR_HOME)/include
+LIBS = -L/home2/panda/MPI/mpich/lib/sun4/ch_p4 -lmpirun -lmpi -lm
+MPILIB = $(MPIR_HOME)/lib/$(ARCH)/$(COMM)/libmpi.a
+DEVICE = ch_p4
+COMM = ch_p4
+ARCH = sun4
+AR = /usr/5bin/ar # for sunos (bsd)
+CC = gcc
+OPTFLAGS = -g -Wall
+CFLAGS = -DMPID_NO_FORTRAN -DHAS_XDR=1 \
+ -DHAVE_STDLIB_H=1 -DNAS_MPIF\
+ -DHAVE_SYSTEM=1 $(OPTFLAGS) $(INCLUDE_DIR) -DMPI_$(ARCH) \
+ -DTARGETSUNOS -DFILEPREFIX=$(FILEPREFIXVAL) \
+ -DWRAPPERTEST -DMPICH
+# -DVERIFYBF -DDEBUG
diff --git a/src/Panda/message.h b/src/Panda/message.h
new file mode 100644
index 0000000..f76998f
--- /dev/null
+++ b/src/Panda/message.h
@@ -0,0 +1,81 @@
+#ifndef message_dot_h
+#define message_dot_h
+
+
+inline void send_message(void *buf, int count, MPI_Datatype data_type,
+ int dest, int tag, MPI_Comm comm)
+{
+ MPI_Send(buf,count,data_type,dest,tag,comm);
+#ifdef DEBUG
+ printf("Sending message to %d of size %d with tag %d\n",
+ dest, count, tag);
+#endif
+}
+
+inline void nb_send_message(void *buf, int count, MPI_Datatype data_type,
+ int dest, int tag, MPI_Comm comm, MPI_Request *request)
+{
+ MPI_Isend(buf,count,data_type,dest,tag,comm, request);
+#ifdef DEBUG
+ printf("Sending nonblocking message to %d of size %d with tag %d\n",
+ dest, count, tag);
+#endif
+}
+
+
+inline void receive_message(void *buf, int count, MPI_Datatype datatype,
+ int src, int tag, MPI_Comm comm, MPI_Status *status)
+{
+ MPI_Recv(buf,count,datatype, src,tag,comm,status);
+#ifdef DEBUG
+ printf("Received message from %d of size %d with tag %d\n",
+ src, count, tag);
+#endif
+}
+
+
+inline void nb_receive_message(void *buf, int count, MPI_Datatype datatype,
+ int src, int tag, MPI_Comm comm, MPI_Request *request)
+{
+ MPI_Irecv(buf,count,datatype, src,tag,comm,request);
+#ifdef DEBUG
+ printf("Post a non-blocking receive for %d of size %d with tag %d\n",
+ src, count, tag);
+#endif
+}
+
+
+inline void mpi_test(MPI_Request *request, int *flag, MPI_Status *status)
+{
+ MPI_Test(request, flag, status);
+}
+
+
+inline void mpi_get_count(MPI_Status *status, MPI_Datatype datatype, int *len)
+{
+ MPI_Get_count(status, datatype, len);
+}
+
+
+inline void any_new_message(int *msg_code, int *msg_src,
+ int *msg_tag,MPI_Status *msg_status)
+{
+ int flag;
+
+ MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, msg_status);
+ if (!flag){
+ *msg_code = NO_MESSAGE;
+ *msg_src = -1;
+ *msg_tag = -1;
+ return;
+ }
+ else{
+ /* There some message waiting for us */
+ *msg_tag = msg_status->MPI_TAG;
+ *msg_src = msg_status->MPI_SOURCE;
+ *msg_code = msg_status->MPI_TAG % 10;
+ return;
+ }
+}
+
+#endif
diff --git a/src/Panda/oneexe.C b/src/Panda/oneexe.C
new file mode 100644
index 0000000..f9b6b07
--- /dev/null
+++ b/src/Panda/oneexe.C
@@ -0,0 +1,91 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi.h"
+#include "IO.h"
+#include "c_interface.h"
+
+extern "C" { int Panda_Create(int, char **, int, int); }
+extern "C" { void Panda_Finalize(); }
+extern "C" { void Panda_WriteAttribute(char *, char *, int, int, void *); }
+extern "C" { void *Panda_ReadAttribute(char *, char *, int *, int *); }
+extern "C" { void PandaTimestep(struct ArrayInfo *); }
+extern "C" { char *PandaReadTimestep(struct ArrayInfo *); }
+
+int main(int argc, char **argv)
+{
+ int my_rank, i, j, k;
+ ArrayInfo ainfo;
+ MPI_Init(&argc, &argv);
+ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+// Panda_Create(argc, argv, 2, 1);
+ if (Panda_Create(argc, argv, 2, 0)) { MPI_Finalize(); return 1; }
+
+ // Timestep-write
+/* int size[3] = {16, 4, 4};
+ int mem_layout[3] = {2, 1, 1};
+ Distribution mem_dist[3] = {BLOCK, BLOCK, BLOCK};
+ int disk_layout[1] = {2};
+ Distribution disk_dist[3] = {BLOCK, NONE, NONE};
+ int *data = (int *)malloc(sizeof(int) * 128);
+ ainfo.name_ = "./panda.out";
+ ainfo.rank_ = 3;
+ ainfo.size_ = size;
+ ainfo.esize_ = INT32;
+ ainfo.mem_rank_ = 3;
+ ainfo.mem_layout_ = mem_layout;
+ ainfo.mem_dist_ = mem_dist;
+ ainfo.disk_rank_ = 1;
+ ainfo.disk_layout_ = disk_layout;
+ ainfo.disk_dist_ = disk_dist;
+ ainfo.data_ = (char*)data;
+
+ for (i=0; i<8; i++)
+ for (j=0; j<4; j++)
+ for (k=0; k<4; k++) data[i*16+j*4+k] = i*16+j*4+k + my_rank;
+
+ ainfo.stencil_width_ = 0;
+ PandaTimestep(&ainfo);
+ Panda_WriteAttribute("./panda.out", "global_size", INT32, 3, size);
+
+ printf("%d - ", my_rank);
+ for (i=0; i<8; i++)
+ for (j=0; j<4; j++)
+ for (k=0; k<4; k++) printf("%d ", data[i*16+j*4+k]);
+ printf("\n"); fflush(stdout);
+ free(data); */
+
+  // Read-timestep: read back the array written by the (commented-out) write above
+ int mem_layout[3] = {2, 1, 1};;
+ Distribution mem_dist[3] = {BLOCK, BLOCK, BLOCK};
+ ainfo.name_ = "./panda.out";
+ ainfo.rank_ = 3;
+ ainfo.size_ = NULL;
+ ainfo.esize_ = 0;
+ ainfo.mem_rank_ = 3;
+ ainfo.mem_layout_ = mem_layout;
+ ainfo.mem_dist_ = mem_dist;
+ ainfo.disk_rank_ = 0;
+ ainfo.disk_layout_ = NULL;
+ ainfo.disk_dist_ = NULL;
+ ainfo.data_ = NULL;
+
+ int *data = (int *)PandaReadTimestep(&ainfo);
+
+ printf("%d - ", my_rank);
+ for (i=0; i<8; i++)
+ for (j=0; j<4; j++)
+ for (k=0; k<4; k++) printf("%d ", data[i*16+j*4+k]);
+ printf("\n"); fflush(stdout);
+ free(data);
+
+ int type, count;
+ int *data1 = (int *)Panda_ReadAttribute("./panda.out", "global_size",
+ &type, &count);
+ printf("%d: data type %d, count %d, contents: ", my_rank, type, count);
+ for (i=0; i<count; i++) printf("%d ", data1[i]);
+ printf("\n");
+ free(data1);
+
+ Panda_Finalize();
+ MPI_Finalize();
+}
diff --git a/src/Panda/os-detected b/src/Panda/os-detected
new file mode 100644
index 0000000..4f378d7
--- /dev/null
+++ b/src/Panda/os-detected
@@ -0,0 +1 @@
+irix
diff --git a/src/Panda/part_test.C b/src/Panda/part_test.C
new file mode 100644
index 0000000..03a7c2c
--- /dev/null
+++ b/src/Panda/part_test.C
@@ -0,0 +1,385 @@
+/*****************************************************************
+ * This is a sample program that shows how the panda library *
+ * is going to be used by the application programs. *
+ * The example command line format is in test7.script. *
+ * This example shows the interface with only disk layout *
+ * info but no stride or subchunking schema. The value for *
+ * those schemas use the default ones. *
+ * The current test varies the size of arrays. However, the *
+ * wrapper function allows the number of the nodes to be *
+ * changed as well. *
+ * The first iteration loads all the code in memory. *
+ * The second run does the simulated disk simulation. *
+ * From the third run on, the values are the real writes. *
+ *****************************************************************/
+
+#include <stdio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "definitions.h"
+#include "StopWatch.h"
+#include "ArrayGroup.h"
+#include "ArrayLayout.h"
+#include "Array.h"
+#include "Panda.h"
+
+int Num_of_Arrays = 1;
+int Num_Simulate_Read = 0;
+int Num_Read = 0;
+int Num_Simulate_Write = 2;
+int Num_Write = 2 ;
+int interleave = 0;
+Panda *global_bear;
+int world_rank;
+
+extern int BRANCHING_FACTOR;
+extern int SUBCHUNK_SIZE;
+int STRATEGY = 1;
+
+void test_timestep(ArrayGroup *t1, int arraysize, Array **arrays)
+{
+ StopWatch timer;
+ int i;
+ int flag=0;
+ char time_message[100];
+
+#ifdef VERIFYBF
+ for (int j=0; j<Num_of_Arrays; j++) arrays[j]->set_byte_pattern();
+ t1->set_verify();
+#endif
+
+
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+
+ for (i=0; i<Num_Simulate_Write+Num_Write; ++i) {
+ if (i < Num_Simulate_Write){
+ t1->set_simulate_mode();
+ flag=0;
+ }
+ else {
+ t1->reset_simulate_mode();
+ flag=1;
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+ }
+
+
+ global_bear->app_barrier();
+ t1->set_io_strategy(STRATEGY);
+ timer.start();
+ t1->timestep();
+ timer.stop(":");
+ sprintf(time_message,"%s Write: SIZE: %d, Time %i %s",
+ (flag==0? "Simulated":"Real"),
+ arraysize, i, timer.get_description());
+ printf("%s", time_message);
+
+ if (Num_Read + Num_Simulate_Read == 0 || i < Num_Simulate_Write + Num_Write-1 ) {
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+
+ }
+ }
+}
+
+void test_readtimestep(ArrayGroup *r1, int arraysize, Array **arrays)
+{
+ StopWatch timer;
+ int i;
+ int flag;
+ char time_message[100];
+#ifdef VERIFYBF
+ for (int j=0; j<Num_of_Arrays; j++) arrays[j]->reset_byte_pattern();
+#endif
+
+ if (Num_Write + Num_Simulate_Write == 0) {
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+ }
+
+
+
+ for (i=0; i<Num_Simulate_Read+Num_Read; ++i) {
+ if (i < Num_Simulate_Read) { r1->set_simulate_mode(); flag=0; }
+ else {r1->reset_simulate_mode();
+ flag=1;
+ global_bear->flushfiles();
+ }
+
+
+ global_bear->app_barrier();
+ r1->set_io_strategy(STRATEGY);
+ timer.start();
+ r1->restart();
+ timer.stop(":");
+
+ sprintf(time_message,"%s Read: SIZE: %d, Time %i %s ",
+ (flag==0? "Simulated":"Real"),
+ arraysize, i, timer.get_description());
+ printf("%s", time_message);
+
+ }
+#ifdef VERIFYBF
+ for(i=0;i<Num_of_Arrays;i++)
+ if (arrays[i]->verify_byte_pattern())
+ printf("%d:Byte pattern verified for array %d\n", world_rank, i);
+ else
+ printf("%d:Byte pattern incorrect for array %d\n",world_rank,i);
+#endif
+ global_bear->cleanfiles();
+}
+
+
+int gemein(Panda *bear, int io_nodes, int arrayrank, int *arraysize, int esize,
+ int mrank, int *mlayout, int drank, int *dlayout,
+ Distribution *mem_dist, Distribution *disk_dist, int cost_model)
+{
+ ArrayLayout *mem1; // Memory array layout
+ ArrayLayout *disk1; // Disk array layout
+ int i;
+ Array **arrays;
+ arrays = (Array **)malloc(sizeof(Array*)*Num_of_Arrays);
+
+// Set up memory and disk layouts
+ mem1 = new ArrayLayout (mrank,mlayout);
+ disk1 = new ArrayLayout(drank,dlayout);
+
+// Create an Array for computation.
+ char *name;
+ name = (char *)malloc(sizeof(char)*(strlen("z1Array")+5));
+ char temp[5];
+ for (i=0; i< Num_of_Arrays; i++) {
+ strcpy(name,"z1Array");
+ sprintf(temp, "%d", i);
+ strcat(name, temp);
+ arrays[i] = new Array(name,arrayrank,arraysize,esize,
+ mem1,mem_dist,disk1, disk_dist);
+ }
+ free(name);
+
+ if (Num_Simulate_Write + Num_Write > 0) {
+ ArrayGroup *t1 = new ArrayGroup("z4timestep");
+ for (i= 0; i<Num_of_Arrays; i++) t1->insert(arrays[i]);
+ test_timestep(t1, arraysize[arrayrank-1], arrays);
+ delete t1;
+ if (Num_Simulate_Read + Num_Read > 0) {
+ ArrayGroup *r1 = new ArrayGroup("z4timestep");
+ for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]);
+ test_readtimestep(r1, arraysize[arrayrank-1], arrays);
+ delete r1;
+ }
+ } else {
+
+ ArrayGroup *r1 = new ArrayGroup("z4timestep");
+ for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]);
+ test_readtimestep(r1, arraysize[arrayrank-1], arrays);
+ delete r1;
+ }
+
+ // delete all objects created
+
+ for (i=0; i<Num_of_Arrays; i++) delete arrays[i];
+ free(arrays);
+ delete disk1;
+ delete mem1;
+ return(0);
+}
+
+char my_getopt(char *str)
+{
+ char command[25][15];
+
+ strcpy(command[0], "-Total_nodes");
+ strcpy(command[1], "-Io_nodes");
+ strcpy(command[2], "-upper");
+ strcpy(command[3], "-Arraysize");
+ strcpy(command[4], "-Esize");
+ strcpy(command[5], "-Mlayout");
+ strcpy(command[6], "-Dlayout");
+ strcpy(command[7], "-mem_dist");
+ strcpy(command[8], "-disk_dist");
+ strcpy(command[9], "-num_arrays");
+ strcpy(command[10], "-read_simulate");
+ strcpy(command[11], "-Read");
+ strcpy(command[12], "-write_simulate");
+ strcpy(command[13], "-Write");
+ strcpy(command[14], "-interleave");
+ strcpy(command[15], "-Cost_model");
+ strcpy(command[16], "-chunks");
+ strcpy(command[17], "-xmax_messages");
+ strcpy(command[18], "-tags");
+ strcpy(command[19], "-branching_factor");
+ strcpy(command[20], "-ymax_memory");
+ strcpy(command[21], "-flag");
+ strcpy(command[22], "-size_message");
+ strcpy(command[23], "-Xfactor");
+ strcpy(command[24], "-Optimize");
+
+ for (int i= 0; i< 25; i++)
+ if (!strncmp(str, command[i], 2)) return command[i][1];
+ printf("undefined input %s, quit!\n",str);
+ return NULL;
+}
+
+void parse_cl(int argc, char **argv, int &total_nodes, int &io_nodes,
+ int &upper_bound, int &lower_bound, int &arrayrank, int*& arraysize,
+ int &esize, int &mrank, int*& mlayout, int& drank, int*& dlayout,
+ Distribution*& mem_dist, Distribution*& disk_dist, int &cost_model_mode)
+{
+ char opt;
+ int k;
+
+ for (int i=1; i<argc; ) {
+ opt = my_getopt(argv[i++]);
+ switch(opt)
+ {
+ case 'X':
+ STRATEGY = atoi(argv[i++]);
+ break;
+ case 'T':
+ total_nodes = atoi(argv[i++]);
+ break;
+ case 'I':
+ io_nodes = atoi(argv[i++]);
+ break;
+ case 'u':
+ upper_bound = atoi(argv[i++]);
+ break;
+ case 'A':
+ arrayrank = atoi(argv[i++]);
+ arraysize = (int *) malloc(sizeof(int)* arrayrank);
+ mem_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank);
+ disk_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank);
+ for (k = 0; k < arrayrank; k++) arraysize[k] = atoi(argv[i++]);
+ lower_bound = arraysize[k-1];
+ break;
+ case 'E':
+ esize = atoi(argv[i++]);
+ break;
+ case 'M':
+ mrank = atoi(argv[i++]);
+ mlayout = (int *) malloc(sizeof(int)* mrank);
+ for (k = 0; k < mrank; k++) mlayout[k] = atoi(argv[i++]);
+ break;
+ case 'D':
+ drank = atoi(argv[i++]);
+ dlayout = (int *) malloc(sizeof(int)* drank);
+ for (k = 0; k < drank; k++) dlayout[k] = atoi(argv[i++]);
+ break;
+ case 'm':
+ for (k = 0; k < arrayrank; k++) mem_dist[k] = (Distribution)atoi(argv[i++]);
+ break;
+ case 'd':
+ for (k = 0; k < arrayrank; k++) disk_dist[k] = (Distribution)atoi(argv[i++]);
+ break;
+ case 'n':
+ Num_of_Arrays = atoi(argv[i++]);
+ break;
+ case 'r':
+ Num_Simulate_Read = atoi(argv[i++]);
+ break;
+ case 'R':
+ Num_Read = atoi(argv[i++]);
+ break;
+ case 'w':
+ Num_Simulate_Write = atoi(argv[i++]);
+ break;
+ case 'W':
+ Num_Write = atoi(argv[i++]);
+ break;
+ case 'i':
+ interleave = atoi(argv[i++]);
+ break;
+ case 'C':
+ cost_model_mode = atoi(argv[i++]);
+ break;
+ case 'b' :
+ BRANCHING_FACTOR = atoi(argv[i++]);
+ break;
+ case 's':
+ SUBCHUNK_SIZE = atoi(argv[i++]);
+ break;
+ }
+ }
+}
+
+
+int main(int argc, char **argv)
+{
+ int total_nodes; // The number of total nodes (comp + io)
+ int io_nodes; // The number of io nodes
+ int upper_bound; // The upper bound of the last dimension of the array
+ int lower_bound; // The starting number of the last dimension of the array
+ int arrayrank ; // The array rank.
+  int *arraysize;          // The number of elements along each array dimension
+ int esize ; // element size of each array element
+ int mrank ; // Compute node mesh rank
+ int *mlayout; // Compute node mesh layout
+ int drank ; // IO node mesh rank
+ int cost_model_mode; // Whether the cost model is included.
+ int *dlayout; // IO node mesh layout
+  Distribution *mem_dist;  // The memory array distribution along each dimension
+                           // There are three possible distributions (BLOCK,
+                           // NONE, CYCLIC).
+  Distribution *disk_dist; // The disk array distribution along each dimension
+ int my_rank, my_app_size, *world_ranks, leader;
+ char sys_command[100];
+
+ MPI_Init(&argc, &argv);
+
+// For Parallel architecture (IBM SP2 like),
+// Initialize the MPI environment. Only compute nodes will return from
+// this call, the io nodes will not return from the call. All the io nodes
+ MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+ MPI_Comm_size(MPI_COMM_WORLD, &my_app_size);
+ leader = 0;
+ world_ranks = (int *) malloc(sizeof(int)*my_app_size);
+ for(int i=0;i< my_app_size; i++)
+ world_ranks[i] = leader+i;
+
+
+
+ Panda *bear;
+ int my_io_rank = my_rank;
+ int *io_ranks;
+
+ parse_cl(argc, argv, total_nodes, io_nodes, upper_bound, lower_bound,
+ arrayrank, arraysize, esize, mrank, mlayout, drank, dlayout,
+ mem_dist, disk_dist, cost_model_mode);
+
+ io_ranks = world_ranks;
+
+
+ if (my_io_rank<io_nodes)
+ {
+ global_bear = new Panda(PART_TIME_IO, my_rank, my_app_size, world_ranks,
+ my_io_rank, io_nodes, io_ranks);
+ bear = global_bear;
+ }
+ else
+ {
+ global_bear = new Panda(PART_TIME_COMPUTE, my_rank, my_app_size, world_ranks,
+ -1, io_nodes, io_ranks);
+ bear = global_bear;
+ }
+ for (int size=lower_bound; size <= upper_bound; size*=2) {
+ arraysize[arrayrank-1] = size;
+ gemein(bear,io_nodes, arrayrank, arraysize, esize,
+ mrank, mlayout, drank, dlayout, mem_dist,
+ disk_dist, cost_model_mode);
+ }
+
+ free(mlayout);
+ free(dlayout);
+ free(mem_dist);
+ free(disk_dist);
+ free(world_ranks);
+ delete bear;
+
+ MPI_Finalize();
+ return(0);
+}
diff --git a/src/Panda/shared_test.C b/src/Panda/shared_test.C
new file mode 100644
index 0000000..00ebaa1
--- /dev/null
+++ b/src/Panda/shared_test.C
@@ -0,0 +1,353 @@
+/*****************************************************************
+ * This is a sample program that shows how the panda library *
+ * is going to be used by the application programs. *
+ * The example command line format is in test7.script. *
+ * This example shows the interface with only disk layout *
+ * info but no stride or subchunking schema. The value for *
+ * those schemas use the default ones. *
+ * The current test varies the size of arrays. However, the *
+ * wrapper function allows the number of the nodes to be *
+ * changed as well. *
+ * The first iteration loads all the code in memory. *
+ * The second run does the simulated disk simulation. *
+ * From the third run on, the values are the real writes. *
+ *****************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "definitions.h"
+#include "StopWatch.h"
+#include "ArrayGroup.h"
+#include "ArrayLayout.h"
+#include "Array.h"
+#include "Panda.h"
+#include "mpirun.h"
+
+int Num_of_Arrays = 1;
+int Num_Simulate_Read = 0;
+int Num_Read = 0;
+int Num_Simulate_Write = 2;
+int Num_Write = 2 ;
+int interleave = 0;
+Panda *global_bear;
+
+extern int BRANCHING_FACTOR;
+extern int SUBCHUNK_SIZE;
+int STRATEGY = 1;
+
+void test_timestep(ArrayGroup *t1, int arraysize, Array **arrays)
+{
+ StopWatch timer;
+ int i;
+ int flag=0;
+ char time_message[100];
+
+#ifdef VERIFYBF
+ for (int j=0; j<Num_of_Arrays; j++) arrays[j]->set_byte_pattern();
+ t1->set_verify();
+#endif
+
+
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+
+
+ for (i=0; i<Num_Simulate_Write+Num_Write; ++i) {
+ if (i < Num_Simulate_Write){
+ t1->set_simulate_mode();
+ flag=0;
+ }
+ else {
+ t1->reset_simulate_mode();
+ flag=1;
+ }
+
+
+ global_bear->global_barrier();
+ t1->set_io_strategy(STRATEGY);
+ timer.start();
+ t1->timestep();
+ timer.stop(":");
+ sprintf(time_message,"App_id %d: %s Write: SIZE: %d, Time %i %s",
+ MPIRUN_APP_ID, (flag==0? "Simulated":"Real"),
+ arraysize, i, timer.get_description());
+ printf("%s", time_message);
+
+ if (Num_Read + Num_Simulate_Read == 0 || i < Num_Simulate_Write + Num_Write-1 ) {
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+
+ }
+ }
+}
+
+void test_readtimestep(ArrayGroup *r1, int arraysize, Array **arrays)
+{
+ StopWatch timer;
+ int i;
+ int flag;
+ char time_message[100];
+#ifdef VERIFYBF
+ for (int j=0; j<Num_of_Arrays; j++) arrays[j]->reset_byte_pattern();
+#endif
+
+ if (Num_Write + Num_Simulate_Write == 0) {
+ global_bear->cleanfiles();
+ global_bear->createfiles();
+ }
+
+
+
+ for (i=0; i<Num_Simulate_Read+Num_Read; ++i) {
+ if (i < Num_Simulate_Read) { r1->set_simulate_mode(); flag=0; }
+ else {r1->reset_simulate_mode();
+ flag=1;
+ global_bear->flushfiles();
+ }
+
+
+ global_bear->global_barrier();
+ r1->set_io_strategy(STRATEGY);
+ timer.start();
+ r1->read_timestep();
+ timer.stop(":");
+
+ sprintf(time_message,"App_id %d: %s Read: SIZE: %d, Time %i %s ",
+ MPIRUN_APP_ID, (flag==0? "Simulated":"Real"),
+ arraysize, i, timer.get_description());
+ printf("%s", time_message);
+ }
+#ifdef VERIFYBF
+ for(i=0;i<Num_of_Arrays;i++)
+ if (arrays[i]->verify_byte_pattern())
+ printf("Byte pattern verified for array %d\n", i);
+ else
+ printf("Byte pattern incorrect for array %d\n", i);
+#endif
+ global_bear->cleanfiles();
+}
+
+
+int gemein(Panda *bear, int io_nodes, int arrayrank, int *arraysize, int esize,
+ int mrank, int *mlayout, int drank, int *dlayout,
+ Distribution *mem_dist, Distribution *disk_dist, int cost_model)
+{
+ ArrayLayout *mem1; // Memory array layout
+ ArrayLayout *disk1; // Disk array layout
+ int i;
+ Array **arrays;
+ arrays = (Array **)malloc(sizeof(Array*)*Num_of_Arrays);
+
+// Set up memory and disk layouts
+ mem1 = new ArrayLayout (mrank,mlayout);
+ disk1 = new ArrayLayout(drank,dlayout);
+
+// Create an Array for computation.
+ char *name;
+ name = (char *)malloc(sizeof(char)*(strlen("z1Array")+5));
+ char temp[5];
+ for (i=0; i< Num_of_Arrays; i++) {
+ strcpy(name,"z1Array");
+ sprintf(temp, "%d", i);
+ strcat(name, temp);
+ arrays[i] = new Array(name,arrayrank,arraysize,esize,
+ mem1,mem_dist,disk1, disk_dist);
+ }
+ free(name);
+
+ if (Num_Simulate_Write + Num_Write > 0) {
+ ArrayGroup *t1 = new ArrayGroup("z4timestep");
+ for (i= 0; i<Num_of_Arrays; i++) t1->insert(arrays[i]);
+ test_timestep(t1, arraysize[arrayrank-1], arrays);
+ delete t1;
+ if (Num_Simulate_Read + Num_Read > 0) {
+ ArrayGroup *r1 = new ArrayGroup("z4timestep");
+ for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]);
+ test_readtimestep(r1, arraysize[arrayrank-1], arrays);
+ delete r1;
+ }
+ } else {
+
+ ArrayGroup *r1 = new ArrayGroup("z4timestep");
+ for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]);
+ test_readtimestep(r1, arraysize[arrayrank-1], arrays);
+ delete r1;
+ }
+
+ // delete all objects created
+
+ for (i=0; i<Num_of_Arrays; i++) delete arrays[i];
+ free(arrays);
+ delete disk1;
+ delete mem1;
+ return(0);
+}
+
+char my_getopt(char *str)
+{
+ char command[18][15];
+
+ strcpy(command[0], "-Total_nodes");
+ strcpy(command[1], "-Io_nodes");
+ strcpy(command[2], "-upper");
+ strcpy(command[3], "-Arraysize");
+ strcpy(command[4], "-Esize");
+ strcpy(command[5], "-Mlayout");
+ strcpy(command[6], "-Dlayout");
+ strcpy(command[7], "-mem_dist");
+ strcpy(command[8], "-disk_dist");
+ strcpy(command[9], "-num_arrays");
+ strcpy(command[10], "-read_simulate");
+ strcpy(command[11], "-Read");
+ strcpy(command[12], "-write_simulate");
+ strcpy(command[13], "-Write");
+ strcpy(command[14], "-interleave");
+ strcpy(command[15], "-Cost_model");
+ strcpy(command[16], "-size_message");
+ strcpy(command[17], "-Xfactor");
+
+ for (int i= 0; i< 18; i++)
+ if (!strncmp(str, command[i], 2)) return command[i][1];
+ printf("undefined input %s, quit!\n",str);
+ exit(0);
+}
+
+/* parse_cl: decode the benchmark's command line into its configuration.
+   Each argv entry is an option name (decoded by my_getopt) followed by
+   one or more integer values consumed with atoi.  Reference/pointer
+   parameters are out-parameters filled in here; several globals
+   (STRATEGY, SUBCHUNK_SIZE, Num_of_Arrays, Num_*_Read/Write,
+   interleave) are set directly.
+   NOTE(review): the 'm' and 'd' cases write into mem_dist/disk_dist,
+   which are allocated only by a preceding 'A' (-Arraysize) option --
+   "-mem_dist" before "-Arraysize" on the command line would dereference
+   an uninitialized pointer.  Likewise out-params for options that never
+   appear are left uninitialized.
+   NOTE(review): the switch has no default case; my_getopt exits on
+   unknown options, so an unmatched opcode here would silently consume
+   nothing -- consider adding a default for defensiveness.  */
+void parse_cl(int argc, char **argv, int &total_nodes, int &io_nodes,
+ int &upper_bound, int &lower_bound, int &arrayrank, int*& arraysize,
+ int &esize, int &mrank, int*& mlayout, int& drank, int*& dlayout,
+ Distribution*& mem_dist, Distribution*& disk_dist, int &cost_model_mode)
+{
+ char opt;
+ int k;
+
+ for (int i=1; i<argc; ) {
+ opt = my_getopt(argv[i++]);
+ switch(opt) {
+ case 'X': /* -Xfactor */
+ STRATEGY = atoi(argv[i++]);
+ break;
+ case 's': /* -size_message */
+ SUBCHUNK_SIZE = atoi(argv[i++]);
+ break;
+ case 'T': /* -Total_nodes */
+ total_nodes = atoi(argv[i++]);
+ break;
+ case 'I': /* -Io_nodes */
+ io_nodes = atoi(argv[i++]);
+ break;
+ case 'u': /* -upper: upper bound of the last array dimension */
+ upper_bound = atoi(argv[i++]);
+ break;
+ case 'A': /* -Arraysize: rank followed by one extent per dimension;
+ also allocates the per-dimension distribution arrays */
+ arrayrank = atoi(argv[i++]);
+ arraysize = (int *) malloc(sizeof(int)* arrayrank);
+ mem_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank);
+ disk_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank);
+ for (k = 0; k < arrayrank; k++) arraysize[k] = atoi(argv[i++]);
+ lower_bound = arraysize[k-1]; /* k == arrayrank here, so this is the
+ last-dimension extent, the start of main()'s size sweep */
+ break;
+ case 'E': /* -Esize: bytes per array element */
+ esize = atoi(argv[i++]);
+ break;
+ case 'M': /* -Mlayout: compute-node mesh rank then extents */
+ mrank = atoi(argv[i++]);
+ mlayout = (int *) malloc(sizeof(int)* mrank);
+ for (k = 0; k < mrank; k++) mlayout[k] = atoi(argv[i++]);
+ break;
+ case 'D': /* -Dlayout: I/O-node mesh rank then extents */
+ drank = atoi(argv[i++]);
+ dlayout = (int *) malloc(sizeof(int)* drank);
+ for (k = 0; k < drank; k++) dlayout[k] = atoi(argv[i++]);
+ break;
+ case 'm': /* -mem_dist: per-dimension memory distribution codes */
+ for (k = 0; k < arrayrank; k++) mem_dist[k] = (Distribution)atoi(argv[i++]);
+ break;
+ case 'd': /* -disk_dist: per-dimension disk distribution codes */
+ for (k = 0; k < arrayrank; k++) disk_dist[k] = (Distribution)atoi(argv[i++]);
+ break;
+ case 'n': /* -num_arrays */
+ Num_of_Arrays = atoi(argv[i++]);
+ break;
+ case 'r': /* -read_simulate */
+ Num_Simulate_Read = atoi(argv[i++]);
+ break;
+ case 'R': /* -Read */
+ Num_Read = atoi(argv[i++]);
+ break;
+ case 'w': /* -write_simulate */
+ Num_Simulate_Write = atoi(argv[i++]);
+ break;
+ case 'W': /* -Write */
+ Num_Write = atoi(argv[i++]);
+ break;
+ case 'i': /* -interleave */
+ interleave = atoi(argv[i++]);
+ break;
+ case 'C': /* -Cost_model */
+ cost_model_mode = atoi(argv[i++]);
+ break;
+ }
+ }
+}
+
+/* main: driver for the Panda array-I/O benchmark.
+   Initializes MPI and the MPIRUN application environment, constructs a
+   compute-node Panda instance, parses the command line, then runs
+   gemein() once per last-dimension size in [lower_bound, upper_bound],
+   doubling the size each iteration.
+   NOTE(review): if the corresponding options are absent from the
+   command line, parse_cl leaves several of these locals uninitialized
+   (e.g. arraysize, mlayout), making the frees below undefined behavior
+   -- TODO confirm the expected invocation always supplies them.  */
+int main(int argc, char **argv)
+{
+ int total_nodes; // The number of total nodes (comp + io)
+ int io_nodes; // The number of io nodes
+ int upper_bound; // The upper bound of the last dimension of the array
+ int lower_bound; // The starting number of the last dimension of the array
+ int arrayrank ; // The array rank.
+ int *arraysize; // The number of elements along each array dimension
+ int esize ; // element size of each array element
+ int mrank ; // Compute node mesh rank
+ int *mlayout; // Compute node mesh layout
+ int drank ; // IO node mesh rank
+ int cost_model_mode; // Whether the cost model is included.
+ int *dlayout; // IO node mesh layout
+ Distribution *mem_dist; // The memory array distribution along each dimension
+ // There are three possible distributions (BLOCK,
+ // NONE, CYCLIC).
+ Distribution *disk_dist; // The disk array distribution along each dimension
+ int my_rank, my_app_size, *world_ranks, leader;
+
+
+ MPI_Init(&argc, &argv);
+ MPIRUN_Init(&argc, &argv);
+
+// For Parallel architecture (IBM SP2 like),
+// Initialize the MPI environment. Only compute nodes will return from
+// this call, the io nodes will not return from the call (presumably they
+// enter their I/O service loop inside MPIRUN_Init -- TODO confirm).
+
+ MPI_Comm_rank(MPIRUN_APP_COMM, &my_rank);
+ MPI_Comm_size(MPIRUN_APP_COMM, &my_app_size);
+ leader = MPIRUN_APP_LEADERS[MPIRUN_APP_ID];
+ /* world_ranks maps this app's local ranks to global (world) ranks,
+ assuming the app occupies a contiguous rank range from its leader */
+ world_ranks = (int *) malloc(sizeof(int)*my_app_size);
+ for(int i=0;i< my_app_size; i++)
+ world_ranks[i] = leader+i;
+ printf("MPIRUN_APP_ID = %d\n", MPIRUN_APP_ID);
+ Panda * bear = new Panda(COMPUTE_NODE, MPIRUN_APP_ID, my_rank, my_app_size,
+ world_ranks);
+ global_bear = bear;
+
+
+
+
+ parse_cl(argc, argv, total_nodes, io_nodes, upper_bound, lower_bound, arrayrank,
+ arraysize, esize, mrank, mlayout, drank, dlayout, mem_dist, disk_dist, cost_model_mode);
+ /* sweep the last array dimension from lower_bound to upper_bound,
+ doubling each pass, and run the benchmark body at each size */
+ for (int size=lower_bound; size <= upper_bound; size*=2) {
+ arraysize[arrayrank-1] = size;
+ gemein(bear,io_nodes, arrayrank, arraysize, esize,
+ mrank, mlayout, drank, dlayout, mem_dist, disk_dist, cost_model_mode);
+ }
+ free(arraysize);
+ free(mlayout);
+ free(dlayout);
+ free(mem_dist);
+ free(disk_dist);
+ free(world_ranks);
+ delete bear;
+ MPI_Finalize();
+ return(0);
+}
diff --git a/src/Startup.c b/src/Startup.c
new file mode 100644
index 0000000..500c6bd
--- /dev/null
+++ b/src/Startup.c
@@ -0,0 +1,77 @@
+ /*@@
+ @file Startup.c
+ @date 01 Oct 1999
+ @author Jonghyun Lee
+ @desc Startup routines for IOPanda.
+ @enddesc
+ @history
+ @endhistory
+ @@*/
+
+#include <stdio.h>
+#include <string.h>
+
+#include "cctk.h"
+#include "cctk_Flesh.h"
+#include "cctk_GHExtensions.h"
+#include "cctk_parameters.h"
+#include "CactusBase/IOUtil/src/ioGH.h"
+
+/* prototypes of functions to be registered */
+int IOPanda_Output3DGH (cGH *GH);
+int IOPanda_TriggerOutput3D (cGH *GH, int);
+int IOPanda_TimeFor3D (cGH *GH, int);
+int IOPanda_Output3DVarAs (cGH *GH, const char *var, const char *alias);
+void *IOPanda_SetupGH (tFleshConfig *config, int convergence_level, cGH *GH);
+int IOPanda_InitGH (cGH *GH);
+int IOPanda_RecoverGH (cGH *GH, const char *basename, int called_from);
+
+ //void Panda_Create(int, int);
+void Panda_Finalize(void);
+
+ /*@@
+ @routine IOPanda_Startup
+ @date Fri May 21 1999
+ @author Thomas Radke
+ @desc
+ The startup registration routine for IOPanda.
+ Registers the GH extensions needed for IOPanda and
+ the registerable routines used for each method of IOPanda.
+ IOPanda does not overload any functions.
+ @enddesc
+ @calls
+ @calledby
+ @history
+
+ @endhistory
+
+@@*/
+void IOPanda_Startup (void)
+{
+ int IO_GHExtension; /* handle for the "IOPanda" GH extension */
+ int IOMethod; /* handle for the registered I/O method */
+
+ /* Register the IOPanda GH extension and its setup/init callbacks */
+ IO_GHExtension = CCTK_RegisterGHExtension ("IOPanda");
+ CCTK_RegisterGHExtensionSetupGH (IO_GHExtension, IOPanda_SetupGH);
+ CCTK_RegisterGHExtensionInitGH (IO_GHExtension, IOPanda_InitGH);
+
+ /* Register the 3D IOPandaIO routines as output methods */
+ IOMethod = CCTK_RegisterIOMethod ("IOPandaIO_3D");
+ CCTK_RegisterIOMethodOutputGH (IOMethod, IOPanda_Output3DGH);
+ CCTK_RegisterIOMethodOutputVarAs (IOMethod, IOPanda_Output3DVarAs);
+ CCTK_RegisterIOMethodTimeToOutput (IOMethod, IOPanda_TimeFor3D);
+ CCTK_RegisterIOMethodTriggerOutput (IOMethod, IOPanda_TriggerOutput3D);
+
+#if 0
+ /* Recovery registration and Panda creation are disabled for now
+ (NOTE(review): presumably pending porting/testing -- confirm) */
+ /* Register the IOPanda recovery routine to thorn IOUtil */
+ if (IOUtil_RegisterRecover ("IOPanda recovery", IOPanda_RecoverGH) < 0)
+ CCTK_WARN (1, "Failed to register IOPanda recovery routine");
+ Panda_Create(1, 1);
+#endif
+
+}
+
+/* IOPanda_Finalize: shut down the Panda I/O library at termination
+   by delegating to Panda_Finalize().  */
+void IOPanda_Finalize(void)
+{
+ Panda_Finalize();
+}
diff --git a/src/ioPandaGH.h b/src/ioPandaGH.h
new file mode 100644
index 0000000..f4da8d7
--- /dev/null
+++ b/src/ioPandaGH.h
@@ -0,0 +1,32 @@
+ /*@@
+ @header ioPandaGH.h
+ @date 01 Oct 1999
+ @author Jonghyun Lee
+ @desc The extensions to the GH structure from IOPanda.
+ @history
+ @endhistory
+ @@*/
+
+#include <string.h>
+
+#include "StoreNamedData.h"
+
+
+/* pandaGH: per-GH state for IOPanda's 3D output method.
+   NOTE(review): this header has no include guard; multiple inclusion
+   would redefine the typedef -- consider adding one.  */
+typedef struct IOPandaGH {
+
+ /* The number of times output (one counter per grid variable) */
+ int *IO_3Dnum;
+
+ /* How often to output (in iterations) */
+ int IO_3Devery;
+
+ /* Directory in which to output */
+ char *outpfx_3D;
+
+ /* The last iteration output (one entry per grid variable) */
+ int *IO_3Dlast;
+
+ /* filename database for opened files */
+ pNamedData *fileList_3D;
+
+} pandaGH;
diff --git a/src/make.code.defn b/src/make.code.defn
new file mode 100644
index 0000000..174c78b
--- /dev/null
+++ b/src/make.code.defn
@@ -0,0 +1,3 @@
+# Source files in this directory; the Panda library builds in SUBDIRS.
+SRCS = Startup.c GHExtension.c Output3D.c DumpVar.c
+
+SUBDIRS = Panda
diff --git a/src/make.configuration.defn b/src/make.configuration.defn
new file mode 100644
index 0000000..64d3f73
--- /dev/null
+++ b/src/make.configuration.defn
@@ -0,0 +1,21 @@
+# make.configuration.defn for IOPanda
+
+# make sure that IOPanda was configured in with MPI and IEEEIO
+
+# Abort the build with a diagnostic if IEEEIO is not configured in.
+# NOTE(review): ".pseudo" is not a standard GNU make special target
+# (that would be ".PHONY") -- confirm it is meaningful to the Cactus
+# build system.
+ifeq ($(strip $(HAVE_IEEEIO)), )
+$(NAME): MissingIEEEIO
+.pseudo: MissingIEEEIO
+MissingIEEEIO:
+	@echo "IOPanda: requires IEEEIO"
+	@echo "IOPanda: Please configure Cactus with thorn external/IEEEIO or remove IOPanda from Thornlist !"
+	exit 2
+endif
+
+# Likewise abort if no MPI libraries were configured.
+ifeq ($(strip $(MPI_LIBS)), )
+$(NAME): MissingMPI
+.pseudo: MissingMPI
+MissingMPI:
+	@echo "IOPanda: requires MPI"
+	@echo "IOPanda: Please configure Cactus with MPI or remove IOPanda from Thornlist !"
+	exit 2
+endif