diff options
author | tradke <tradke@38c3d835-c875-442e-b0fe-21c19ce1d001> | 1999-10-05 01:24:27 +0000 |
---|---|---|
committer | tradke <tradke@38c3d835-c875-442e-b0fe-21c19ce1d001> | 1999-10-05 01:24:27 +0000 |
commit | 3aa41187b549ff9a792d673e86efe5220848d73f (patch) | |
tree | 0bdc27f2b94a7dfd29cf4d0c25c2cd819fd833a2 /src | |
parent | 8a113f1371d777ca62b6c690e2f44bbebebd79c0 (diff) |
Added Jonghyun's IOPanda thorn.
Tested on the SGI Origin 2000 (O2K); still needs porting and testing on other architectures.
git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGHIO/IOPanda/trunk@2 38c3d835-c875-442e-b0fe-21c19ce1d001
Diffstat (limited to 'src')
60 files changed, 10165 insertions, 0 deletions
diff --git a/src/DumpVar.c b/src/DumpVar.c new file mode 100644 index 0000000..86ed6d9 --- /dev/null +++ b/src/DumpVar.c @@ -0,0 +1,202 @@ +/*@@ + @file DumpVar.c + @date 01 Oct 1999 + @author Jonghyun Lee + @desc Do the actual writing of a 3D grid function, + for output or for checkpointing + @enddesc + @history + @hendhistory + @@*/ + +#include <stdio.h> +#include <stdlib.h> +#ifdef SGI +#include <time.h> +#endif + +#include "cctk.h" +#include "cctk_Flesh.h" +#include "cctk_Groups.h" +#include "cctk_GroupsOnGH.h" +#include "cctk_Comm.h" +#include "cctk_WarnLevel.h" +#include "cctk_GHExtensions.h" +#include "cctk_parameters.h" +#ifdef CACTUSPUGH_PUGH +#include "CactusPUGH/PUGH/src/include/pugh.h" +#endif +#include "CactusBase/IOUtil/src/ioGH.h" +#include "ioPandaGH.h" + + +#define IOTAGBASE 20000 /* This may break on more than 2000 processors */ + + +static char *char_time_date = NULL; + + +void IOPanda_getDumpData (cGH *GH, int index, int timelevel, void **outme, + int *free_outme, CCTK_INT4 bnd [9], int element_size) +{ + DECLARE_CCTK_PARAMETERS + int i; + int myproc; + ioGH *ioUtilGH; + pGH *pughGH; + CCTK_REAL4 *single_ptr; + CCTK_REAL *real_ptr; + CCTK_CHAR *char_ptr; + CCTK_INT *int_ptr; + void *data = CCTK_VarDataPtrI (GH, timelevel, index); + + /* to make the compiler happy */ + single_ptr = NULL; + real_ptr = NULL; + char_ptr = NULL; + int_ptr = NULL; + + ioUtilGH = (ioGH *) GH->extensions [CCTK_GHExtensionHandle ("IO")]; + pughGH = (pGH *) GH->extensions [CCTK_GHExtensionHandle ("PUGH")]; + + myproc = CCTK_MyProc (GH); + + if (ioUtilGH->downsample_x == 1 && + ioUtilGH->downsample_y == 1 && + ioUtilGH->downsample_z == 1) { + + if (ioUtilGH->out_single) { + single_ptr = (CCTK_REAL4 *) malloc (pughGH->npoints*sizeof (CCTK_REAL4)); + + for (i = 0; i < pughGH->npoints; i++) + single_ptr [i] = (CCTK_REAL4) ((CCTK_REAL *) data) [i]; + + *outme = single_ptr; + *free_outme = 1; + } else { + *outme = data; + *free_outme = 0; + } + + for (i = 0; i < 3; i++) { + bnd 
[i] = GH->cctk_lbnd[i]; /* the bounds */ + bnd [i+3] = GH->cctk_lsh[i]; /* the sizes */ + bnd [i+6] = GH->cctk_gsh[i]; /* the global space */ + } + + } else { + + int start [3], end [3]; + int i, j, k, l; + + /* Downsampling code ... */ + bnd [6] = GH->cctk_gsh[0] / ioUtilGH->downsample_x; + if (GH->cctk_gsh[0] % ioUtilGH->downsample_x) + bnd [6]++; + bnd [7] = GH->cctk_gsh[1] / ioUtilGH->downsample_y; + if (GH->cctk_gsh[1] % ioUtilGH->downsample_y) + bnd [7]++; + bnd [8] = GH->cctk_gsh[2] / ioUtilGH->downsample_z; + if (GH->cctk_gsh[2] % ioUtilGH->downsample_z) + bnd [8]++; + + if (verbose) + printf ("Downsampled sizes (%d, %d, %d) -> (%d, %d, %d)\n", + GH->cctk_gsh[0], GH->cctk_gsh[1], GH->cctk_gsh[2], + (int) bnd [6], (int) bnd [7], (int) bnd [8]); + + /* Now figure out the local downsampling */ + /* The local starts are the lb modded into the downsample */ + for (i = 0; i < 3; i++) { + int downsample; + + if (i == 0) + downsample = ioUtilGH->downsample_x; + else if (i == 1) + downsample = ioUtilGH->downsample_y; + else + downsample = ioUtilGH->downsample_z; + + bnd [i] = GH->cctk_lbnd[i] / downsample; + start [i] = bnd [i] * downsample; + if (start [i] < + GH->cctk_lbnd[i] + pughGH->ownership [PUGH_VERTEXCTR][i][0]) { + start [i] += downsample; + bnd [i] ++; + } + end [i] = ((GH->cctk_lbnd [i] + + pughGH->ownership [PUGH_VERTEXCTR][i][1] - 1) / downsample) + * downsample; + bnd [i+3] = (end [i] - start [i]) / downsample + 1; + } + + if (verbose) { + printf ("Downsample ranges (%d, %d, %d) -> (%d, %d, %d)\n", + start [0], start [1], start [2], + end [0], end [1], end [2]); + printf ("Local size/bound (%d, %d, %d) (%d, %d, %d)\n", + (int) bnd [3], (int) bnd [4], (int) bnd [5], + (int) bnd [0], (int) bnd [1], (int) bnd [2]); + } + + /* compute local ranges */ + for (i = 0; i < 3; i++) { + start [i] -= GH->cctk_lbnd [i]; + end [i] -= GH->cctk_lbnd [i]; + } + + *outme = malloc (bnd [3] * bnd [4] * bnd [5] * element_size); + *free_outme = 1; + + /* I hate it to 
repeat the loops for each case label + but that way produces much more efficient code */ + l = 0; + switch (CCTK_VarTypeI (index)) { + case CCTK_VARIABLE_CHAR: + char_ptr = (CCTK_CHAR *) *outme; + for (k = start [2]; k <= end [2]; k += ioUtilGH->downsample_z) + for (j = start [1]; j <= end [1]; j += ioUtilGH->downsample_y) + for (i = start [0]; i <= end [0]; i += ioUtilGH->downsample_x) + char_ptr [l++] = ((CCTK_CHAR *) data) [DI (pughGH, i, j, k)]; + break; + + case CCTK_VARIABLE_INT: + int_ptr = (CCTK_INT *) *outme; + for (k = start [2]; k <= end [2]; k += ioUtilGH->downsample_z) + for (j = start [1]; j <= end [1]; j += ioUtilGH->downsample_y) + for (i = start [0]; i <= end [0]; i += ioUtilGH->downsample_x) + int_ptr [l++] = ((CCTK_INT *) data) [DI (pughGH, i, j, k)]; + break; + + case CCTK_VARIABLE_REAL: + if (ioUtilGH->out_single) + single_ptr = (CCTK_REAL4 *) *outme; + else + real_ptr = (CCTK_REAL *) *outme; + for (k = start [2]; k <= end [2]; k += ioUtilGH->downsample_z) + for (j = start [1]; j <= end [1]; j += ioUtilGH->downsample_y) + for (i = start [0]; i <= end [0]; i += ioUtilGH->downsample_x) + if (ioUtilGH->out_single) + single_ptr [l++] = (CCTK_REAL4) + (((CCTK_REAL *) data) [DI (pughGH, i, j, k)]); + else + real_ptr [l++] = ((CCTK_REAL *) data) [DI (pughGH, i, j, k)]; + break; + + default: + CCTK_WARN (1, "Unsupported variable type in IOPanda_getDumpData"); + return; + } + } + + if (verbose) { + printf ("Global size: %d %d %d\n", + (int) bnd [6], (int) bnd [7], (int) bnd [8]); + printf ("Lower bound: %d %d %d\n", + (int) bnd [0], (int) bnd [1], (int) bnd [2]); + printf ("Chunk size : %d %d %d\n", + (int) bnd [3], (int) bnd [4], (int) bnd [5]); + } +} + + diff --git a/src/GHExtension.c b/src/GHExtension.c new file mode 100644 index 0000000..210db6a --- /dev/null +++ b/src/GHExtension.c @@ -0,0 +1,90 @@ + /*@@ + @file GHExtension.c + @date 01 Oct 1999 + @author Jonghyun Lee + @desc IOPanda GH extension stuff + @enddesc + @history + @endhistory + @@*/ + 
+/*#define DEBUG_IO*/ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include "cctk_Flesh.h" +#include "cctk_Groups.h" +#include "cctk_Comm.h" +#include "cctk_Misc.h" +#include "cctk_GHExtensions.h" +#include "cctk_parameters.h" +#include "cctk_WarnLevel.h" +#ifdef CACTUSPUGH_PUGH +#include "CactusPUGH/PUGH/src/include/pugh.h" +#endif +#include "CactusBase/IOUtil/src/ioGH.h" +#include "ioPandaGH.h" + +void Panda_Create(int, int); + +void *IOPanda_SetupGH (tFleshConfig *config, int convergence_level, cGH *GH) +{ + int i, numvars; + pandaGH *newGH; + + numvars = CCTK_NumVars (); + + newGH = (pandaGH *) malloc (sizeof (pandaGH)); + newGH->IO_3Dnum = (int *) malloc (numvars * sizeof (int)); + newGH->IO_3Dlast = (int *) malloc (numvars * sizeof (int)); + + return (newGH); +} + +int IOPanda_InitGH (cGH *GH) +{ + DECLARE_CCTK_PARAMETERS + int i; + ioGH *ioUtilGH; + pandaGH *myGH; + + /* get the handles for IOUtil and IOPanda extensions */ + ioUtilGH = (ioGH *) GH->extensions [CCTK_GHExtensionHandle ("IO")]; + myGH = (pandaGH *) GH->extensions [CCTK_GHExtensionHandle ("IOPanda")]; + + /* How often to output */ + myGH->IO_3Devery = out_every; + if (out3D_every > 0) + myGH->IO_3Devery = out3D_every; + + InitIONum (myGH->IO_3Dnum, out3D_vars); + + /* Deal with the output directories */ + myGH->outpfx_3D = outdir; + if (!CCTK_Equals(outdir3D,"outdir")) + myGH->outpfx_3D = outdir3D; + + /* Create the output directories */ + if (myGH->IO_3Devery > 0) { + if (CCTK_MyProc (GH) == 0) { + FILE *fp; + + if (CCTK_mkdir (myGH->outpfx_3D) != 0) + CCTK_WARN (2,"Problem creating IO 3D directory"); + fp = fopen("FILEPREFIX", "w"); + fprintf(fp, "%s", myGH->outpfx_3D); + fclose(fp); + } + } + + for (i=0; i<CCTK_NumVars(); i++) + myGH->IO_3Dlast [i] = -1; + + myGH->fileList_3D = NULL; + + Panda_Create(ioUtilGH->ioproc_every, 1); + + return (0); +} diff --git a/src/Output3D.c b/src/Output3D.c new file mode 100644 index 0000000..41143a9 --- /dev/null +++ b/src/Output3D.c @@ 
-0,0 +1,487 @@ + /*@@ + @file Output3D.c + @date 01 Oct 1999 + @author Jonghyun Lee + @desc Functions to deal 3D output of GFs + @enddesc + @history + @hendhistory + @@*/ + +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "cctk.h" +#include "cctk_Flesh.h" +#include "cctk_Groups.h" +#include "cctk_parameters.h" +#include "cctk_GHExtensions.h" +#include "cctk_WarnLevel.h" +#include "cctk_Comm.h" +#include "ioPandaGH.h" +#include "Panda/c_interface.h" +#include "CactusBase/IOUtil/src/ioGH.h" +#ifdef CACTUSPUGH_PUGH +#include "CactusPUGH/PUGH/src/include/pugh.h" +#endif +#ifdef SGI +#include <time.h> +#endif + +#include "external/IEEEIO/src/IOProtos.h" + +int IOPanda_Output3DVarAs (cGH *GH, const char *var, const char *alias); +int IOPanda_TimeFor3D (cGH *GH, int index); +void IOPanda_Timestep (cGH *GH, int index, const char *alias); +void IOPanda_AddCommonAttributes (cGH *GH, int index, int timelevel, CCTK_INT *gsz, char *fname); +void IOPanda_IEEEIOStructDump (cGH *GH, char *fname); + +/*@@ + @routine IOPanda_Output3DGH + @date Sat March 6 1999 + @author Gabrielle Allen + @desc + Loops over all variables and outputs them if necessary + @enddesc + @calls CCTK_GHExtensionHandle + CCTK_NumVars + CCTK_ImplementationFromVar + CCTK_VarName + IOPanda_TimeFor3D + IOPanda_Output3DVarAs + @calledby + @history + + @endhistory + @var GH + @vdesc Pointer to CCTK GH + @vtype cGH + @vio in + @vcomment + @endvar +@@*/ + +int IOPanda_Output3DGH (cGH *GH) +{ + int i; + pandaGH *myGH; + char *implementation; + char *name; + char *fullname; + DECLARE_CCTK_PARAMETERS + + /* Get the GH extension for IOPanda */ + myGH = (pandaGH *) GH->extensions [CCTK_GHExtensionHandle ("IOPanda")]; + + if (myGH->IO_3Devery <= 0) + return; + + /* Loop over all variables */ + for (i = 0; i < CCTK_NumVars (); i++) { + if (IOPanda_TimeFor3D (GH, i)) { + implementation = CCTK_ImpFromVarI (i); + name = CCTK_VarName (i); + fullname = (char *) malloc (strlen 
(implementation) + + strlen (name) + 3); + assert (fullname); + sprintf (fullname, "%s::%s", implementation, name); + + if (verbose) { + printf ("IOPanda Output3DGH : \n"); + printf (" fullname/name = %s/%s\n", fullname, name); + } + + IOPanda_Output3DVarAs (GH, fullname, name); + + free (fullname); + + /* Register another 3D output for this GF */ + myGH->IO_3Dnum [i]++; + + /* Register GF as having 3D output this iteration */ + myGH->IO_3Dlast [i] = GH->cctk_iteration; + } + } + + return (0); +} + + +/*@@ + @routine IOPanda_Output3DVarAs + @date Sat March 6 1999 + @author Gabrielle Allen + @desc + unconditional output of a variable using the IOPanda 3D output method + @enddesc + @calls CCTK_DecomposeName + CCTK_VarIndex + CCTK_GHExtensionHandle + IOPanda_Write3D + @calledby IOPanda_Output3DGH + @history + + @endhistory + @var GH + @vdesc Pointer to CCTK GH + @vtype cGH + @vio in + @vcomment + @endvar + @var fullname + @vdesc complete name of variable to output + @vtype const char * + @vio in + @vcomment + @endvar + @var alias + @vdesc alias name of variable to output (used to generate output filename) + @vtype const char * + @vio in + @vcomment + @endvar +@@*/ + +int IOPanda_Output3DVarAs (cGH *GH, const char *fullname, const char *alias) +{ + DECLARE_CCTK_PARAMETERS + int index; + pandaGH *myGH; + + index = CCTK_VarIndex(fullname); + + /* Get the GH extension for IOPanda */ + myGH = (pandaGH *) GH->extensions [CCTK_GHExtensionHandle ("IOPanda")]; + + if (verbose) { + printf ("\nIn IOPanda Output3DVarAs\n-------------------\n"); + printf (" Fullname = -%s-\n", fullname); + printf (" Alias = -%s-\n", alias); + printf (" Index = %d\n", index); + } + + /* Do the 3D output */ + IOPanda_Timestep (GH, index, alias); + + return (0); +} + + +/*@@ + @routine IOPanda_TimeFor3D + @date Sat March 6 1999 + @author Gabrielle Allen + @desc + Decides if it is time to output a variable using the IOPanda 3D output + method + @enddesc + @calls CCTK_GHExtensionHandle + 
CCTK_GroupTypeFromVarI + CCTK_WARN + CCTK_QueryGroupStorageI + CCTK_GroupNameFromVarI + @calledby IOPanda_Output3DGH + @history + + @endhistory + @var GH + @vdesc Pointer to CCTK GH + @vtype cGH + @vio in + @vcomment + @endvar + @var index + @vdesc index of variable + @vtype int + @vio in + @vcomment + @endvar +@@*/ + +int IOPanda_TimeFor3D (cGH *GH, int index) +{ + pandaGH *myGH; + + /* Get the GH extension for IOPanda */ + myGH = (pandaGH *) GH->extensions [CCTK_GHExtensionHandle ("IOPanda")]; + + /* Check this GF should be output */ + if (! (myGH->IO_3Dnum [index] != 0 && + GH->cctk_iteration % myGH->IO_3Devery == 0)) + return (0); + + /* Check GF not already output this iteration */ + if (myGH->IO_3Dlast [index] == GH->cctk_iteration) { + CCTK_WARN (2, "Already done 3D output in IOPanda"); + return (0); + } + + /* Check GF has storage */ + if (! CCTK_QueryGroupStorageI (GH, + CCTK_GroupIndexFromVarI(index))) { + char *fullname = CCTK_FullName (index); + char *msg = (char *) malloc (80 + strlen (fullname)); + + sprintf (msg, "No IOPandaIO 3D output for '%s' (no storage)", fullname); + CCTK_WARN (2, msg); + free (fullname); + free (msg); + return (0); + } + + return (1); +} + + +/*@@ + @routine IOPanda_TriggerOutput3D + @date Sat March 6 1999 + @author Gabrielle Allen + @desc + Triggers the output a variable using the IOPanda 3D output + method + @enddesc + @calls CCTK_GHExtensionHandle + CCTK_VarName + IOPanda_Write3D + @calledby + @history + + @endhistory + @var GH + @vdesc Pointer to CCTK GH + @vtype cGH + @vio in + @vcomment + @endvar + @var index + @vdesc index of variable to output + @vtype int + @vio in + @vcomment + @endvar +@@*/ + +int IOPanda_TriggerOutput3D (cGH *GH, int index) +{ + DECLARE_CCTK_PARAMETERS + pandaGH *myGH; + char *varname; + + varname = CCTK_VarName (index); + + /* Get the GH extension for IOPanda */ + myGH = (pandaGH *) GH->extensions [CCTK_GHExtensionHandle ("IOPanda")]; + + if (verbose) { + printf("\nIn IOPanda 
TriggerOutput3D\n---------------------\n"); + printf(" Index = %d\n", index); + printf(" Variable = -%s-\n", varname); + } + + /* Do the 3D output */ + IOPanda_Timestep (GH, index, varname); + + /* Register another 3D output for this GF */ + myGH->IO_3Dnum [index]++; + + /* Register GF as having 3D output this iteration */ + myGH->IO_3Dlast [index] = GH->cctk_iteration; + + return (0); +} + +void IOPanda_Timestep(cGH *GH, int index, const char *alias) +{ + DECLARE_CCTK_PARAMETERS + void *data; + int tmp[1], tmp1[3], tmp2[3]; + Distribution dist1[3], dist2[3]; + CCTK_INT4 bnd[9]; + int free_flag, timelevel; + ArrayInfo ainfo; + + ioGH *ioUtilGH; + pGH *pughGH; + + if (CCTK_GroupTypeFromVarI (index) == GROUP_SCALAR) { + printf("##### %s is scalar\n", alias); + return; + } + + ioUtilGH = (ioGH *) GH->extensions [CCTK_GHExtensionHandle ("IO")]; + pughGH = (pGH *) GH->extensions [CCTK_GHExtensionHandle ("PUGH")]; + + ainfo.name_ = (char *)alias; + + ainfo.rank_ = 3; + tmp1[0] = GH->cctk_gsh[2]; + tmp1[1] = GH->cctk_gsh[1]; + tmp1[2] = GH->cctk_gsh[0]; + ainfo.size_ = tmp1; + + switch (CCTK_VarTypeI (index)) { + case CCTK_VARIABLE_CHAR: + ainfo.esize_ = CHAR; + break; + case CCTK_VARIABLE_INT: +#ifdef CCTK_INTEGER_PRECISION_8 + ainfo.esize_ = INT64; +#elif CCTK_INTEGER_PRECISION_4 + ainfo.esize_ = INT32; +#elif CCTK_INTEGER_PRECISION_2 + ainfo.esize_ = INT16; +#endif + break; + case CCTK_VARIABLE_REAL: + if (ioUtilGH->out_single) ainfo.esize_ = FLOAT32; + else { +#ifdef CCTK_REAL_PRECISION_8 + ainfo.esize_ = FLOAT64; +#elif CCTK_REAL_PRECISION_4 + ainfo.esize_ = FLOAT32; +#endif + } + } + + ainfo.mem_rank_ = 3; + tmp2[0] = pughGH->nprocz; tmp2[1] = pughGH->nprocy; tmp2[2] = pughGH->nprocx; + ainfo.mem_layout_ = tmp2; + dist1[0] = dist1[1] = dist1[2] = BLOCK; + ainfo.mem_dist_ = dist1; + + ainfo.disk_rank_ = 1; + dist2[0] = BLOCK; dist2[1] = dist2[2] = NONE; + tmp[0]= ((CCTK_nProcs(GH) - 1) / ioUtilGH->ioproc_every + 1); + + ainfo.disk_layout_ = tmp; + ainfo.disk_dist_ = 
dist2; + + timelevel = CCTK_NumTimeLevelsFromVarI (index) - 1; + if (timelevel > 0) timelevel--; + + IOPanda_getDumpData(GH, index, timelevel, &data, &free_flag, bnd, + ainfo.esize_); + ainfo.data_ = (char *)data; + ainfo.stencil_width_ = pughGH->nghostzones; + + + PandaTimestep(&ainfo); + IOPanda_AddCommonAttributes(GH, index, timelevel, ainfo.size_, ainfo.name_); + if (PandaIsNewFile(ainfo.name_)) IOPanda_IEEEIOStructDump(GH, ainfo.name_); +} + +void IOPanda_AddCommonAttributes (cGH *GH, int index, int timelevel, + CCTK_INT4 gsz [3], char *fname) +{ + DECLARE_CCTK_PARAMETERS + CCTK_REAL d3_to_IO [6]; /* buffer for writing doubles to IEEEIO */ + CCTK_INT4 i_to_IO; /* buffer for writing an int to IEEEIO */ + char *name, *gname; + ioGH *ioUtilGH; + char *char_time_date = ""; + +#ifdef SGI + time_t t = time(NULL); + char_time_date = asctime (localtime (&t)); +#endif + + /* Get the handle for IO extensions */ + ioUtilGH = (ioGH *) GH->extensions [CCTK_GHExtensionHandle ("IO")]; + + name = CCTK_FullName (index); + + Panda_WriteAttribute (fname, "name", BYTE, strlen (name) + 1, name); + + free (name); + + gname = CCTK_GroupNameFromVarI (index); + Panda_WriteAttribute (fname, "groupname", BYTE, strlen (gname) + 1, gname); + free (gname); + + i_to_IO = CCTK_GroupTypeFromVarI (index); + Panda_WriteAttribute (fname, "grouptype", INT32, + 1, &i_to_IO); + + i_to_IO = CCTK_NumTimeLevelsFromVarI (index); + Panda_WriteAttribute (fname, "ntimelevels", INT32, + 1, &i_to_IO); + + i_to_IO = timelevel; + Panda_WriteAttribute (fname, "timelevel", INT32, + 1, &i_to_IO); + + if (char_time_date && out3D_datestamp) + Panda_WriteAttribute (fname, "date", BYTE, + strlen (char_time_date) + 1, char_time_date); + + Panda_WriteAttribute (fname, "time", FLOAT64, 1,&GH->cctk_time); + + d3_to_IO [0] = CCTK_CoordOrigin ("x"); + d3_to_IO [1] = CCTK_CoordOrigin ("y"); + d3_to_IO [2] = CCTK_CoordOrigin ("z"); + Panda_WriteAttribute (fname, "origin", FLOAT64,3,d3_to_IO); + CCTK_CoordRange (GH, 
&d3_to_IO [0], &d3_to_IO [3], "x"); + CCTK_CoordRange (GH, &d3_to_IO [1], &d3_to_IO [4], "y"); + CCTK_CoordRange (GH, &d3_to_IO [2], &d3_to_IO [5], "z"); + Panda_WriteAttribute (fname, "min_ext",FLOAT64,3,d3_to_IO); + Panda_WriteAttribute (fname, "max_ext",FLOAT64, 3,d3_to_IO+3); + + d3_to_IO [0] = GH->cctk_delta_space [0] * ioUtilGH->downsample_x; + d3_to_IO [1] = GH->cctk_delta_space [1] * ioUtilGH->downsample_y; + d3_to_IO [2] = GH->cctk_delta_space [2] * ioUtilGH->downsample_z; + Panda_WriteAttribute (fname, "delta", FLOAT64, 3,d3_to_IO); + + if (ioUtilGH->downsample_x > 1 || + ioUtilGH->downsample_y > 1 || + ioUtilGH->downsample_z > 1) { + d3_to_IO [0] = GH->cctk_delta_space [0]; + d3_to_IO [1] = GH->cctk_delta_space [1]; + d3_to_IO [2] = GH->cctk_delta_space [2]; + Panda_WriteAttribute (fname, "evolution_delta", FLOAT64, 3, d3_to_IO); + } + + Panda_WriteAttribute (fname, "global_size", INT32, 3, gsz); + + i_to_IO = CCTK_nProcs (GH); + Panda_WriteAttribute (fname, "nprocs", INT32, 1, &i_to_IO); + + i_to_IO = ioUtilGH->ioproc_every; + Panda_WriteAttribute (fname, "ioproc_every", INT32, 1, &i_to_IO); + + i_to_IO = ioUtilGH->unchunked; + Panda_WriteAttribute (fname, "unchunked", INT32, 1, &i_to_IO); + + i_to_IO = GH->cctk_iteration; + Panda_WriteAttribute (fname, "iteration", INT32, 1, &i_to_IO); +} + + +void IOPanda_IEEEIOStructDump (cGH *GH, char *fname) +{ + + CCTK_INT4 i_temp; + CCTK_REAL d_temp; + ioGH *ioUtilGH; + + + ioUtilGH = (ioGH *) GH->extensions [CCTK_GHExtensionHandle ("IO")]; + + i_temp = GH->cctk_iteration; + Panda_WriteAttribute (fname, "GH$iteration", INT32, + 1, &i_temp); + + i_temp = ioUtilGH->ioproc_every; + Panda_WriteAttribute (fname, "GH$ioproc_every", INT32, + 1, &i_temp); + + i_temp = CCTK_nProcs (GH); + Panda_WriteAttribute (fname, "GH$nprocs", INT32, + 1, &i_temp); + + d_temp = GH->cctk_time; + Panda_WriteAttribute (fname, "GH$time", FLOAT64, + 1, &d_temp); +} diff --git a/src/Panda/App_Info.C b/src/Panda/App_Info.C new file mode 
100644 index 0000000..77f1d4b --- /dev/null +++ b/src/Panda/App_Info.C @@ -0,0 +1,96 @@ +#include "definitions.h" +#include "App_Info.h" + +App_Info::App_Info(int app_num, int app_size, int *world_ranks) +{ + int world_size; + + app_num_ = app_num; + app_size_ = app_size; + world_ranks_ = copy_int_list(app_size, world_ranks); + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + relative_ranks_ = (int *) malloc(sizeof(int)*world_size); + for(int i=0; i < world_size; i++) + relative_ranks_[i] = -1; + for(i=0; i < app_size_; i++) + relative_ranks_[world_ranks_[i]] = i; + intra_comm_ = NULL; + combine_count_ = 0; + +#ifdef DEBUG + printf("Creating an new App Info object\n"); + printf("App_num = %d App_size = %d\n", app_num_, app_size_); + printf("Ranks relative: world: world-relative\n"); + for(int j=0;j<app_size_;j++) + printf(" %d %d %d\n", j, world_ranks_[j], + relative_ranks_[world_ranks_[j]]); +#endif +} + + +App_Info::~App_Info() +{ + if (world_ranks_ != NULL) free(world_ranks_); + if (relative_ranks_ != NULL) free(relative_ranks_); + if (intra_comm_ != NULL) + { + MPI_Comm_free(intra_comm_); + free(intra_comm_); + intra_comm_ =NULL; + } + world_ranks_ = NULL; + relative_ranks_ = NULL; +} + + +int App_Info::app_num(){ return app_num_;} + +int App_Info::app_size(){ return app_size_;} + +int App_Info::get_master(){ return world_ranks_[0];} + +int App_Info::world_rank(int relative_rank) +{ + return world_ranks_[relative_rank]; +} + +int App_Info::relative_rank(int world_rank) +{ + return relative_ranks_[world_rank]; +} + +void App_Info::set_intra_comm(MPI_Comm *intra_comm) +{ + intra_comm_ = intra_comm; +} + +MPI_Comm* App_Info::intra_comm() +{ + return intra_comm_; +} + +void App_Info::inc_combine_count() +{ + combine_count_++; +} + +int App_Info::combine_count() +{ + return combine_count_; +} + +void App_Info::reset_combine_count() +{ + combine_count_ = 0; +} + + +int* App_Info::world_ranks(){ + return world_ranks_; +} + +void App_Info::world_ranks(int *ret_list) +{ + 
for(int i=0; i < app_size_; i++) + ret_list[i] = world_ranks_[i]; +} diff --git a/src/Panda/App_Info.h b/src/Panda/App_Info.h new file mode 100644 index 0000000..f5d9664 --- /dev/null +++ b/src/Panda/App_Info.h @@ -0,0 +1,31 @@ +#ifndef App_Info_dot_h +#define App_Info_dot_h + +#include "mpi.h" + +class App_Info { + int app_num_; + int app_size_; + int *world_ranks_; + int *relative_ranks_; + MPI_Comm *intra_comm_; + int combine_count_; + + public: + App_Info(int,int,int*); + virtual ~App_Info(); + int app_num(); + int app_size(); + int get_master(); + int world_rank(int); + int relative_rank(int); + void set_intra_comm(MPI_Comm *); + MPI_Comm* intra_comm(); + void inc_combine_count(); + int combine_count(); + void reset_combine_count(); + int *world_ranks(); + void world_ranks(int*); +}; + +#endif diff --git a/src/Panda/Array.C b/src/Panda/Array.C new file mode 100644 index 0000000..e2fd7eb --- /dev/null +++ b/src/Panda/Array.C @@ -0,0 +1,649 @@ +#include "definitions.h" +#include "MPIFS.h" +#include "Array.h" + +#include "external/IEEEIO/src/Arch.h" + +extern "C" { + int IOsizeOf(int); + int IOreadAttributeInfo(IOFile, char *,int *, int *); + int IOreadAttribute(IOFile,int,void*); +} + +extern int global_system_type_; +extern MPIFS* MPIFS_global_obj; +extern int SUBCHUNK_SIZE; + +/*************************************************************************** + * Class: Array + * Description: This is a user-visible class. This is used to describe the + * global array. It also stores pointers to local chunks of + * data. 
+ * + * Instance-variables: + * name_ - name of the array + * rank_ - rank of the array (inherited variable) + * size_ - size of the array (elements) in the various dimensions + * element_size_ - size of each array element (in bytes) + * compute_node_layout_ - layout of the compute nodes + * io_node_layout_ - layout of the io nodes + * subchunk_layout_ - layout of the subchunks + * natural_chunked - whether the array is natural chunked + * compute_node_alloc_policy - chunk dist policy on compute nodes + * io_node_alloc_policy - chunk dist policy on the io nodes + **************************************************************************** + */ + +Array::Array() : Template() +{ + subchunk_layout_ = NULL; + element_size_ = 0; + natural_chunked_ = NO; + sub_chunked_ = NO; + overlap_ = NO; + io_strategy_ = SIMPLE_IO; +} + +/* This function is used on the compute nodes to create the array * + * object. In this case there is no user-specified subchunking and * + * the chunk distribution on the compute nodes is 1 chunk per * + * compute node and round-robin on the io nodes. */ +Array::Array(char *name, int rank, int *sizearray, int elt_size, + ArrayLayout *mem_layout, Distribution *mem_dist, + ArrayLayout *io_layout, Distribution *io_dist):Template(rank, sizearray) +{ + do_init(name, rank, sizearray, elt_size, mem_layout, mem_dist, + io_layout, io_dist, NULL, NULL, REGULAR, ROUND_ROBIN, HPF); + /* call function to allocate chunk_list */ + if (sizearray) allocate_chunks(COMPUTE_NODE); + overlap_ = NO; +} + +/* This function is used on the compute nodes to create the array * + * object. In this case there is no user-specified subchunking and * + * the chunk distribution on the compute nodes is 1 chunk per * + * compute node and round-robin on the io nodes. Also in this case* + * the user specifies the data ptr to be used. 
*/ +Array::Array(char *name, int rank, int *sizearray, int elt_size, + ArrayLayout *mem_layout, Distribution *mem_dist, + ArrayLayout *io_layout, Distribution *io_dist, + char *data_ptr) : Template(rank, sizearray) +{ + char *ptr = data_ptr; + do_init(name, rank, sizearray, elt_size, mem_layout, mem_dist, + io_layout, io_dist, NULL, NULL, REGULAR, ROUND_ROBIN, HPF); + /* call function to allocate chunk_list */ + if (sizearray) allocate_chunks(COMPUTE_NODE, 1 , &ptr, 0); + overlap_ = NO; +} + +/* This function is used on the compute nodes to create the array * + * object. In this case there is no user-specified subchunking and * + * the chunk distribution on the compute nodes is 1 chunk per * + * compute node and round-robin on the io nodes. Also in this case* + * the user specifies the data ptr to be used and stencil width. */ +Array::Array(char *name, int rank, int *sizearray, int elt_size, + ArrayLayout *mem_layout, Distribution *mem_dist, + ArrayLayout *io_layout, Distribution *io_dist, + char *data_ptr, int stencil_width) : Template(rank, sizearray) +{ + char *ptr = data_ptr; + do_init(name, rank, sizearray, elt_size, mem_layout, mem_dist, + io_layout, io_dist, NULL, NULL, REGULAR, ROUND_ROBIN, HPF); + /* call function to allocate chunk_list */ + if (sizearray) allocate_chunks(COMPUTE_NODE, 1 , &ptr, stencil_width); + if (stencil_width > 0) overlap_ = YES; + else overlap_ = NO; +} + +/* This function is used on the compute nodes to create the array * + * object. In this case there is user-specified subchunking and * + * the chunk distribution on the compute nodes is 1 chunk per * + * compute node and round-robin on the io nodes. 
*/ +Array::Array(char *name, int rank, int *sizearray, int elt_size, + ArrayLayout *mem_layout, Distribution *mem_dist, + ArrayLayout *io_layout, Distribution *io_dist, + ArrayLayout *sub_layout, Distribution* sub_dist) + : Template(rank, sizearray) +{ + do_init(name, rank, sizearray, elt_size, mem_layout, mem_dist, + io_layout, io_dist, sub_layout, sub_dist, + REGULAR, ROUND_ROBIN, HPF); + /* call function to allocate chunk_list */ + if (sizearray) allocate_chunks(COMPUTE_NODE); + overlap_ = NO; +} + +/* This function is used on the compute nodes to create the array * + * object. In this case there is user-specified subchunking and * + * the chunk distribution on the compute nodes is 1 chunk per * + * compute node and round-robin on the io nodes. This function is * + * used to when the user provides the data_ptr. */ +Array::Array(char *name, int rank, int *sizearray, int elt_size, + ArrayLayout *mem_layout, Distribution *mem_dist, + ArrayLayout *io_layout, Distribution *io_dist, + ArrayLayout *sub_layout, Distribution* sub_dist, + char *data_ptr) : Template(rank, sizearray) +{ + char *ptr = data_ptr; + do_init(name, rank, sizearray, elt_size, mem_layout, mem_dist, + io_layout, io_dist, sub_layout, sub_dist, + REGULAR, ROUND_ROBIN, HPF); + /* call function to allocate chunk_list */ + if (sizearray) allocate_chunks(COMPUTE_NODE, 1, &ptr, 0); + overlap_ = NO; +} + +/* Initializes the state of the array object. 
the chunks are allocated * + * via another function */ +void Array::do_init(char *name, int rank, int *sizearray, int elt_size, + ArrayLayout *mem_layout, Distribution *mem_dist, + ArrayLayout *io_layout, Distribution *io_dist, + ArrayLayout *subchunk_layout, Distribution *subchunk_dist, + ChunkAllocPolicy comp_node_policy, ChunkAllocPolicy io_node_policy, + Block_Distribution block_dist) +{ + io_strategy_ = SIMPLE_IO; + + name_ = (char *) malloc(sizeof(char)*(strlen(name)+5)); + strcpy(name_, name); + ieee_size_ = elt_size; + element_size_ = IOsizeOf(ieee_size_); + + compute_node_layout_ = new RegularDistribution(rank, mem_layout, mem_dist, + comp_node_policy, block_dist); + if (io_layout) + io_node_layout_ = new RegularDistribution(rank, io_layout, io_dist, + io_node_policy, block_dist); + else io_node_layout_ = NULL; + if (subchunk_layout) + subchunk_layout_ = new RegularDistribution(rank, subchunk_layout, + subchunk_dist, ROUND_ROBIN, + block_dist); + else subchunk_layout_ = NULL; + + /* Check if there is any sub-chunking */ + if (subchunk_layout_) sub_chunked_ = YES; + else sub_chunked_ = NO; + + /* Check if there is any natural chuunking */ + if (compute_node_layout_->equal(io_node_layout_)) natural_chunked_ = YES; + else natural_chunked_ = NO; +} + +/* This function is used to initialize the array objects on the io * + * node side. 
*/ +Array::Array(int **schema_buf) +{ + int* ptr = *schema_buf; + + io_strategy_ = *ptr++; + op_type_ = *ptr++; + int len = *ptr++; + name_ = (char *) malloc(len+1); + for (int i=0; i< len; i++) name_[i] = (char) *ptr++; + name_[len] = '\0'; + rank_ = *ptr++; + + if (*ptr++ > 0) { + size_ = (int *) malloc(sizeof(int) * rank_); + for(int i=0; i < rank_; i++) size_[i] = *ptr++; + } else size_ = NULL; + + element_size_ = *ptr++; + ieee_size_ = *ptr++; + natural_chunked_ = (Boolean) *ptr++; + sub_chunked_ = (Boolean) *ptr++; + overlap_ = (Boolean) *ptr++; + + compute_node_layout_ = unpack_layout(&ptr); + io_node_layout_ = unpack_layout(&ptr); + + if (sub_chunked_) subchunk_layout_ = unpack_layout(&ptr); + else subchunk_layout_ = NULL; + + *schema_buf = ptr; +} + +ArrayDistribution *Array::unpack_layout(int **schema_buf) +{ + int *ptr = *schema_buf; + int type = *ptr++; + ArrayDistribution *tmp; + + if (type == UNSET) tmp = NULL; + else if (type == Regular) tmp = new RegularDistribution(&ptr); + else if (type == Irregular) {printf("Irregular is not supported\n"); exit(0);} + else tmp = NULL; + + *schema_buf = ptr; + return tmp; +} + +/* Allocate chunks - Currently only used on the compute node side */ +void Array::allocate_chunks(int node_type) +{ + int my_rank; + Chunk *new_chunk; + + if (node_type == COMPUTE_NODE) { + /* First find out what kind of system we have (MPI or sequential) */ + if (global_system_type_ == MPI_SYSTEM) { + /* Allocate a single chunk with index=compute_node_rank */ + my_rank = MPIFS_global_obj->my_rank(COMPUTE_NODE); + new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, ALLOC); + compute_node_layout_->add_last(new_chunk); + } else if (global_system_type_ == UNIX_SYSTEM) { + /* There is only one kind of Allocation policy */ + int num = compute_node_layout_->total_elements(); + for (my_rank=0; my_rank<num; my_rank++) { + new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, ALLOC); + compute_node_layout_->add_last(new_chunk); + } + } else 
printf("Unsupported filesystem\n"); + } else if (node_type == IO_NODE) { + printf("Will have to do this later\n"); + } else { + printf("Error: Don't know the node type\n"); + } +} + +/* Allocate chunks with user-specified data pointer. This function + * currently supports only the REGULAR distribution of chunks in + * the MPI-based file system and + * should be called only on the compute node side + */ +void Array::allocate_chunks(int node_type, int num_ptrs, + char **data_ptr, int stencil_width) +{ + int my_rank; + Chunk *new_chunk; + + if (node_type == COMPUTE_NODE) { + /* First find out what kind of system we have (MPI or sequential) */ + if (global_system_type_ == MPI_SYSTEM) { + /* Allocate a single chunk with index=compute_node_rank */ + my_rank = MPIFS_global_obj->my_rank(COMPUTE_NODE); + new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, NO_ALLOC); + new_chunk->set_data_ptr(data_ptr[0]); + new_chunk->set_stencil_width(stencil_width); + compute_node_layout_->add_last(new_chunk); + } else if (global_system_type_ == UNIX_SYSTEM) { + /* There is only one kind of Allocation policy */ + int num = compute_node_layout_->total_elements(); + for (my_rank=0; my_rank<num; my_rank++) { + new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, NO_ALLOC); + new_chunk->set_data_ptr(data_ptr[my_rank]); + new_chunk->set_stencil_width(stencil_width); + compute_node_layout_->add_last(new_chunk); + } + } else printf("Unsupported filesystem\n"); + } else if (node_type == IO_NODE) { + printf("Will have to do this later\n"); + } else { + printf("Error: Don't know the node type\n"); + } +} + +Array::~Array() +{ + if (name_) free(name_); + name_ = NULL; + if (compute_node_layout_) delete(compute_node_layout_); + if (io_node_layout_) delete(io_node_layout_); + if (subchunk_layout_) delete(subchunk_layout_); + compute_node_layout_ = io_node_layout_ = subchunk_layout_ = NULL; +} + +/* We are not packing the chunk information here */ +void Array::pack(int** schema_buf, int *schema_size) +{ + 
int *ptr, *head; + int i, len; + + ptr = (int *) malloc(sizeof(int)*100); + head = ptr; + + *ptr++ = io_strategy_; + *ptr++ = op_type_; + len = strlen(name_); + *ptr++ = len; + for(i=0; i<len;i++) *ptr++ = (int) name_[i]; + *ptr++ = rank_; + if (size_) { *ptr++ = 1; for(int i=0; i < rank_;i++) *ptr++ = size_[i]; } + else *ptr++ = 0; + *ptr++ = element_size_; + *ptr++ = ieee_size_; + *ptr++ = (int)natural_chunked_; + *ptr++ = (int)sub_chunked_; + *ptr++ = (int)overlap_; + + if (compute_node_layout_) compute_node_layout_->pack(&ptr); + else *ptr++ = (int)UNSET; + if (io_node_layout_) io_node_layout_->pack(&ptr); + else *ptr++ = (int)UNSET; + if (sub_chunked_) subchunk_layout_->pack(&ptr); + + *schema_size = (int)(ptr - head); + *schema_buf = head; +} + +ArrayDistribution* Array::layout(int layout_type) +{ + switch(layout_type) { + case COMPUTE_NODE: + return compute_node_layout_; + case IO_NODE: + return io_node_layout_; + case SUB_CHUNK: + return subchunk_layout_; + default: + printf("Invalid type\n"); + return NULL; + } +} + +/* The following two functions are used for regular layouts (HPF-style) only */ +/* Given a chunk index and node type, this function returns the * + * the relative node number on which the chunk resides */ +int Array::which_node(int chunk_id, int node_type) +{ + if (node_type == COMPUTE_NODE) + if (compute_node_layout_->alloc_policy() == REGULAR) return chunk_id; + else { + printf("Unsupported chunk alloc type\n"); + exit(1); + } + else if (node_type == IO_NODE) { + printf("Currently this is unsupported\n"); + exit(1); + } else { + printf("Unsupported node type\n"); + exit(1); + } + return -1; +} + +int Array::which_node(int chunk_id, int node_type, int num_io_nodes) +{ + if (node_type == IO_NODE){ + switch(io_node_layout_->alloc_policy()){ + case ROUND_ROBIN: + return(chunk_id % num_io_nodes); + + default: + printf("Error in which_node(int,int,int).. 
Invalid distribution type\n"); + exit(1); + } + } else if (node_type == COMPUTE_NODE) { + switch(compute_node_layout_->alloc_policy()){ + case REGULAR: + return chunk_id; + + default: + printf("Error in which_node(int,int,int)... Invalid distribution type\n"); + exit(1); + } + } else { + printf("Error in which_node(int,int,int)... Invalid node type\n"); + exit(1); + } + return -1; +} + +Chunk* Array::get_next_chunk() +{ + return compute_node_layout_->get_next_chunk(); +} + +/* The following seven functions are called by compute nodes only */ +/* Given a chunk index, find the chunk */ +Chunk* Array::find_chunk(int id) +{ + List *list = compute_node_layout_->chunk_list(); + Cell *list_ptr = list != NULL ? list->head_: NULL; + Chunk *chunk_ptr; + + while (list_ptr) { + chunk_ptr = (Chunk *)list_ptr->item(); + if (chunk_ptr->chunk_id() == id) return chunk_ptr; + list_ptr = list_ptr->next(); + } + return NULL; +} + +int Array::element_size(){return element_size_;} +int Array::ieee_size(){return ieee_size_;} + +Boolean Array::nat_chunked(){return natural_chunked_;} + +Boolean Array::sub_chunked(){return sub_chunked_;} + +/* This function needs to be checked and refined */ +void Array::make_sub_chunks(Chunk *chunk) +{ + Distribution *subchunk_dist; + int *subchunk_layout_sizes; + int i, tmp_size, dim, val_dim; + int *chunk_size = chunk->size(); + + if (sub_chunked_) { + printf("Error: Array already subchunked\n"); + exit(1); + } else { + subchunk_dist = (Distribution *) malloc(sizeof(Distribution)*rank_); + subchunk_layout_sizes = (int*) malloc(sizeof(int)*rank_); + tmp_size = chunk->total_size_in_bytes(); + if (tmp_size < SUBCHUNK_SIZE){ + for(i=0;i<rank_;i++){ + subchunk_dist[i] = BLOCK; + subchunk_layout_sizes[i] = 1; + } + } else { + tmp_size = element_size_; + i = rank_; + while(tmp_size < SUBCHUNK_SIZE){ + i--; + tmp_size *= chunk_size[i]; + } + dim =i; + tmp_size /=chunk_size[i]; + val_dim = SUBCHUNK_SIZE / tmp_size; + for(i=0;i<dim;i++){ + subchunk_dist[i] = 
BLOCK; + subchunk_layout_sizes[i] = chunk_size[i]; + } + subchunk_dist[dim] = BLOCK; + subchunk_layout_sizes[dim] = (chunk_size[i] + val_dim -1)/val_dim; + for(i=dim+1;i<rank_; i++){ + subchunk_dist[i] = BLOCK; + subchunk_layout_sizes[i] = 1; + } + } + ArrayLayout *tmp_layout = new ArrayLayout(rank_, subchunk_layout_sizes); + subchunk_layout_ = new RegularDistribution(rank_, tmp_layout, + subchunk_dist, ROUND_ROBIN, HPF); + sub_chunked_ = YES; + free(subchunk_layout_sizes); + free(subchunk_dist); + } +} + +int Array::array_info() +{ + List *list = compute_node_layout_->chunk_list(); + Cell *list_ptr = list->head_; + Chunk *chunk_ptr; + int ret =0; + + while(list_ptr) { + chunk_ptr = (Chunk *)list_ptr->item(); + ret += chunk_ptr->total_size_in_bytes(); + list_ptr = list_ptr->next(); + } + return ret; +} + +/* Called only on the I/O node side */ +int Array::get_next_index(Chunk *&chunk, int old_val, int io_node_num, + int num_io_nodes, int max) +{ + int ret = io_node_layout_->get_next_index(chunk, old_val, io_node_num, + num_io_nodes, max); + if (io_node_layout_->distribution_type() == Regular) + if (ret < max) chunk->init(this, ret, IO_NODE, NO_ALLOC); + return ret; +} + +/* This function should be called only on the compute node side and * + * make sense only for the regular distribution of chunks, */ +void Array::set_data_ptr(char *data_ptr) +{ + List *list = compute_node_layout_->chunk_list(); + Chunk *chunk_ptr; + + if (list && list->head_){ + chunk_ptr = (Chunk *) list->head_->item(); + chunk_ptr->set_data_ptr(data_ptr); + } else { + printf("Error: No chunks present - cannot set data ptr\n"); + } +} + + +/* This function should be called only on the compute node side and * + * make sense only for the regular distribution of chunks, */ +char* Array::get_data_ptr() +{ + List *list = compute_node_layout_->chunk_list(); + Chunk *chunk_ptr; + + if (list && list->head_){ + chunk_ptr = (Chunk *) list->head_->item(); + return ((char *)chunk_ptr->data_ptr()); + } else { 
+ printf("Error: No chunks present - cannot set data ptr\n"); + return NULL; + } +} + +Boolean Array::overlaped() +{ + return overlap_; +} + +void Array::read_schema_file(IOFile file_ptr) +{ + int *base = (int *)malloc(sizeof(int) * rank_); + int *size = (int *)malloc(sizeof(int) * rank_); + int index, length, datatype; + Chunk *new_chunk; + + index = IOreadAttributeInfo(file_ptr, "chunk_origin", &datatype, &length); + if (index >=0 ) { // the attribute exists + IOreadAttribute(file_ptr, index, base); + index = IOreadAttributeInfo(file_ptr, "chunk_size",&datatype,&length); + if (index < 0) { printf("Error in reading attributes\n"); exit(0); } + IOreadAttribute(file_ptr, index, size); + new_chunk = new Chunk(this, base, size); + } else { + for (int j=0; j<rank_; j++) base[j] = 0; + new_chunk = new Chunk(this, base, size_); + } + io_node_layout_ = new IrregularDistribution(1, &new_chunk); + free(base); + free(size); +} + +/* The collective io operation to write out the arrays. */ +void Array::timestep() +{ + int *schema, schema_size; + int node_type = MPIFS_global_obj->node_type(); + + op_type_ = TIMESTEP; + if (node_type == COMPUTE_NODE){ + MPIFS_global_obj->send_array_schema(this); + MPIFS_global_obj->compute_node_io_loop(this); + } + else if (node_type == PART_TIME_COMPUTE) + MPIFS_global_obj->compute_node_io_loop(this); + else { + pack(&schema, &schema_size); + MPIFS_global_obj->part_time_io_node_loop(schema, schema_size, this); + } +} + +/* The collective io operation to write out the arrays. 
*/ +void Array::checkpoint() +{ + int *schema, schema_size; + int node_type = MPIFS_global_obj->node_type(); + + op_type_ = CHECKPOINT; + if (node_type == COMPUTE_NODE){ + MPIFS_global_obj->send_array_schema(this); + MPIFS_global_obj->compute_node_io_loop(this); + } + else if (node_type == PART_TIME_COMPUTE) + MPIFS_global_obj->compute_node_io_loop(this); + else { + pack(&schema, &schema_size); + MPIFS_global_obj->part_time_io_node_loop(schema, schema_size, this); + } +} + +/* The collective io operation to read in the arrays from a * + * checkpoint file. Currently (for testing purposes) this * + * does not happen. */ +void Array::restart() +{ + int *schema, schema_size; + int node_type = MPIFS_global_obj->node_type(); + + op_type_ = RESTART; + if (node_type == COMPUTE_NODE){ + MPIFS_global_obj->send_array_schema(this); + MPIFS_global_obj->compute_node_io_loop(this); + } + else if (node_type == PART_TIME_COMPUTE) + MPIFS_global_obj->compute_node_io_loop(this); + else { + pack(&schema, &schema_size); + MPIFS_global_obj->part_time_io_node_loop(schema, schema_size , this); + } +} + +void Array::read_timestep() +{ + int *schema, schema_size; + int node_type = MPIFS_global_obj->node_type(); + + op_type_ = READ_TIMESTEP; + if (node_type == COMPUTE_NODE){ + MPIFS_global_obj->send_array_schema(this); + MPIFS_global_obj->compute_node_io_loop(this); + } + else if (node_type == PART_TIME_COMPUTE) + MPIFS_global_obj->compute_node_io_loop(this); + else { + pack(&schema, &schema_size); + MPIFS_global_obj->part_time_io_node_loop(schema, schema_size , this); + } +} + +int Array::op_type() { return op_type_; } +int Array::io_strategy() { return io_strategy_; } + +void Array::init(int rank, int ieee_size, int *size, int node_type) +{ + rank_ = rank; + ieee_size_ = ieee_size; + element_size_ = IOsizeOf(ieee_size_); + size_ = size; + if (node_type == COMPUTE_NODE) { + int my_rank = MPIFS_global_obj->my_rank(COMPUTE_NODE); + Chunk *new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, 
ALLOC); + compute_node_layout_->add_last(new_chunk); + } +} diff --git a/src/Panda/Array.h b/src/Panda/Array.h new file mode 100644 index 0000000..834fd36 --- /dev/null +++ b/src/Panda/Array.h @@ -0,0 +1,88 @@ +#ifndef Array_dot_h +#define Array_dot_h + +#include "List.h" +#include "ArrayDistribution.h" +#include "Chunk.h" + +#include "external/IEEEIO/src/Arch.h" + +//#include "../IEEEIO/IEEEIO.h" +//#include "../IEEEIO/IOProtos.h" + + +class Array : public Template, public Linkable { + protected: + ArrayDistribution *compute_node_layout_; + ArrayDistribution *io_node_layout_; + ArrayDistribution *subchunk_layout_; + int element_size_; + int ieee_size_; + char *name_; + Boolean natural_chunked_; + Boolean sub_chunked_; + Boolean overlap_; + int op_type_; + int io_strategy_; + + void do_init(char*, int, int*, int, + ArrayLayout*, Distribution*, + ArrayLayout*, Distribution*, + ArrayLayout*, Distribution*, + ChunkAllocPolicy, ChunkAllocPolicy, + Block_Distribution); + void allocate_chunks(int); + void allocate_chunks(int,int,char**,int); + ArrayDistribution *unpack_layout(int **); + + public: + Array(char*,int, int*, int, ArrayLayout*, + Distribution*, ArrayLayout*, + Distribution*); + Array(char*,int, int*, int, ArrayLayout*, + Distribution*, ArrayLayout*, + Distribution*, char *); + Array(char*,int, int*, int, ArrayLayout*, + Distribution*, ArrayLayout*, + Distribution*, ArrayLayout*, + Distribution*); + Array(char*,int, int*, int, ArrayLayout*, + Distribution*, ArrayLayout*, + Distribution*, ArrayLayout*, + Distribution*, char *); + Array(char*,int, int*, int, + ArrayLayout*, Distribution*, + ArrayLayout*, Distribution*, char *, int); + Array(int **); + Array(); + virtual ~Array(); + void init(int,int,int*,int); + Chunk* get_next_chunk(); + int which_node(int,int,int); + void delete_chunks(); + void pack(int**, int*); + ArrayDistribution* layout(int); + int which_node(int,int); + Chunk* find_chunk(int); + int element_size(); + int ieee_size(); + Boolean 
nat_chunked(); + Boolean sub_chunked(); + void make_sub_chunks(Chunk*); + int array_info(); + int get_next_index(Chunk*&,int,int,int,int); + int num_of_chunks(); + void set_data_ptr(char *); + char* get_data_ptr(); + Boolean overlaped(); + void read_schema_file(IOFile); + + void timestep(); + void read_timestep(); + void checkpoint(); + void restart(); + int op_type(); + int io_strategy(); +}; + +#endif diff --git a/src/Panda/ArrayDistribution.C b/src/Panda/ArrayDistribution.C new file mode 100644 index 0000000..04e5226 --- /dev/null +++ b/src/Panda/ArrayDistribution.C @@ -0,0 +1,205 @@ +#include "ArrayDistribution.h" + +/******************************** + * ArrayDistribution * + ********************************/ +Boolean ArrayDistribution::equal(ArrayDistribution *) { return NO; } + +int ArrayDistribution::distribution_type() +{ + printf("In ArrayDistributon: distribution_type, shouldn't be called\n"); + return -1; +} + +ArrayDistribution::ArrayDistribution() +{ + num_of_chunks_ = 0; + chunk_list_ = new List(); + current_cell_ = NULL; +} + +ArrayDistribution::ArrayDistribution(int **schema_buf) +{ + printf("In ArrayDistributon: init, shouldn't be called\n"); +} + +void ArrayDistribution::add_last(Chunk *new_chunk) +{ + chunk_list_->add_last(new_chunk); + num_of_chunks_++; +} + +ArrayDistribution::~ArrayDistribution() +{ + Cell *list_ptr; + if (chunk_list_) { + list_ptr = chunk_list_->head_; + while (list_ptr) { + delete list_ptr->item(); + list_ptr = list_ptr->next(); + } + delete chunk_list_; + chunk_list_ = NULL; num_of_chunks_ = 0; + } +} + +void ArrayDistribution::pack(int **schema_buf) +{ + printf("In ArrayDistributon: pack, shouldn't be called\n"); +} + +int ArrayDistribution::get_next_index(Chunk *&chunk, int old_val, + int io_node_num, + int num_io_nodes, int max) +{ + printf("In ArrayDistributon: get_next_index shouldn't be called\n"); + return max; +} + +List *ArrayDistribution::chunk_list() +{ + return chunk_list_; +} + +Chunk* 
ArrayDistribution::get_next_chunk() +{ + if (current_cell_) current_cell_ = current_cell_->next(); + else current_cell_ = chunk_list_->head_; + + if (current_cell_) return ((Chunk *)current_cell_->item()); + return NULL; +} + +int ArrayDistribution::total_elements() +{ + printf("In ArrayDistributon: total_elements shouldn't be called\n"); + return 0; +} + +ChunkAllocPolicy ArrayDistribution::alloc_policy() +{ + printf("In ArrayDistributon: alloc_policy, shouldn't be called\n"); + return ROUND_ROBIN; +} + +void ArrayDistribution::list_clear() { current_cell_ = NULL; } + +/******************************** + * RegularDistribution * + ********************************/ +RegularDistribution::RegularDistribution(int rank, ArrayLayout *layout, + Distribution *dist, + ChunkAllocPolicy alloc_policy, + Block_Distribution block_dist) + : ArrayDistribution() +{ + layout_ = new ArrayLayout(layout); + rank_ = rank; + alloc_policy_ = alloc_policy; + dist_ = copy_distribution(rank_, dist); + block_dist_ = block_dist; +} + +RegularDistribution::RegularDistribution(int **schema_buf) : ArrayDistribution() +{ + int* ptr = *schema_buf; + layout_ = new ArrayLayout(&ptr); + rank_ = *ptr++; + dist_ = new_distribution(&ptr, rank_); + alloc_policy_ = (ChunkAllocPolicy)*ptr++; + block_dist_ = (Block_Distribution)*ptr++; + *schema_buf = ptr; +} + +RegularDistribution::~RegularDistribution() +{ + if (layout_) { delete layout_; layout_ = NULL; } + if (dist_ ) { free(dist_); dist_ = NULL; } +} + +Boolean RegularDistribution::equal(ArrayDistribution *that) +{ + if (!that) return NO; + + RegularDistribution *tmp; + if (that->distribution_type() == Regular) + tmp = (RegularDistribution *)that; + else return NO; + + if (layout_->equal(tmp->layout_) && + equal_distribution(rank_, dist_, tmp->dist_)) return YES; + return NO; +} + +ArrayLayout *RegularDistribution::layout() +{ + return layout_; +} + +Distribution *RegularDistribution::distribution() +{ + return dist_; +} + +void 
RegularDistribution::pack(int **schema_buf) +{ + int* ptr = *schema_buf; + + *ptr++ = (int)Regular; + layout_->pack(&ptr); + *ptr++ = rank_; + pack_distribution(&ptr, rank_, dist_); + *ptr++ = (int)alloc_policy_; + *ptr++ = block_dist_; + *schema_buf = ptr; +} + +int RegularDistribution::distribution_type() +{ + return Regular; +} + +int RegularDistribution::total_elements() +{ + return layout_->total_elements(); +} + +ChunkAllocPolicy RegularDistribution::alloc_policy() { return alloc_policy_; } + +int RegularDistribution::get_next_index(Chunk *&chunk, int old_val, + int io_node_num, + int num_io_nodes, int max) +{ + if (old_val == -1) return io_node_num; + else return (old_val + num_io_nodes); +} + +Block_Distribution RegularDistribution::block_dist() { return block_dist_; } + +/******************************** + * IrregularDistribution * + ********************************/ +int IrregularDistribution::distribution_type() +{ + return Irregular; +} + +int IrregularDistribution::total_elements() +{ + return num_of_chunks_; +} + +int IrregularDistribution::get_next_index(Chunk *&chunk, + int old_val, int io_node_num, + int num_io_nodes, int max) +{ + chunk = get_next_chunk(); + if (chunk == NULL) return max; + return chunk->chunk_id(); +} + +IrregularDistribution::IrregularDistribution(int num, Chunk **chunk_list) + : ArrayDistribution() +{ + for (int i=0; i<num; i++) add_last(chunk_list[i]); +} diff --git a/src/Panda/ArrayDistribution.h b/src/Panda/ArrayDistribution.h new file mode 100644 index 0000000..12e68e1 --- /dev/null +++ b/src/Panda/ArrayDistribution.h @@ -0,0 +1,70 @@ +#ifndef ArrayDistribution_dot_h +#define ArrayDistribution_dot_h + +#include "definitions.h" +#include "List.h" +#include "ArrayLayout.h" +#include "Chunk.h" + +class Array; +class ArrayDistribution +{ +protected: + int num_of_chunks_; + List *chunk_list_; + Cell *current_cell_; +public: + ArrayDistribution(); + ArrayDistribution(int **); + virtual ~ArrayDistribution(); + virtual Boolean 
equal(ArrayDistribution *); + virtual int distribution_type(); + virtual void pack(int **); + virtual int total_elements(); + virtual ChunkAllocPolicy alloc_policy(); + virtual int get_next_index(Chunk *&,int,int,int,int); + List *chunk_list(); + void add_last(Chunk *); + Chunk *get_next_chunk(); + void list_clear(); +}; + + +class RegularDistribution : public ArrayDistribution +{ + ArrayLayout *layout_; + int rank_; + Distribution *dist_; + Block_Distribution block_dist_; + ChunkAllocPolicy alloc_policy_; +public: + RegularDistribution(int **); + RegularDistribution(int , ArrayLayout *, + Distribution *, ChunkAllocPolicy, + Block_Distribution, int*); + RegularDistribution(int , ArrayLayout *, + Distribution *, ChunkAllocPolicy, + Block_Distribution); + ~RegularDistribution(); + Boolean equal(ArrayDistribution *); + ArrayLayout *layout(); + Distribution *distribution(); + int distribution_type(); + void pack(int **); + int total_elements(); + ChunkAllocPolicy alloc_policy(); + int get_next_index(Chunk *&,int,int,int,int); + Block_Distribution block_dist(); +}; + +class IrregularDistribution : public ArrayDistribution +{ +public: + IrregularDistribution(int, Chunk **); + int distribution_type(); + int total_elements(); + int get_next_index(Chunk *&,int,int,int,int); +}; + +#endif + diff --git a/src/Panda/ArrayGroup.C b/src/Panda/ArrayGroup.C new file mode 100644 index 0000000..afba023 --- /dev/null +++ b/src/Panda/ArrayGroup.C @@ -0,0 +1,521 @@ +#include "definitions.h" +#include "MPIFS.h" +#include "Array.h" +#include "ArrayGroup.h" + +extern MPIFS *MPIFS_global_obj; + +ArrayGroup::ArrayGroup() +{ + do_init(); +} + +ArrayGroup::ArrayGroup(char *name) +{ + do_init(); + name_ = (char *)malloc(strlen(name)+1); + strcpy(name_, name); +} + + +/* Function to initialize the state of the newly created object */ +void ArrayGroup::do_init() +{ + num_of_arrays_ = 0; + list_ = new List(); + io_strategy_ = SIMPLE_IO; + interleaved_ = NO; + common_layouts_ = NO; + 
common_layout_rank_ = 0; + compute_layout_ = NULL; + compute_distribution_ = NULL; + io_layout_ = NULL; + io_distribution_ = NULL; + group_io_count_ = 0; + read_io_count_ =0; + checkpoint_count_ = 1; + simulate_ = NO; + verify_ = NO; + name_ = NULL; +} + +void ArrayGroup::clear() +{ + if (name_) free(name_); + if (compute_layout_ != NULL) delete compute_layout_; + if (compute_distribution_ != NULL) delete compute_distribution_; + if (io_layout_ != NULL) delete io_layout_; + if (io_distribution_ != NULL) delete io_distribution_; + if (list_) delete list_; + name_ = NULL; + compute_layout_ = NULL; compute_distribution_ = NULL; + io_layout_ = NULL; io_distribution_ = NULL; + list_ = new List(); +} + +/* Destructor function - Note that we don't have to delete the * + * arrays in the arraygroup over here. The arrays are deleted * + * by the user */ +ArrayGroup::~ArrayGroup() +{ + if (name_) free(name_); + if (compute_layout_ != NULL) delete compute_layout_; + if (compute_distribution_ != NULL) delete compute_distribution_; + if (io_layout_ != NULL) delete io_layout_; + if (io_distribution_ != NULL) delete io_distribution_; + if (list_) delete list_; + name_ = NULL; + compute_layout_ = NULL; + io_layout_ = NULL; + compute_distribution_ = NULL; + io_distribution_ = NULL; + list_ = NULL; +} + +/* Function to delete the arrays in the arraygroup. This is used * + * on the io node side to delete the arrays after the collective * + * io operation has been completed. On the compute node side, the* + * user explicitly deletes the arrays */ +void ArrayGroup::delete_arrays() +{ + Cell* list_ptr = (list_ != NULL? list_->head_: NULL); + Array* array_ptr; + + while(list_ptr) + { + array_ptr = (Array *) list_ptr->item(); + delete array_ptr; + list_ptr = list_ptr->next(); + } + if (list_) delete list_; + list_ = NULL; +} + +/* Assign id numbers to the arrays in the arraygroup. This function * + * must be called at the start of each collective i/o operation. 
*/ +void ArrayGroup::assign_id() +{ + Cell* list_ptr = (list_ != NULL? list_->head_: NULL); + Array* array_ptr; + int i=0; + + while(list_ptr) + { + array_ptr = (Array *) list_ptr->item(); + array_ptr->set_array_id(i); +#ifdef DEBUG + printf("Assigned Id %d\n", i); +#endif + i++; + list_ptr = list_ptr->next(); + } +} + +/* Insert a new array. Before inserting the array, check to * + * see if it has a common layout with the rest of the arrays */ +void ArrayGroup::insert(Array *new_array) +{ + num_of_arrays_++; + + /* Has common layouts since this is the first array */ + if (num_of_arrays_ == 1) + { + common_layout_rank_ = new_array->rank(); + compute_layout_ = new ArrayLayout(new_array->layout(COMPUTE_NODE)); + compute_distribution_ = copy_distribution(common_layout_rank_, + new_array->distribution(COMPUTE_NODE)); + io_layout_ = new ArrayLayout(new_array->layout(IO_NODE)); + io_distribution_ = copy_distribution(common_layout_rank_, + new_array->distribution(IO_NODE)); + common_layouts_ = YES; + } + else if (common_layouts_) + { + /* check to see if the array has the same layouts/dist */ + if ((common_layout_rank_ == new_array->rank()) && + (compute_layout_->equal(new_array->layout(COMPUTE_NODE))) && + (io_layout_->equal(new_array->layout(IO_NODE))) && + (equal_distribution(common_layout_rank_, compute_distribution_, + new_array->distribution(COMPUTE_NODE))) && + (equal_distribution(common_layout_rank_, io_distribution_, + new_array->distribution(IO_NODE)))) + { + common_layouts_ = YES; + } + else + { + common_layouts_ = NO; + if (io_layout_) delete io_layout_; + if (compute_layout_) delete compute_layout_; + io_layout_ = compute_layout_ = NULL; + if (io_distribution_) free(io_distribution_); + if (compute_distribution_) free(compute_distribution_); + io_distribution_ = compute_distribution_ = NULL; + } + } + + list_->add_last(new_array); +} + +/* This function is called on the compute node side at the start * + * of each collective io operation. 
The information is packed * + * into an integer buffer. An assumption is made that the a buf * + * of 100 ints is sufficent for each array. */ +void ArrayGroup::pack(int** schema, int* schema_size) +{ + int *ptr, *head; + int i, len; + + /* Assuming that schema size of Array is ~= 100 elts */ + ptr = (int *) malloc(sizeof(int)*100*(num_of_arrays_+1)); + head = ptr; + + /* Round about way and space inefficent way of storing a name */ + *ptr++ = io_strategy_; + len = strlen(name_); + *ptr++ = len; + for(i=0; i<len;i++) + *ptr++ = (int) name_[i]; + + *ptr++ = num_of_arrays_; + *ptr++ = (int) interleaved_; + *ptr++ = (int) simulate_; + *ptr++ = (int) verify_; + *ptr++ = (int) common_layouts_; + if (common_layouts_) + { + *ptr++ = common_layout_rank_ ; + compute_layout_->pack(&ptr); + pack_distribution(&ptr, common_layout_rank_, compute_distribution_); + io_layout_->pack(&ptr); + pack_distribution(&ptr, common_layout_rank_, io_distribution_); + } + *ptr++ = group_io_count_; + *ptr++ = checkpoint_count_; + *ptr++ = op_type_; + + pack_arrays(&ptr, common_layouts_); + + *schema_size = (int)(ptr - head); + *schema = head; +} + +/* This function is called on the I/O node side. After receiving * + * the collective io schema, the information is unpacked. 
The * + * arrays are unpacked seperately via a another function call */ +void ArrayGroup::unpack(int **schema_ptr) +{ + int *ptr = *schema_ptr; + int len; + + /* Unpack the name */ + io_strategy_ = *ptr++; + len = *ptr++; + name_ = (char *) malloc(len+1); + for(int i=0; i< len; i++) + name_[i] = (char) *ptr++; + name_[len] = '\0'; + + num_of_arrays_ = *ptr++; + interleaved_ = (Boolean) *ptr++; + simulate_ = (Boolean) *ptr++; + verify_ = (Boolean) *ptr++; + common_layouts_ = (Boolean) *ptr++; + if (common_layouts_) + { + common_layout_rank_ = *ptr++; + compute_layout_ = new ArrayLayout(&ptr); + compute_distribution_ = new_distribution(&ptr, common_layout_rank_); + io_layout_ = new ArrayLayout(&ptr); + io_distribution_ = new_distribution(&ptr, common_layout_rank_); + } + else + { + common_layout_rank_ = 0; + compute_layout_ = io_layout_ = NULL; + compute_distribution_ = io_distribution_ = NULL; + } + group_io_count_ = *ptr++; + checkpoint_count_ = *ptr++; + op_type_ = *ptr++; + + /* Arrays are being unpacked seperately */ + *schema_ptr = ptr; +} + +void ArrayGroup::unpack_arrays(int **schema_buf) +{ + Array *array; + int i, *ptr = *schema_buf; + + if (common_layouts_){ + for(i=0;i<num_of_arrays_;i++){ + array = new Array(&ptr, common_layouts_, compute_layout_, + compute_distribution_, io_layout_, + io_distribution_); + list_->add_last(array); + } + } else { + for(i=0;i<num_of_arrays_;i++){ + array = new Array(&ptr, common_layouts_); + list_->add_last(array); + } + } + *schema_buf = ptr; +} + + +/* The collective io operation to write out the arrays. 
*/ +void ArrayGroup::timestep() +{ + int *schema, schema_size; + int node_type = MPIFS_global_obj->node_type(); + /* Assign id numbers to each array */ + assign_id(); + + op_type_ = TIMESTEP; + if (node_type == COMPUTE_NODE){ + MPIFS_global_obj->send_group_schema(this); + MPIFS_global_obj->compute_node_io_loop(this); + } + else if (node_type == PART_TIME_COMPUTE) + MPIFS_global_obj->compute_node_io_loop(this); + else { + pack(&schema, &schema_size); + MPIFS_global_obj->part_time_io_node_loop(schema, schema_size, this); + } + /* Commented out for testing purposes */ +// group_io_count_++; +} + +/* The collective io operation to write out the arrays. */ +void ArrayGroup::general_write() +{ + int *schema, schema_size; + int node_type = MPIFS_global_obj->node_type(); + /* Assign id numbers to each array */ + assign_id(); + + op_type_ = GENERAL_WRITE; + if (node_type == COMPUTE_NODE){ + MPIFS_global_obj->send_group_schema(this); + MPIFS_global_obj->compute_node_io_loop(this); + } + else if (node_type == PART_TIME_COMPUTE) + MPIFS_global_obj->compute_node_io_loop(this); + else { + pack(&schema, &schema_size); + MPIFS_global_obj->part_time_io_node_loop(schema, schema_size, this); + } + /* Commented out for testing purposes */ +// group_io_count_++; +} + + +/* The collective io operation to write out the arrays. 
*/ +void ArrayGroup::checkpoint() +{ + int *schema, schema_size; + int node_type = MPIFS_global_obj->node_type(); + /* Assign id numbers to each array */ + assign_id(); + + if (checkpoint_count_ == 0) + checkpoint_count_ = 1; + else + checkpoint_count_ = 0; + + op_type_ = CHECKPOINT; + if (node_type == COMPUTE_NODE){ + MPIFS_global_obj->send_group_schema(this); + MPIFS_global_obj->compute_node_io_loop(this); + } + else if (node_type == PART_TIME_COMPUTE) + MPIFS_global_obj->compute_node_io_loop(this); + else { + pack(&schema, &schema_size); + MPIFS_global_obj->part_time_io_node_loop(schema, schema_size, this); + } + +} + + + +/* The collective io operation to read in the arrays from a * + * checkpoint file. Currently (for testing purposes) this * + * does not happen. */ +void ArrayGroup::restart() +{ + int *schema, schema_size; + int node_type = MPIFS_global_obj->node_type(); + + /* Assign id numbers to each array */ + assign_id(); + + op_type_ = RESTART; + if (node_type == COMPUTE_NODE){ + MPIFS_global_obj->send_group_schema(this); + MPIFS_global_obj->compute_node_io_loop(this); + } + else if (node_type == PART_TIME_COMPUTE) + MPIFS_global_obj->compute_node_io_loop(this); + else { + pack(&schema, &schema_size); + MPIFS_global_obj->part_time_io_node_loop(schema, schema_size , this); + } +} + + +void ArrayGroup::read_timestep() +{ + int *schema, schema_size; + int node_type = MPIFS_global_obj->node_type(); + + /* Assign id numbers to each array */ + assign_id(); + + op_type_ = READ_TIMESTEP; + if (node_type == COMPUTE_NODE){ + MPIFS_global_obj->send_group_schema(this); + MPIFS_global_obj->compute_node_io_loop(this); + } + else if (node_type == PART_TIME_COMPUTE) + MPIFS_global_obj->compute_node_io_loop(this); + else { + pack(&schema, &schema_size); + MPIFS_global_obj->part_time_io_node_loop(schema, schema_size , this); + } + /* Commented out for testing purposes */ +// read_io_count_++; +} + + +void ArrayGroup::general_read() +{ + int *schema, schema_size; + int 
node_type = MPIFS_global_obj->node_type(); + + /* Assign id numbers to each array */ + assign_id(); + + op_type_ = GENERAL_READ; + if (node_type == COMPUTE_NODE){ + MPIFS_global_obj->send_group_schema(this); + MPIFS_global_obj->compute_node_io_loop(this); + } + else if (node_type == PART_TIME_COMPUTE) + MPIFS_global_obj->compute_node_io_loop(this); + else { + pack(&schema, &schema_size); + MPIFS_global_obj->part_time_io_node_loop(schema, schema_size , this); + } + /* Commented out for testing purposes */ +// read_io_count_++; +} + + + + +/* Given an array id find the array object in the array group * + * The code caches the previous search value and starts the * + * search from there. This helps especially in the case of * + * when the arrays are accessed sequentially */ +Array* ArrayGroup::find_array(int array_id) +{ + Cell* list_ptr = (list_ != NULL ? + (list_->old_search_val_ != NULL ? list_->old_search_val_ : + list_->head_) + : NULL); + Array* array_ptr; + + while(list_ptr) + { + array_ptr = (Array *) list_ptr->item(); + if (array_ptr->array_id() == array_id) + { + list_->old_search_val_ = list_ptr->next(); + return array_ptr; + } + list_ptr = list_ptr->next(); + } + + list_ptr = list_->head_; + while (list_->old_search_val_ && (list_ptr != list_->old_search_val_)) + { + array_ptr = (Array *) list_ptr->item(); + if (array_ptr->array_id() == array_id) + { + list_->old_search_val_ = list_ptr->next(); + return array_ptr; + } + list_ptr = list_ptr->next(); + } + return NULL; +} + +/* Pack the arrays into an integer schema buffer. Assumes that the * + * data is already allocated. */ +void ArrayGroup::pack_arrays(int **schema_buf, Boolean common_layouts) +{ + Cell* list_ptr = (list_ != NULL ? 
list_->head_ : NULL); + Array* array_ptr; + + while(list_ptr) + { + array_ptr = (Array *) list_ptr->item(); + array_ptr->pack(schema_buf, common_layouts); + list_ptr = list_ptr->next(); + } + +} + + +int ArrayGroup::op_type(){return op_type_;} + +void ArrayGroup::set_simulate(){simulate_ = YES;} + +void ArrayGroup::reset_simulate(){simulate_ = NO;} + +void ArrayGroup::set_simulate_mode(){simulate_ = YES;} + +void ArrayGroup::reset_simulate_mode(){simulate_ = NO;} + +Boolean ArrayGroup::simulate(){return simulate_;} + + +void ArrayGroup::set_verify(){verify_ = YES;} + +void ArrayGroup::reset_verify(){verify_ = NO;} + +void ArrayGroup::set_verify_mode(){verify_ = YES;} + +void ArrayGroup::reset_verify_mode(){verify_ = NO;} + +Boolean ArrayGroup::verify(){return verify_;} + +/* This function is called on each compute node side and after all the + * arrays have been assigned an id. For each array in the arraygroup, + * the function computes the total number of bytes on the compute node + */ +void ArrayGroup::init_array_info(int *num_arrays, int **array_bytes_to_go) +{ + int *tmp_buf = (int *) malloc(sizeof(int)*num_of_arrays_); + *array_bytes_to_go = tmp_buf; + *num_arrays = num_of_arrays_; + + for(int i=0; i< num_of_arrays_; i++) + tmp_buf[i] = find_array(i)->array_info(); +} + +int ArrayGroup::io_strategy(){ + return io_strategy_; +} + +void ArrayGroup::set_io_strategy(int new_strategy){ + io_strategy_ = new_strategy; +} + +int ArrayGroup::num_of_arrays() +{ + return num_of_arrays_; +} diff --git a/src/Panda/ArrayGroup.h b/src/Panda/ArrayGroup.h new file mode 100644 index 0000000..0cd741b --- /dev/null +++ b/src/Panda/ArrayGroup.h @@ -0,0 +1,75 @@ +#ifndef Arraygroup_dot_h +#define Arraygroup_dot_h + +class Array; +class ArrayLayout; +#include "List.h" +#include "definitions.h" + +class ArrayGroup { + protected: + char *name_; /* Name of the arraygroup */ + int num_of_arrays_; /* Number of arrays in group */ + List *list_; /* List of arrays */ + int io_strategy_; + + 
/* If all the arrays have the same io and compute node layouts */ + Boolean common_layouts_; + int common_layout_rank_; + ArrayLayout *compute_layout_; + Distribution *compute_distribution_; + ArrayLayout *io_layout_; + Distribution *io_distribution_; + + + int group_io_count_; + int read_io_count_; + int checkpoint_count_; + int op_type_; + + + Boolean interleaved_; + Boolean simulate_; + Boolean verify_; + + void do_init(); + void delete_arrays(); + void assign_id(); + void pack_arrays(int**, Boolean); + + public: + + ArrayGroup(); + ArrayGroup(char *); + virtual ~ArrayGroup(); + void insert(Array*); + void pack(int**, int*); + void unpack(int**); + void timestep(); + void general_write(); + void checkpoint(); + void restart(); + void read_timestep(); + void general_read(); + Array *find_array(int); + int op_type(); + void set_simulate(); + void reset_simulate(); + void set_simulate_mode(); + void reset_simulate_mode(); + Boolean simulate(); + Boolean verify(); + void set_verify(); + void reset_verify(); + void set_verify_mode(); + void reset_verify_mode(); + void unpack_arrays(int**); + void init_array_info(int*,int**); + void set_io_strategy(int); + int io_strategy(); + int num_of_arrays(); + void clear(); +}; + +#endif + diff --git a/src/Panda/ArrayLayout.C b/src/Panda/ArrayLayout.C new file mode 100644 index 0000000..1398ef4 --- /dev/null +++ b/src/Panda/ArrayLayout.C @@ -0,0 +1,179 @@ +#include "definitions.h" +#include "ArrayLayout.h" + +ArrayLayout::ArrayLayout(int Rank, int *sizearray):Template(Rank, sizearray){} + +/* Create an arraylayout object using info stored in the schema buffer */ +ArrayLayout::ArrayLayout(int **schema_buf) +{ + int* ptr = *schema_buf; + + rank_ = *ptr++; + size_ = (int *) malloc(sizeof(int)*rank_); + for(int i=0; i < rank_; i++) + size_[i] = *ptr++; + + *schema_buf = ptr; +} + +/* Make a copy of an existing ArrayLayout object */ +ArrayLayout::ArrayLayout(ArrayLayout *old_layout) +{ + rank_ = old_layout->rank(); + size_ = 
copy_int_list(rank_, old_layout->size()); +} + +/* Use the destructor of the Template object */ +ArrayLayout::~ArrayLayout() +{ +} + +/* converts a chunk index to a number */ +int ArrayLayout::convert_from_index_to_number(int *indices) +{ + int result=0, temp_product=1; + for(int i=rank_-1; i>=0; i--) + { + result += temp_product * indices[i]; + temp_product *= size_[i]; + } + return result; +} + +/* converts a number to the appropriate chunk index */ +void ArrayLayout::convert_from_number_to_index(int num, int *result) +{ + int temp_product=1; + int i, j; + + for(i = 0; i< rank_; i++) + { + temp_product = 1; + for(j = i+1 ; j < rank_; j++) + temp_product *= size_[j]; + result[i] = num / temp_product; + num -= num/temp_product *temp_product; + } +} + +/* converts a number to the appropriate chunk index */ +int* ArrayLayout::convert_from_number_to_index(int num) +{ + int* result = (int *) malloc(sizeof(int)*rank_); + convert_from_number_to_index(num, result); + return result; +} + +/* Check if the input indices are valid. Assumes that the rank of * + * input indices are the same as rank of the layout */ +Boolean ArrayLayout::valid_index(int *indices) +{ + if (indices == NULL) return NO; + else for(int i=0; i<rank_; i++) + { + if ((indices[i] < 0) || (indices[i] >= size_[i])) + return NO; + } + return YES; +} + +/* Checks if the specified input distribution of the array is * + * compatible with the layout. 
It is compatible only if the * + * number of dimensions in which the array is distributed in a * + * BLOCK or CYCLIC fashion is equal to the rank of the layout */ +Boolean ArrayLayout::valid_distribution(int array_rank, Distribution* dist) +{ + if ((array_rank <= 0) || (dist == NULL)) return NO; + else { + int block_or_cyclic=0, i; + for (i=0;i<array_rank;i++) + if ((dist[i]==BLOCK)||(dist[i]==CYCLIC)) block_or_cyclic++; + if (block_or_cyclic != rank_) + return NO; + else + return YES; + } +} + + +int ArrayLayout::size(int i){return size_[i];} + + + + +/* This function is used to return a linked list of numbers (representing the + * indices of the compute node chunks which overlap with the io node chunk) + * given the base, size of the overlapping layout (??). + * + * The function assumes that the input is valid + */ +void ArrayLayout::indices_list(int *index_base, int *index_size, + int *num, int *ret_list) +{ + int *ptr=ret_list; + int size=1; + for(int i=0; i < rank(); i++) + size *= index_size[i]; + *num = size; + calculate_indices(index_base, index_size, rank(), 0, &ptr); +} + +/* Recursive function to convert a layout into a list of numbers */ +void ArrayLayout::calculate_indices(int *index_base, int *index_size, + int my_rank, int sum, int **buf_ptr) +{ + int prod=1, i; + int *ptr; + +#ifdef DEBUG + printf("In calculate indices rank=%d sum=%d *buf=%ld\n", my_rank, sum, *buf_ptr); +#endif + if (my_rank > 1) + { + for(i=rank()-1; i > (rank() - my_rank) ; i--) + prod *= size_[i]; + for(i=0 ; i < index_size[rank()-my_rank]; i++) + calculate_indices(index_base, index_size, my_rank-1, + sum + (index_base[rank()-my_rank]+i)*prod, buf_ptr); + } + else + { + for(i=0; i < index_size[rank()-my_rank]; i++) + { + ptr = *buf_ptr; + *ptr++ = sum + (index_base[rank()-my_rank]+i); +#ifdef DEBUG + printf("In calculate indices *buf=%ld val=%d\n", *buf_ptr, **buf_ptr); +#endif + *buf_ptr = ptr; + } + } +} + +/* Pack the info into the schema buffer */ +void ArrayLayout::pack(int 
**schema_buf) +{ + int* ptr = *schema_buf; + + *ptr++ = rank_; + for(int i=0; i< rank_; i++) + *ptr++ = size_[i]; + + *schema_buf = ptr; +} + +/* Check if the two layouts are equal */ +Boolean ArrayLayout::equal(ArrayLayout *layout) +{ + if (rank_ != layout->rank()) return NO; + for(int i=0; i<rank_; i++) + if (size_[i] != layout->size(i)) return NO; + return YES; +} + + + +int* ArrayLayout::size(){return size_;} + + + diff --git a/src/Panda/ArrayLayout.h b/src/Panda/ArrayLayout.h new file mode 100644 index 0000000..d5e0a65 --- /dev/null +++ b/src/Panda/ArrayLayout.h @@ -0,0 +1,26 @@ +#ifndef ArrayLayout_dot_h +#define ArrayLayout_dot_h + +#include "Template.h" + +class ArrayLayout : public Template { + /* Inherits rank_,size_ from Template */ + public: + ArrayLayout(int Rank, int *sizearray); + ArrayLayout(int** schema_buf); + ArrayLayout(ArrayLayout *old_layout); + virtual ~ArrayLayout(); + void pack(int** schema_buf); + int convert_from_index_to_number(int *indices); + int* convert_from_number_to_index(int num); + void convert_from_number_to_index(int num, int *result); + Boolean valid_index(int *); + Boolean valid_distribution(int, Distribution*); + Boolean equal(ArrayLayout*); + int size(int); + int* size(); + void indices_list(int*, int*, int*, int*); + void calculate_indices(int*,int*,int,int,int**); +}; + +#endif diff --git a/src/Panda/Attribute.C b/src/Panda/Attribute.C new file mode 100644 index 0000000..0c50f04 --- /dev/null +++ b/src/Panda/Attribute.C @@ -0,0 +1,187 @@ +#include "definitions.h" +#include "Attribute.h" +#include "MPIFS.h" +#include "string.h" + + +extern MPIFS *MPIFS_global_obj; +extern "C" { + int IOwriteAttribute(IOFile,char*,int,int,void *); + int IOsizeOf(int); + int IOreadAttributeInfo(IOFile,char*,int*,int*); + int IOreadAttribute(IOFile,int,void*); +// IOFile IEEEopen(char *,char *); +} + +Attribute::Attribute() +{ + name_ = NULL; + data_status_ = 0; + data_ = NULL; +} + +void Attribute::init(char *name) +{ + int len = 
strlen(name); + name_ = (char *)malloc(sizeof(char) * (len + 1)); + for (int i=0; i<len; i++) name_[i] = name[i]; + name_[i] = '\0'; +} + +void Attribute::init(char *name, int esize, int count, void *data) +{ + int len = strlen(name); + name_ = (char *)malloc(sizeof(char) * (len + 1)); + for (int i=0; i<len; i++) name_[i] = name[i]; + name_[i] = '\0'; + esize_ = esize; + count_ = count; + data_ = data; + data_status_ = 0; +} + +Attribute::~Attribute() +{ + if (name_) free(name_); + if (data_status_ && data_) free(data_); +} + +void Attribute::pack(int &schema_len, char *&schema, char *fname, int op_type) +{ + union int_to_char tmp; + int i, real_size = IOsizeOf(esize_); + + int len1 = strlen(fname); + int len = strlen(name_); + if (op_type == TIMESTEP) + schema_len = 5 * sizeof(int) + len1 + len + real_size * count_; + else schema_len = 3 * sizeof(int) + len1 + len; + schema = (char *)malloc(sizeof(char) * schema_len); + char *ptr = schema; + + tmp.i = op_type; + for (i=0; i<4; i++) *ptr++ = tmp.c[i]; + tmp.i = len1; + for (i=0; i<4; i++) *ptr++ = tmp.c[i]; + for (i=0; i<len1; i++) *ptr++ = fname[i]; + tmp.i = len; + for (i=0; i<4; i++) *ptr++ = tmp.c[i]; + for (i=0; i<len; i++) *ptr++ = name_[i]; + + if (op_type == TIMESTEP) { + tmp.i = esize_; + for (i=0; i<4; i++) *ptr++ = tmp.c[i]; + tmp.i = count_; + for (i=0; i<4; i++) *ptr++ = tmp.c[i]; + memcpy(ptr, data_, real_size * count_); + } +} + +Attribute::Attribute(char *schema, int op_type) +{ + union int_to_char tmp; + int i, len, real_size; + char *ptr = schema; + + for (i=0; i<4; i++) tmp.c[i] = *ptr++; + len = tmp.i; + name_ = (char *)malloc(sizeof(char) * (len + 1)); + for (i=0; i<len; i++) name_[i] = *ptr++; + name_[i] = '\0'; + + if (op_type == TIMESTEP) { + for (i=0; i<4; i++) tmp.c[i] = *ptr++; + esize_ = tmp.i; + real_size = IOsizeOf(esize_); + for (i=0; i<4; i++) tmp.c[i] = *ptr++; + count_ = tmp.i; + data_ = (void *)malloc(esize_ * count_); + memcpy(data_, ptr, real_size * count_); + data_status_ = 1; 
+ } +} + +void Attribute::read(char *fname, char *n) +{ + int node_type = MPIFS_global_obj->node_type(); + IOFile fp; + + if (node_type == PART_TIME_COMPUTE || node_type == COMPUTE_NODE) { + if (MPIFS_global_obj->am_master_compute_node()) { + init(n); + MPIFS_global_obj->send_attr_schema(this, fname, READ_TIMESTEP); + } + MPIFS_global_obj->receive_attr_data(this); + } else { // PART_TIME_IO + init(n); + if (MPIFS_global_obj->am_master_compute_node()) + MPIFS_global_obj->send_attr_schema(this, fname, READ_TIMESTEP); + MPIFS_global_obj->receive_attr_schema(); + + int len = strlen(fname); + char *name = (char *)malloc(sizeof(char) * (len+1)); + char *name1 = (char *)malloc(sizeof(char) * (len+6)); + for (int i=0; i<len; i++) name[i] = fname[i]; + name[i] = '\0'; + sprintf(name1, "%s.%d", name, MPIFS_global_obj->my_rank(IO_NODE)); + fp = MPIFS_global_obj->open_file(name1, READ_TIMESTEP); + read_data(fp); + if (MPIFS_global_obj->am_master_io_node()) { + MPIFS_global_obj->send_attr_data(this); + } + MPIFS_global_obj->receive_attr_data(this); + free(name); + } +} + +void Attribute::write(char *fname, char *n, int esize, int count, void *data) +{ + int node_type = MPIFS_global_obj->node_type(); + + if (node_type == PART_TIME_COMPUTE || node_type == COMPUTE_NODE) { + if (MPIFS_global_obj->am_master_compute_node()) { + init(n, esize, count, data); + MPIFS_global_obj->send_attr_schema(this, fname, TIMESTEP); + } + } else { // PART_TIME_IO + init(n, esize, count, data); + if (MPIFS_global_obj->am_master_compute_node()) + MPIFS_global_obj->send_attr_schema(this, fname, TIMESTEP); + MPIFS_global_obj->receive_attr_schema(); + + IOFile fp; + int len = strlen(fname); + char *name = (char *)malloc(sizeof(char) * (len+1)); + char *name1 = (char *)malloc(sizeof(char) * (len+6)); + for (int i=0; i<len; i++) name[i] = fname[i]; + name[i] = '\0'; + sprintf(name1, "%s.%d", name, MPIFS_global_obj->my_rank(IO_NODE)); + + fp = MPIFS_global_obj->open_file(name1, TIMESTEP); + + write_data(fp); 
+ free(name); + } +} + +void Attribute::write_data(IOFile fp) +{ + IOwriteAttribute(fp, name_, esize_, count_, data_); +} + +void Attribute::read_data(IOFile fp) +{ + int index = IOreadAttributeInfo(fp, name_, &esize_, &count_); + if (index >= 0) { + data_ = (void *)malloc(IOsizeOf(esize_) * count_); + IOreadAttribute(fp, index, data_); + } else printf("Fail to read attribute %s\n", name_); +} + +void *Attribute::get_data_ptr() { return data_; } +void Attribute::set_data_ptr(void *d) { data_ = d; } +int Attribute::data_size() { return IOsizeOf(esize_) * count_; } +int Attribute::esize() { return esize_; } +int Attribute::count() { return count_; } +void Attribute::set_count(int c) { count_ = c; } +void Attribute::set_esize(int e) { esize_ = e; } diff --git a/src/Panda/Attribute.h b/src/Panda/Attribute.h new file mode 100644 index 0000000..d948316 --- /dev/null +++ b/src/Panda/Attribute.h @@ -0,0 +1,43 @@ +#ifndef Attribute_dot_h +#define Attribute_dot_h + +#include "definitions.h" + + +typedef union int_to_char { + int i; + char c[4]; +} int_to_char; + +//#include "../IEEEIO/IEEEIO.h" +//#include "../IEEEIO/IOProtos.h" +#include "external/IEEEIO/src/Arch.h" + +class Attribute { + char *name_; + int esize_; + int count_; + void *data_; + int data_status_; // 0: no alloc, 1: alloc + +public: + Attribute(); + Attribute(char *, int); + ~Attribute(); + void init(char *, int, int, void *); + void init(char *); + void pack(int &, char *&, char *, int); + void write(char *, char *, int, int, void *); + void read(char *, char *); + void write_data(IOFile); + void read_data(IOFile); + void *get_data_ptr(); + void set_data_ptr(void *); + int data_size(); + int esize(); + int count(); + void set_esize(int); + void set_count(int); +}; + +#endif diff --git a/src/Panda/CSDIO.C b/src/Panda/CSDIO.C new file mode 100644 index 0000000..b2d4064 --- /dev/null +++ b/src/Panda/CSDIO.C @@ -0,0 +1,694 @@ +#include "definitions.h" +#include "ArrayGroup.h" +#include "MPIFS.h" +#include 
"Chunk.h" +#include "App_Info.h" +#include "Array.h" +#include "message.h" +#include "CSDIO.h" +#include "List.h" + + +extern MPIFS* MPIFS_global_obj; +extern int SUBCHUNK_SIZE; + +/* This code is executed on the compute nodes (excluding the part-time i/o + * nodes). + */ +void CSDIO::compute_node_io_loop(ArrayGroup *group) +{ + int array_idx; + Boolean read_op; + + op_type_ = group->op_type(); + if ((op_type_ == RESTART) || (op_type_ == GENERAL_READ) || + (op_type_ == READ_TIMESTEP)){ + read_op = YES; + } else { + read_op = NO; + } + + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_); + num_of_arrays_ = group->num_of_arrays(); + receive_io_app_info(); + num_io_nodes_ = io_app_info_->app_size(); +#ifdef DEBUG + printf("%d: op_type_ = %d read_op =%d\n", world_rank_, op_type_, read_op); + printf("%d: Compute node - num of arrays %d - num of io_nodes %d\n", + world_rank_, num_of_arrays_, num_io_nodes_); +#endif + comp_current_array_ = new Array(); + comp_current_array_id_ = -1; + + for(array_idx = 0; array_idx < num_of_arrays_; array_idx++){ + while(!process_compute_side_array(group, array_idx, read_op)){}; + } + delete comp_current_array_; + comp_current_array_ = NULL; +} + +/* An array is stored in the comp_current_array_. this must be instatntiated + * before use. If the input array_id is the same as that stored in + * comp_current_array_id_, then it means that all the required sends/recvs + * have been posted and all we have to do is to verify its completion. If + * they are different, then it means that we have to start the i/o for the + * new array. 
+ */ +Boolean CSDIO::process_compute_side_array(ArrayGroup *group, + int array_idx, Boolean read_op) +{ + int make_subchunks=-1, tag, tag_ctr=0, buf_size, bytes_to_go, flag, i; + char *tmp_buf; + void *void_buf; + Chunk *compute_chunk=NULL, *io_chunk=NULL, *subchunk=NULL; + + if (comp_current_array_id_ != array_idx){ + /* We have to post the sends/recvs for this array*/ + + comp_current_array_->copy(group->find_array(array_idx)); + comp_array_rank_ = comp_current_array_->rank(); + if (comp_array_rank_ > max_comp_rank_){ + realloc_compute_schema_bufs(comp_array_rank_); + } + + nat_chunked_ = comp_current_array_->nat_chunked(); + sub_chunked_ = comp_current_array_->sub_chunked(); + if (nat_chunked_ && !sub_chunked_) + contiguous_ = YES; + else + contiguous_ = NO; + compute_pending_ = 0; + + if (contiguous_){ + /* Nat chunking with no user-specified chunking. We don't need + * to use any MPI dervied datatypes. + */ + comp_current_array_->list_clear(); + compute_chunk = comp_current_array_->get_next_chunk(); + while(compute_chunk != NULL){ + comp_current_chunk_id_ = compute_chunk->chunk_id(); + io_overlaps_ = 1; + io_overlap_chunk_ids_[0] = comp_current_chunk_id_; + io_dest_ids_[0] = io_app_info_->world_rank(comp_current_array_->which_node( + comp_current_chunk_id_, + IO_NODE, num_io_nodes_)); + + if (io_dest_ids_[0] == world_rank_){ + /* Part-time case - do nothing, the io node should take + care of this */ + } + else { + bytes_to_go = compute_chunk->total_size_in_bytes(); + tmp_buf = (char *)compute_chunk->data_ptr(); + tag_ctr = 0; + while(bytes_to_go > 0){ + buf_size = min(SUBCHUNK_SIZE, bytes_to_go); + if (compute_pending_ >= max_pending_){ + realloc_pending_messages(compute_pending_+1); + } + + tag = comp_current_chunk_id_ * 1000 + tag_ctr*10; + if (read_op) + nb_receive_message((void *) tmp_buf, buf_size, MPI_CHAR, + io_dest_ids_[0], tag + CHUNK_DATA_FROM_IO, + MPI_COMM_WORLD, + &comp_requests_[compute_pending_]); + else + nb_send_message((void *) tmp_buf, 
buf_size, MPI_CHAR, + io_dest_ids_[0], tag + CHUNK_DATA_TO_IO, + MPI_COMM_WORLD, + &comp_requests_[compute_pending_]); + + tag_ctr++; + tmp_buf += buf_size; + bytes_to_go -= buf_size; + compute_pending_++; + } + } + + compute_chunk = comp_current_array_->get_next_chunk(); + } + + comp_current_array_->list_clear(); + } /* End if contiguous */ + else { + /* We have to use mpi-derived datatypes */ + make_subchunks = -1; + io_chunk = new Chunk(); + subchunk = new Chunk(); + comp_current_array_->list_clear(); + + compute_chunk = comp_current_array_->get_next_chunk(); + while (compute_chunk != NULL){ + comp_current_chunk_id_ = compute_chunk->chunk_id(); + + /* Determine the overlapping I/O chunks */ + io_chunk_overlaps(comp_current_array_, compute_chunk); + for( i=0;i< io_overlaps_;i++){ + if (io_dest_ids_[i] != world_rank_){ + /* Different node- so we have to post the send/recv */ + io_chunk->init(comp_current_array_, io_overlap_chunk_ids_[i], + IO_NODE, NO_ALLOC); + if (!sub_chunked_ && (make_subchunks == -1)){ + comp_current_array_->make_sub_chunks(io_chunk); + make_subchunks = 1; + } + + tag_ctr=0; + comp_num_of_subchunks_ = + comp_current_array_->layout(SUB_CHUNK)->total_elements(); +#ifdef DEBUG + printf("comp_num_of_subchunks_ = %d\n", comp_num_of_subchunks_); +#endif + for(comp_current_subchunk_id_ = 0; + comp_current_subchunk_id_ < comp_num_of_subchunks_; + comp_current_subchunk_id_++){ +#ifdef DEBUG + printf("io_chunk = %d subchunk_id = %d\n", + io_chunk->chunk_id(), comp_current_subchunk_id_); +#endif + subchunk->init(io_chunk, comp_current_subchunk_id_, NO_ALLOC); + subchunk->compute_overlap(compute_chunk, comp_overlap_base_, + comp_overlap_size_, comp_overlap_stride_); + buf_size = num_elements(comp_array_rank_, comp_overlap_size_); + if (buf_size > 0){ + /* Something to send */ + if (compute_pending_ >= max_pending_){ + realloc_pending_messages(compute_pending_+1); + } + void_buf = (void *)tmp_buf; + compute_chunk->make_datatype(comp_overlap_base_, 
comp_overlap_size_, + comp_overlap_stride_, + &void_buf, + &comp_datatypes_[compute_pending_]); + tmp_buf = (char *)void_buf; + tag = io_chunk->chunk_id()*1000 + tag_ctr*10; + if (read_op) + nb_receive_message((void *) tmp_buf, 1, + comp_datatypes_[compute_pending_], + io_dest_ids_[i], + tag + CHUNK_DATA_FROM_IO, + MPI_COMM_WORLD, + &comp_requests_[compute_pending_]); + else + nb_send_message((void *) tmp_buf, 1, + comp_datatypes_[compute_pending_], + io_dest_ids_[i], tag + CHUNK_DATA_TO_IO, + MPI_COMM_WORLD, + &comp_requests_[compute_pending_]); + compute_pending_++; + } + tag_ctr++; + } + } + } + compute_chunk = comp_current_array_->get_next_chunk(); + } + } + comp_current_array_id_ = array_idx; + return NO; + } else { + if (part_time_io_){ + /* Just test and get back to io-node stuff */ + MPI_Testall(compute_pending_, comp_requests_, &flag, comp_statuses_); + if (flag){ + if (!contiguous_){ + for(i=0; i<compute_pending_;i++) + MPI_Type_free(&comp_datatypes_[i]); + } + compute_pending_ = 0; + comp_current_array_->clear(); + return YES; + } + } else { +#ifdef DEBUG + printf("%d: Waiting for %d messages to complete\n", world_rank_, + compute_pending_); +#endif + MPI_Waitall(compute_pending_, comp_requests_, comp_statuses_); + if (!contiguous_){ + for(i=0; i<compute_pending_;i++) + MPI_Type_free(&comp_datatypes_[i]); + } +#ifdef DEBUG + printf("%d: Done waiting \n", world_rank_); +#endif + + compute_pending_ = 0; + comp_current_array_->clear(); + return YES; + } + return NO; + } +} + + +void CSDIO::start_to_finish(Boolean part_time, + ArrayGroup *compute_group) +{ + int array_idx, make_subchunks, bytes_to_go, buf_size, tag_ctr, tag; + Boolean read_op, part_time_done; + Chunk *chunk=NULL, *subchunk=NULL, *compute_chunk=NULL; + + /* Don't ask me why. 
Ask szu-Wen */ + comp_current_array_id_ = -1; + + if ((op_type_ == RESTART) || (op_type_ == GENERAL_READ) || + (op_type_ == READ_TIMESTEP)){ + read_op = YES; + } else { + read_op = NO; + } + + part_time_io_ = part_time; + compute_node_group_ = compute_group; + comp_current_array_ = NULL; + if (part_time_io_){ + comp_current_array_ = new Array(); + } + + /* Receive the i/o node information */ + receive_io_app_info(); + + /* To reduce costs associated with object creation and deletion, we * + * will create a dummy chunk,subchunk and compute chunk object and * + * re-initialize them whenever necessary. */ + chunk = new Chunk(); + current_chunk_ = chunk; + subchunk = new Chunk(); + compute_chunk = new Chunk(); + + for(array_idx=0; array_idx<num_of_arrays_; array_idx++){ + + if (part_time_io_) + part_time_done = process_compute_side_array(compute_group, array_idx, read_op); + + make_subchunks = -1; + current_array_ = find_array(array_idx); + nat_chunked_ = current_array_->nat_chunked(); + sub_chunked_ = current_array_->sub_chunked(); + if (nat_chunked_ && !sub_chunked_) + contiguous_ = YES; + else + contiguous_ = NO; + + array_rank_ = current_array_->rank(); + if (array_rank_ > max_rank_){ + realloc_schema_bufs(array_rank_); + } + + num_of_chunks_ = current_array_->layout(IO_NODE)->total_elements(); + current_chunk_id_ = current_array_->get_next_index(-1, my_io_rank_, + num_io_nodes_); + if (contiguous_){ + /* Natural chunked and no-user specified subchunking */ + + while(current_chunk_id_ < num_of_chunks_){ + num_overlaps_ = 1; + overlap_chunk_ids_[0] = current_chunk_id_; + dest_ids_[0] = app_info_->world_rank(current_array_->which_node( + current_chunk_id_, COMPUTE_NODE)); + if (part_time_io_ && (world_rank_ == dest_ids_[0])){ + direct_io(array_idx, current_chunk_id_, read_op, NULL, NULL); + } else { + chunk->init(current_array_, current_chunk_id_, IO_NODE, NO_ALLOC); + bytes_to_go = chunk->total_size_in_bytes(); + chunk->set_data_ptr(mem_buf_); + + /* We don't have 
to make the schema requests - just post the + send/recv */ + tag_ctr = 0; + while (bytes_to_go > 0){ + buf_size = min(SUBCHUNK_SIZE, bytes_to_go); + tag = current_chunk_id_*1000+tag_ctr*10; + if (read_op) { + read_data(mem_buf_, buf_size); + nb_send_message((void *)mem_buf_, buf_size, MPI_CHAR, + dest_ids_[0], + tag+CHUNK_DATA_FROM_IO, MPI_COMM_WORLD, + &requests_[0]); + wait_for_completion(); + } else { + nb_receive_message((void *)mem_buf_, buf_size, MPI_CHAR, + dest_ids_[0],tag+CHUNK_DATA_TO_IO, + MPI_COMM_WORLD, &requests_[0]); + + wait_for_completion(); + write_data(mem_buf_, buf_size, chunk->element_size()); + } + bytes_to_go -= buf_size; + tag_ctr++; + } + chunk->set_data_ptr(NULL); + } + + current_chunk_id_ = current_array_->get_next_index(current_chunk_id_, + my_io_rank_, + num_io_nodes_); + } + } /* End if contiguous_ */ + else { /* Have to use MPI-derived datatypes */ + + while(current_chunk_id_ < num_of_chunks_){ + chunk->init(current_array_, current_chunk_id_, IO_NODE, NO_ALLOC); + if (!sub_chunked_ && (make_subchunks == -1)){ + current_array_->make_sub_chunks(chunk); + make_subchunks = 1; + } + num_of_subchunks_=current_array_->layout(SUB_CHUNK)->total_elements(); + tag_ctr=0; + + for(current_subchunk_id_ = 0; current_subchunk_id_ < num_of_subchunks_; + current_subchunk_id_++){ + subchunk->init(current_chunk_, current_subchunk_id_, NO_ALLOC); + bytes_to_go = subchunk->total_size_in_bytes(); + + if (bytes_to_go > mem_buf_size_){ + realloc_mem_bufs(bytes_to_go); + } + subchunk->set_data_ptr(mem_buf_); + + compute_chunk_overlaps(current_array_, subchunk); + compute_schemas(current_array_, subchunk, compute_chunk, array_idx); + + tag = current_chunk_id_ * 1000 + tag_ctr*10; + if (read_op){ + read_data(subchunk); + send_data_to_compute_nodes(subchunk, tag); + wait_for_completion(); + } else { + receive_data_from_compute_nodes(subchunk, tag); + wait_for_completion(); + write_data(subchunk); + } + tag_ctr++; + subchunk->set_data_ptr(NULL); + } + 
current_chunk_id_ = current_array_->get_next_index(current_chunk_id_, + my_io_rank_, + num_io_nodes_); + } + } + if (part_time_io_) + while (!process_compute_side_array(compute_group, array_idx, read_op)){}; + } + + /* Free the temp chunk objects */ + delete(chunk); + delete(subchunk); + delete(compute_chunk); + chunk = current_chunk_ = subchunk = compute_chunk = NULL; + if (comp_current_array_){ + delete(comp_current_array_); + comp_current_array_ = NULL; + } +} + + +/* This constructor is for pure io_nodes only */ +CSDIO::CSDIO(int *schema_string, int schema_size, int world_rank, + int comp_app_num, int comp_app_size, App_Info *app_info): + Simple_IO(schema_string, schema_size, world_rank, comp_app_num, + comp_app_size, app_info) +{ + clear(); +} + + +/* This call is for compute nodes only */ +CSDIO::CSDIO() +{ + do_init(); +} + +CSDIO::CSDIO(int *schema_string, int schema_size, int world_rank, + int comp_app_num, int comp_app_size, App_Info *app_info, Boolean part_time): + Simple_IO(schema_string, schema_size, world_rank, comp_app_num, + comp_app_size, app_info) +{ + if (part_time){ + /* This is a part-time i/o node */ + do_init(); + part_time_io_ = part_time; + } else { + clear(); + } +} + +void CSDIO::clear() +{ + comp_datatypes_ = NULL; + comp_requests_ = NULL; + comp_statuses_ = NULL; + io_overlap_chunk_ids_ = io_dest_ids_ = comp_overlap_base_ = NULL; + comp_overlap_size_ = comp_overlap_stride_ = NULL; + io_app_info_ = NULL; +} + +void CSDIO::do_init() +{ + max_pending_ = 1; + compute_pending_ = 0; + comp_datatypes_ = (MPI_Datatype *)malloc(sizeof(MPI_Datatype)*max_pending_); + comp_requests_ = (MPI_Request *)malloc(sizeof(MPI_Request)*max_pending_); + comp_statuses_ = (MPI_Status *)malloc(sizeof(MPI_Status)*max_pending_); + + io_max_overlaps_ = 1; + io_overlaps_ =0; + io_overlap_chunk_ids_ = (int *) malloc(sizeof(int)*io_max_overlaps_); + io_dest_ids_ = (int *) malloc(sizeof(int)*io_max_overlaps_); + + max_comp_rank_ = 10; + comp_array_rank_ = 0; + 
comp_overlap_base_ = (int *) malloc(sizeof(int)*max_comp_rank_); + comp_overlap_size_ = (int *) malloc(sizeof(int)*max_comp_rank_); + comp_overlap_stride_ = (int *) malloc(sizeof(int)*max_comp_rank_); +} + + + + +CSDIO::~CSDIO() +{ + if (part_time_io_ || dummy_){ + if (comp_datatypes_) free(comp_datatypes_); + if (comp_requests_) free(comp_requests_); + if (comp_statuses_) free(comp_statuses_); + if (comp_overlap_base_) free(comp_overlap_base_); + if (comp_overlap_size_) free(comp_overlap_size_); + if (comp_overlap_stride_) free(comp_overlap_stride_); + if (io_overlap_chunk_ids_) free(io_overlap_chunk_ids_); + if (io_dest_ids_) free(io_dest_ids_); + if (comp_current_array_) delete(comp_current_array_); + if (io_app_info_) delete(io_app_info_); + }; + clear(); +} + +void CSDIO::receive_io_app_info() +{ + int node_type = MPIFS_global_obj->node_type(); + int num_of_world_nodes, app_info_buf_size, *app_info_buf; + int tag = APP_INFO * 10 + SPECIAL; + App_Info *tmp_info = NULL; + MPI_Status app_status; + + MPI_Comm_size(MPI_COMM_WORLD, &num_of_world_nodes); + app_info_buf_size = num_of_world_nodes+2; /* Num io nodes <= total nodes */ + app_info_buf = (int *)malloc(sizeof(int)*app_info_buf_size); + + if (node_type == IO_NODE){ + /* Master i/o node sends io app info to the master compute node */ + if (MPIFS_global_obj->am_master_io_node()){ + tmp_info = MPIFS_global_obj->io_app_info(); + app_info_buf[0] = tmp_info->app_num(); + app_info_buf[1] = tmp_info->app_size(); + tmp_info->world_ranks(&app_info_buf[2]); + app_info_buf_size = app_info_buf[1] + 2; +#ifdef DEBUG +printf("%d:app_num=%d app_size=%d\n", world_rank_, app_info_buf[0], + app_info_buf[1]); +printf("sending messages to %d\n", app_info_->get_master()); +#endif + send_message((void *) app_info_buf, app_info_buf_size, MPI_INT, + app_info_->get_master(), tag, MPI_COMM_WORLD); +#ifdef DEBUG + printf("%d: %d %d %d\n", world_rank_,app_info_buf[0], app_info_buf[1], + app_info_buf[2]); +#endif + } + } else if 
(node_type == PART_TIME_IO){ + if (MPIFS_global_obj->am_master_io_node()){ + tmp_info = MPIFS_global_obj->io_app_info(); + app_info_buf[0] = tmp_info->app_num(); + app_info_buf[1] = tmp_info->app_size(); + tmp_info->world_ranks(&app_info_buf[2]); + app_info_buf_size = app_info_buf[1] + 2; + + if (MPIFS_global_obj->am_master_compute_node()){ + MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *) app_info_buf, + app_info_buf_size, MPI_INT, tag); + } else { + send_message((void *)app_info_buf, app_info_buf_size, MPI_INT, + app_info_->get_master(), tag, MPI_COMM_WORLD); + receive_message((void *)app_info_buf, app_info_buf_size, MPI_INT, + MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, + &app_status); + mpi_get_count(&app_status, MPI_INT, &app_info_buf_size); + MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *) app_info_buf, + app_info_buf_size, MPI_INT, tag); + } + } else { + receive_message((void *)app_info_buf, app_info_buf_size, MPI_INT, + MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, + &app_status); + mpi_get_count(&app_status, MPI_INT, &app_info_buf_size); + MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *) app_info_buf, + app_info_buf_size, MPI_INT, tag); + } + + io_app_info_ = new App_Info(app_info_buf[0], app_info_buf[1], + &app_info_buf[2]); + } else if (node_type == COMPUTE_NODE) { + receive_message((void *)app_info_buf, app_info_buf_size, MPI_INT, + MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, + &app_status); + mpi_get_count(&app_status, MPI_INT, &app_info_buf_size); +#ifdef DEBUG + printf("%d:app_info_buf_size =%d\n", world_rank_, app_info_buf_size); +#endif + io_app_info_ = new App_Info(app_info_buf[0], app_info_buf[1], + &app_info_buf[2]); + MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *) app_info_buf, + app_info_buf_size, MPI_INT, tag); + + } else if (node_type == PART_TIME_COMPUTE) { + receive_message((void *)app_info_buf, app_info_buf_size, MPI_INT, + MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, + &app_status); + mpi_get_count(&app_status, MPI_INT, &app_info_buf_size); + io_app_info_ 
= new App_Info(app_info_buf[0], app_info_buf[1], + &app_info_buf[2]); + MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *) app_info_buf, + app_info_buf_size, MPI_INT, tag); + + } else { + printf("Error in CSDIO::receive_io_app_info - incorrect node type\n"); + exit(1); + } + free(app_info_buf); + app_info_buf = NULL; +} + +/* Store the schema only for the part-time i/o case. Don't send the any + * schema message. + */ +void CSDIO::send_schema_message(int array_id, int index) +{ + int *ptr = schema_bufs_[index]; + + if (part_time_io_ && (dest_ids_[index] == world_rank_)){ + *ptr++ = array_id; + *ptr++ = overlap_chunk_ids_[index]; + *ptr++ = (int) nat_chunked_; + *ptr++ = (int) contiguous_; + *ptr++ = array_rank_; + *ptr++ = op_type_; + + for(int i=0; i < array_rank_; i++) *ptr++ = overlap_base_[i]; + for(i=0; i < array_rank_; i++) *ptr++ = overlap_size_[i]; + for(i=0; i < array_rank_; i++) *ptr++ = overlap_stride_[i]; + } + } + + +void CSDIO::send_data_to_compute_nodes(Chunk *subchunk, int tag) +{ + for(int i=0; i < num_overlaps_; i++){ + if (part_time_io_ && (dest_ids_[i] == world_rank_)){ + copy_data(subchunk, i, YES, NULL, NULL); + requests_[i] = MPI_REQUEST_NULL; + } else { + nb_send_message((void *)data_ptrs_[i], 1, datatypes_[i], + dest_ids_[i], tag+CHUNK_DATA_FROM_IO,MPI_COMM_WORLD, + &requests_[i]); + } + } +} + +void CSDIO::receive_data_from_compute_nodes(Chunk *subchunk, int tag) +{ + for(int i=0; i < num_overlaps_; i++){ + if (part_time_io_ && (dest_ids_[i] == world_rank_)){ + copy_data(subchunk, i, NO, NULL, NULL); + requests_[i] = MPI_REQUEST_NULL; + } else { + nb_receive_message((void *)data_ptrs_[i], 1, datatypes_[i], + dest_ids_[i], tag+CHUNK_DATA_TO_IO,MPI_COMM_WORLD, + &requests_[i]); + } + } +} + + + +void CSDIO::realloc_compute_schema_bufs(int new_max) +{ + max_comp_rank_ = new_max; + comp_overlap_base_ = (int *) realloc(comp_overlap_base_, new_max*sizeof(int)); + comp_overlap_stride_ = (int *) realloc(comp_overlap_stride_, new_max*sizeof(int)); 
+ comp_overlap_size_ = (int *) realloc(comp_overlap_size_, new_max*sizeof(int)); +} + +void CSDIO::realloc_pending_messages(int new_max) +{ + max_pending_ = new_max; + comp_datatypes_ =(MPI_Datatype *)realloc(comp_datatypes_,new_max*sizeof(MPI_Datatype)); + comp_requests_ = (MPI_Request *)realloc(comp_requests_, new_max*sizeof(MPI_Request)); + comp_statuses_ = (MPI_Status*)realloc(comp_statuses_, new_max*sizeof(MPI_Status)); +} + + +void CSDIO::realloc_io_buffers(int new_max) +{ + io_max_overlaps_ = new_max; + io_overlap_chunk_ids_ =(int*)realloc(io_overlap_chunk_ids_, new_max*sizeof(int)); + io_dest_ids_ = (int *) realloc(io_dest_ids_, new_max*sizeof(int)); +} + +void CSDIO::io_chunk_overlaps(Array *array, Chunk *subchunk) +{ + int num_compute_chunks; + + if (nat_chunked_){ + io_overlaps_ = 1; + io_overlap_chunk_ids_[0] = current_chunk_id_; + } + else{ + + num_compute_chunks = array->layout(IO_NODE)->total_elements(); + if (num_compute_chunks > io_max_overlaps_) realloc_io_buffers(num_compute_chunks); + subchunk->chunk_overlaps(array, &io_overlaps_, + io_overlap_chunk_ids_, IO_NODE); + } + + for(int i=0; i < io_overlaps_;i++) + io_dest_ids_[i] = io_app_info_->world_rank + (array->which_node(io_overlap_chunk_ids_[i], + IO_NODE, num_io_nodes_)); +} + + +void CSDIO::wait_for_completion() +{ + MPI_Waitall(num_overlaps_, requests_, statuses_); + if (!contiguous_) + for(int i=0; i< num_overlaps_;i++) + MPI_Type_free(&datatypes_[i]); +} + +char* CSDIO::name() +{ + return name_; +} + diff --git a/src/Panda/CSDIO.h b/src/Panda/CSDIO.h new file mode 100644 index 0000000..efd1841 --- /dev/null +++ b/src/Panda/CSDIO.h @@ -0,0 +1,60 @@ +#ifndef CSDIO_dot_h +#define CSDIO_dot_h + +#include "Simple_IO.h" +class ArrayGroup; +class Array; +class App_Info; +class Chunk; + +class CSDIO : public Simple_IO +{ + protected: + int compute_pending_; + int max_pending_; + MPI_Datatype *comp_datatypes_; + MPI_Request *comp_requests_; + MPI_Status *comp_statuses_; + int comp_array_rank_; + 
int max_comp_rank_; + int *comp_overlap_base_; + int *comp_overlap_size_; + int *comp_overlap_stride_; + int io_max_overlaps_; + int io_overlaps_; + int *io_overlap_chunk_ids_; + int *io_dest_ids_; + Array *comp_current_array_; + int comp_current_array_id_; + int comp_current_chunk_id_; + int comp_current_subchunk_id_; + int comp_num_of_subchunks_; + App_Info *io_app_info_; + + Boolean process_compute_side_array(ArrayGroup*,int,Boolean); + void clear(); + void do_init(); + void receive_io_app_info(); + virtual void send_schema_message(int,int); + virtual void send_data_to_compute_nodes(Chunk*,int); + virtual void receive_data_from_compute_nodes(Chunk*,int); + void realloc_compute_schema_bufs(int); + void realloc_pending_messages(int); + void realloc_io_buffers(int); + void io_chunk_overlaps(Array*,Chunk*); + void wait_for_completion(); + + + public: + CSDIO(int*,int,int,int,int,App_Info*); + CSDIO(int*,int,int,int,int,App_Info*,Boolean); + CSDIO(); + virtual ~CSDIO(); + virtual void start_to_finish(Boolean, ArrayGroup*); + virtual void compute_node_io_loop(ArrayGroup*); + virtual char* name(); +}; + + + +#endif diff --git a/src/Panda/CSDIO_Shared.C b/src/Panda/CSDIO_Shared.C new file mode 100644 index 0000000..35e864d --- /dev/null +++ b/src/Panda/CSDIO_Shared.C @@ -0,0 +1,241 @@ +#include "definitions.h" +#include "ArrayGroup.h" +#include "MPIFS.h" +#include "Chunk.h" +#include "App_Info.h" +#include "Array.h" +#include "message.h" +#include "CSDIO_Shared.h" + +/* we could have made this class multiply inherit from CSDIO and CSDIO_Shared, but + * we would have to use virtual inheritance and depending on the compiler used, + * there could be a performance penalty (though it would still be dwarfed by the + * cost of message-passing and disk i/o) + */ + +extern MPIFS* MPIFS_global_obj; +extern int SUBCHUNK_SIZE; + +CSDIO_Shared::CSDIO_Shared(int *schema_string, int schema_size, int world_rank, + int comp_app_num,int comp_app_size , App_Info *app_info) +: 
CSDIO(schema_string, schema_size, world_rank, comp_app_num, + comp_app_size, app_info) +{ + + compute_chunk_ = new Chunk(); + current_chunk_ = new Chunk(); + subchunk_ = new Chunk(); + current_array_id_ = -1; + if ((op_type_ == RESTART)||(op_type_ == GENERAL_READ)|| + (op_type_ == READ_TIMESTEP)) + read_op_ = YES; + else + read_op_ = NO; + + /* Send the IO app info to the compute nodes */ + receive_io_app_info(); + + /* We need to set the following variables so that continue_io()* + * would start the I/O of the first subchunk automatically */ + contiguous_ = NO; + current_array_id_ = -1; + current_chunk_id_ = 0; + num_of_chunks_ = -1; /* This will cause get_next_chunk() to fail */ + current_subchunk_id_ = 0; + num_of_subchunks_ = -1; /* Causes get_next_subchunk() to fail */ + status_flag_ = START; + continue_io(); +} + +CSDIO_Shared::~CSDIO_Shared() +{ + if (subchunk_) delete subchunk_; + if (compute_chunk_) delete compute_chunk_; + subchunk_ = compute_chunk_ = NULL; +} + +Boolean CSDIO_Shared::get_next_array(){ + current_array_id_++; + if (current_array_id_ < num_of_arrays_){ + make_subchunks_ = -1; + current_array_ = find_array(current_array_id_); + nat_chunked_ = current_array_->nat_chunked(); + sub_chunked_ = current_array_->sub_chunked(); + array_rank_ = current_array_->rank(); + + if (array_rank_ > max_rank_){ + realloc_schema_bufs(array_rank_); + } + num_of_chunks_ = current_array_->layout(IO_NODE)->total_elements(); + current_chunk_id_ = -1; + if (nat_chunked_ && !sub_chunked_) + contiguous_ = YES; /* No need to use derived datatypes */ + else + contiguous_ = NO; /* Have to use derived datatypes */ + + bytes_to_go_ = 0; + current_subchunk_id_ = -1; + return YES; + } else + return NO; +} + + +Boolean CSDIO_Shared::get_next_chunk() +{ + int *ptr; + + if (!current_array_) return NO; + current_chunk_id_ = current_array_->get_next_index(current_chunk_id_, + my_io_rank_, + num_io_nodes_); + if (current_chunk_id_ < num_of_chunks_){ + 
current_chunk_->set_data_ptr(NULL); + current_chunk_->init(current_array_, current_chunk_id_, + IO_NODE, NO_ALLOC); + tag_ = current_chunk_id_*1000; + if (contiguous_){ + bytes_to_go_ = current_chunk_->total_size_in_bytes(); + current_chunk_->set_data_ptr(mem_buf_); + ptr = schema_bufs_[0]; + *ptr++ = current_array_id_; + *ptr++ = current_chunk_id_; + *ptr++ = (int) nat_chunked_; + *ptr++ = (int) contiguous_; + *ptr++ = op_type_; + *ptr++ = 0; + *ptr++ = 0; + compute_chunk_overlaps(current_array_, current_chunk_); + } + else { + if (!sub_chunked_ && (make_subchunks_ == -1)){ + current_array_->make_sub_chunks(current_chunk_); + make_subchunks_ = 1; + } + num_of_subchunks_ = current_array_->layout(SUB_CHUNK)->total_elements(); + current_subchunk_id_ = -1; + } + return YES; + } + else + return NO; +} + + +/* This should not be called for the contiguous_ case */ +Boolean CSDIO_Shared::get_next_subchunk() +{ + current_subchunk_id_++; + if (current_subchunk_id_ < num_of_subchunks_){ + subchunk_->set_data_ptr(NULL); + subchunk_->init(current_chunk_, current_subchunk_id_, NO_ALLOC); + bytes_to_go_ = subchunk_->total_size_in_bytes(); + + if (bytes_to_go_ < mem_buf_size_) + realloc_mem_bufs(bytes_to_go_); + + subchunk_->set_data_ptr(mem_buf_); + return YES; + } + else + return NO; +} + + +void CSDIO_Shared::start_subchunk_io() +{ + int *ptr; + + if (contiguous_){ + ptr = schema_bufs_[0]; + ptr[6] = min(SUBCHUNK_SIZE, bytes_to_go_); + if (read_op_) read_data(mem_buf_, ptr[6]); + if (read_op_) + nb_send_message((void *)mem_buf_, ptr[6], MPI_CHAR, dest_ids_[0], + tag_+CHUNK_DATA_FROM_IO, MPI_COMM_WORLD, &requests_[0]); + else + nb_receive_message((void *)mem_buf_, ptr[6], MPI_CHAR, dest_ids_[0], + tag_+CHUNK_DATA_TO_IO, MPI_COMM_WORLD, &requests_[0]); + ptr[5] += ptr[6]; /* Offset of the next subchunk */ + bytes_to_go_ -= ptr[6]; + status_flag_ = WAITING; + tag_ += 10; + + } else { + compute_chunk_overlaps(current_array_, subchunk_); + + compute_schemas(current_array_, 
subchunk_, compute_chunk_, current_array_id_); + if (read_op_){ + read_data(subchunk_); + send_data_to_compute_nodes(subchunk_, tag_); + } + else + receive_data_from_compute_nodes(subchunk_, tag_); + status_flag_ = WAITING; + tag_ += 10; + } +} + + +Boolean CSDIO_Shared::test_subchunk_io() +{ + int flag; + MPI_Testall(num_overlaps_, requests_, &flag, statuses_); + if (flag) { + status_flag_ = START; + if (!read_op_) + if (contiguous_) + write_data(mem_buf_, schema_bufs_[0][6], 1); + else + write_data(subchunk_); + + if (!contiguous_) free_datatypes(); + return YES; + } + return NO; +} + + +/* Return YES, if I/O is complete */ +Boolean CSDIO_Shared::continue_io() +{ + if (status_flag_ == START){ + if (!start_next_subchunk_io()) return YES; /* IO completed */ + } else if (status_flag_ == WAITING){ + if (test_subchunk_io()) + if (!start_next_subchunk_io()) return YES; /* IO done */ + } else { + printf("Error - Invalid status_flag value \n"); + exit(11); + } + return NO; +} + +/* Return yes if you can start the io of another subchunk */ +Boolean CSDIO_Shared::start_next_subchunk_io() +{ + if (contiguous_){ + if (bytes_to_go_ <= 0){ + while(!get_next_chunk()){ + if (!get_next_array()) return NO; + } + /* Since we might be looking at another array */ + if (!contiguous_) get_next_subchunk(); + } + + start_subchunk_io(); + } else { + + if (!get_next_subchunk()){ + /* We have finished this chunk */ + while(!get_next_chunk()){ + if (!get_next_array()) return NO; + } + if (!contiguous_) get_next_subchunk(); + } + + start_subchunk_io(); + } + return YES; +} + diff --git a/src/Panda/CSDIO_Shared.h b/src/Panda/CSDIO_Shared.h new file mode 100644 index 0000000..08e9fd8 --- /dev/null +++ b/src/Panda/CSDIO_Shared.h @@ -0,0 +1,33 @@ +#ifndef CSDIO_Shared_dot_h +#define CSDIO_Shared_dot_h + +#include "CSDIO.h" +class Chunk; + +class CSDIO_Shared : public CSDIO +{ + protected: + int current_array_id_; + int status_flag_; + Chunk *subchunk_; + Chunk *compute_chunk_; + Boolean read_op_; 
+ int bytes_to_go_; + int make_subchunks_; + int tag_; + + Boolean get_next_chunk(); + Boolean get_next_array(); + Boolean get_next_subchunk(); + Boolean start_next_subchunk_io(); + void start_subchunk_io(); + Boolean test_subchunk_io(); + + public: + CSDIO_Shared(int*,int,int,int,int, App_Info*); + virtual ~CSDIO_Shared(); + virtual Boolean continue_io(); +}; + +#endif + diff --git a/src/Panda/Chunk.C b/src/Panda/Chunk.C new file mode 100644 index 0000000..d6fd028 --- /dev/null +++ b/src/Panda/Chunk.C @@ -0,0 +1,692 @@ +#include "definitions.h" +#include "Chunk.h" +#include "Array.h" +#include <malloc.h> + + +Chunk::Chunk() +{ + base_ = stride_ = size_ = NULL; + array_ = NULL; + chunk_ = NULL; + data_ptr_ = NULL; + stencil_width_ = 0; +} + + +/* This constructor is used to create a chunk given array information */ +Chunk::Chunk(Array *array, int chunk_id, int node_type, DataStatus data_status) +{ + do_init(array, chunk_id, node_type, data_status); +} + +/* Re-initialize an already created chunk object */ +void Chunk::init(Array *array, int chunk_id, int node_type, DataStatus data_status) +{ + clear(); + do_init(array, chunk_id, node_type, data_status); +} + +void Chunk::do_init(Array *array, int chunk_id, int node_type, + DataStatus data_status) +{ + int *stride, *base; + + /* Initialize the instance variables */ + array_ = array; + chunk_ = NULL; + chunk_id_ = chunk_id; + am_subchunk_ = NO; + element_size_ = array->element_size(); + + stride = (int *) malloc(sizeof(int)*array->rank()); + base = (int *) malloc(sizeof(int)*array->rank()); + for(int i=0; i < array->rank(); i++){ stride[i] = 1; base[i] = 0; } + + RegularDistribution *layout=(RegularDistribution *)(array->layout(node_type)); + calculate_base_size_stride(array->rank(), base, array->size(), stride, + layout->layout(), layout->distribution(), + layout->block_dist(), chunk_id); + + /* check if we have to allocate the data space */ + switch(data_status) { + case ALLOC: + data_ptr_ = (char 
*)malloc(total_size_in_bytes()); + data_status_ = data_status; + stencil_width_ = 0; + break; + + case NO_ALLOC: + data_ptr_ = NULL; + data_status_ = data_status; + stencil_width_ = 0; + break; + + default: + printf("Unsupported \n"); + break; + } +} + +/* This creates a subchunk , given the chunk and subchunk_id */ +Chunk::Chunk(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status) +{ + do_init(mega_chunk, sub_chunkid, data_status); +} + +/* Re-initialize an already created subchunk obj */ +void Chunk::init(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status) +{ + clear(); + do_init(mega_chunk, sub_chunkid, data_status); +} + + +void Chunk::do_init(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status) +{ + chunk_id_ = sub_chunkid; + element_size_ = mega_chunk->element_size(); + array_ = mega_chunk->array(); + chunk_ = mega_chunk; + am_subchunk_ = YES; + + RegularDistribution *layout=(RegularDistribution *)(array_->layout(SUB_CHUNK)); + calculate_base_size_stride(mega_chunk->rank(), mega_chunk->base(), + mega_chunk->size(), mega_chunk->stride(), + layout->layout(), layout->distribution(), + layout->block_dist(), sub_chunkid); + /* check if we have to allocate the data space */ + switch(data_status) { + case ALLOC: + data_ptr_ = (char *)malloc(total_size_in_bytes()); + data_status_ = data_status; + stencil_width_ = 0; + break; + + case NO_ALLOC: + data_ptr_ = NULL; + data_status_ = data_status; + stencil_width_ = 0; + break; + + default: + data_ptr_ = NULL; + printf("Unsupported \n"); + break; + } +} + +Chunk::~Chunk() +{ + if (base_) delete base_; + if (stride_) delete stride_; + + /* Delete the data buffer only if we allocated it in the first place */ + if ((data_status_ == ALLOC) && data_ptr_) delete data_ptr_; +} + + +void Chunk::clear() +{ + if (base_) free (base_); + if (stride_) free (stride_); + if (data_ptr_) free( data_ptr_); + if (size_) free(size_); + base_ = size_ = stride_ = NULL; + data_ptr_ = NULL; +} + +/* This function takes as 
input the information about the global + * Array and returns the overlapping compute node chunk indices + * via a singly linked list. + * + * Currently this function can only handle BLOCK,* arrays (Needs + * to be extended for the CYCLIC case) + */ +void Chunk::chunk_overlaps(Array *global_array, int* num_overlaps, + int *ret_list, int node_type) +{ + RegularDistribution *layout1 = + (RegularDistribution *)global_array->layout(node_type); + ArrayLayout *layout= layout1->layout(); + int layout_rank = layout->rank(); + int *overlap_base = (int *)malloc(sizeof(int)*layout_rank); + int *overlap_size = (int *)malloc(sizeof(int)*layout_rank); + + /* Find out the list of possible overlaps */ + compute_first_last_chunk(global_array->rank(), global_array->size(), + layout, layout1->distribution(), layout1->block_dist(), + overlap_base, overlap_size); +#ifdef DEBUG + printf("In chunk_overlaps\n"); + for(int i=0;i<layout_rank;i++) + printf("base[%d] = %d size[%d] = %d\n", i, overlap_base[i], i, overlap_size[i]); +#endif + layout->indices_list(overlap_base, overlap_size, num_overlaps, ret_list); + free(overlap_base); + free(overlap_size); +} + + +/* This function isn't general enough. It implicitly assumes that the I/O + * chunks are distributed using only BLOCK.* distributions. 
Also the + * compute node chunks are assumed to be distributed using only + * BLOCK,* (can be extended to support CYLCIC later) + * + * Function assumes that the memory for the return paramters + * overlap_base and overlap_size have been allocated + */ +void Chunk::compute_first_last_chunk(int array_rank, int *array_size, + ArrayLayout *layout, Distribution *dist, + Block_Distribution block_dist, + int *overlap_base, int *overlap_size) +{ + /* Validation of input data */ + if (!(layout->valid_distribution(array_rank, dist))) + { + printf("Invalid distribution in compute_first_last_chunk\n"); + exit(1); + } + + /* Verify to see if we are dealing with BLOCK,* case only */ + for(int i=0;i<layout->rank();i++) + { + if (dist[i] == CYCLIC) + { + printf("Cyclic schema not yet supported\n"); + exit(2); + } + } + + for(i=0; i<array_rank;i++) + { + if (stride_[i] != 1) + { + printf("Cyclic schema not yet supported\n"); + exit(2); + } + } + + + /* Now we can get down to business */ + int *overlap_last = (int*)malloc(sizeof(int)*layout->rank()); + int layout_idx=0, array_idx; + int def_chunk_size,rem,tmp,last; + + for(array_idx=0;array_idx < array_rank; array_idx++) + { + switch(dist[array_idx]) + { + case NONE: + break; + + case CYCLIC: + printf("Cyclic schema not yet supported\n"); + exit(3); + break; + + /* Need to verify this stuff - especially the NAS stuff */ + case BLOCK: + switch(block_dist) + { + case HPF: + def_chunk_size = (array_size[array_idx]+layout->size(layout_idx)-1) + / (layout->size(layout_idx)); + overlap_base[layout_idx] = base_[array_idx] + / def_chunk_size; + overlap_last[layout_idx] = (base_[array_idx]+size_[array_idx] -1) + / def_chunk_size; + break; + + case NAS: + def_chunk_size = array_size[array_idx] + / layout->size(layout_idx); + rem = array_size[array_idx] + % layout->size(layout_idx); + if (rem == 0) + { + /* perfect distribution */ + overlap_base[layout_idx] = base_[array_idx] + / def_chunk_size; + overlap_last[layout_idx] = (base_[array_idx] 
+ + size_[array_idx] -1) + / def_chunk_size; + } + else + { + /* first "rem" blocks have "def_chunk+1" elements */ + tmp = (def_chunk_size+1)*rem; + if (base_[array_idx] < tmp) + { + overlap_base[layout_idx] = base_[array_idx] + / (def_chunk_size + 1); + } + else + { + overlap_base[layout_idx] = ((base_[array_idx] - tmp) + / def_chunk_size) + rem; + } + + last = base_[array_idx] + size_[array_idx] -1; + if (last < tmp) + { + overlap_last[layout_idx] = last / (def_chunk_size+1); + + } + else + { + overlap_last[layout_idx] = ((last - tmp) + / def_chunk_size) + rem; + } + } + break; + + default: + printf("Unsupported block distribution\n"); + exit(2); + break; + } + overlap_size[layout_idx] = overlap_last[layout_idx] + - overlap_base[layout_idx] + 1; + layout_idx++; + break; + + default: + printf("Unsupported distribution\n"); + exit(3); + break; + } + + } + + free(overlap_last); + return; +} + + + + +int Chunk::total_size_in_bytes() +{ + return (total_size_in_elements()*element_size_); +} + + + +int Chunk::total_size_in_elements() +{ + return total_elements(); +} + + +int Chunk::chunk_id(){return chunk_id_;} + + +void * Chunk::data_ptr(){return data_ptr_;} + + + +/* This is not a method. It is an generalized inline function to + * calculate the overlap between two chunks. The input parameters + * are rank,base,stride,size of the two arrays and the pointers to + * the base,strides and sizes of the resultant chunk. The functions + * assumes that the rank of the input arrays are equal + * + * This function also assumes that the memory for the return values + * r_base, r_stride, rsize have already been allocated. 
+ */ +inline void determine_overlap(int rank, int *c1_base, int* c1_size, + int* c1_stride, + int* c2_base, int* c2_size, int* c2_stride, + int* r_base, int* r_size, int* r_stride) +{ + + int tmp_base,tmp_size,n; + + for(int i=0; i< rank;i++) + { + /* Compute overlap in each dimension */ + if ((c1_stride[i] == 1) && (c2_stride[i] == 1)) + { + /* Simplest case + * r_base = max(c1_base, c2_base) + * r_size = max( min(c1_base+c1_size, c2_base+c2_size)-r_base, 0); + */ + r_base[i] = max(c1_base[i], c2_base[i]); + r_size[i] = max((min(c1_base[i]+c1_size[i], c2_base[i]+c2_size[i]) + - r_base[i]), 0); + r_stride[i] = 1; + } + else if (c1_stride[i] == 1) + { + /* Not so simple - this needs to be verified + * tmp_B = max(c1_base,c2_base) + * B = tmp_B + (N - ((tmp_B - c2_base)%N))%N + * U = min(c1_base+(c1_size-1), c2_base+(c2_size-1)*N) - B + * if (U < 0) the no overlap else r_size = U/N + 1 + */ + n = c2_stride[i]; + tmp_base = max(c1_base[i], c2_base[i]); + r_base[i] = tmp_base + (n -((tmp_base - c2_base[i])%n))%n; + tmp_size = min(c1_base[i]+(c1_size[i]-1), c2_base[i]+(c2_size[i]-1)*n); + if (tmp_size < 0) + { + /* no overlap */ + r_size[i] = 0; + r_stride[i] = 1; + } + else + { + r_size[i] = tmp_size / n + 1; + r_stride[i] = n; + } + } + else if (c2_stride[i] == 1) + { + /* Similar to the previous case */ + n = c1_stride[i]; + tmp_base = max(c1_base[i], c2_base[i]); + r_base[i] = tmp_base + (n -((tmp_base - c1_base[i])%n))%n; + tmp_size = min(c1_base[i]+(c1_size[i]-1)*n, c2_base[i]+(c2_size[i]-1)); + if (tmp_size < 0) + { + /* no overlap */ + r_size[i] = 0; + r_stride[i] = 1; + } + else + { + r_size[i] = tmp_size / n + 1; + r_stride[i] = n; + } + } + else if (c1_stride[i] = c2_stride[i]) + { + /* Can do this one later */ + } + else + { + /* I give up */ + } + } +#ifdef DEBUG + /* Debugging output */ + printf ("In determine overlap rank= %d\n", rank); + int k; + for(k=0;k<rank;k++) + printf("%d %d %d %d %d %d %d %d %d\n", c1_base[k], c1_size[k], c1_stride[k], + 
c2_base[k], c2_size[k], c2_stride[k], + r_base[k], r_size[k], r_stride[k]); +#endif + return; +} + + +void Chunk::compute_overlap(Chunk *compute_chunk, int *overlap_base, + int *overlap_size, int *overlap_stride) +{ + determine_overlap(rank_, base_, size_, stride_, + compute_chunk->base(), + compute_chunk->size(), + compute_chunk->stride(), + overlap_base, + overlap_size, + overlap_stride); +} + + +int* Chunk::base(){return base_;} +int* Chunk::size(){return size_;} +int* Chunk::stride(){return stride_;} + +int Chunk::element_size() { return element_size_; } +/* This function needs to be verified when the stride is not 1 */ +void Chunk::base_offset(int *base, void **ptr) +{ + int base_offset = 0; + int offset=1; + + for(int i=rank_ - 1; i>= 0; i--) + { + base_offset += ((base[i]-base_[i]) / stride_[i])*offset; + offset *= size_[i]; + } + base_offset *= element_size_; + *ptr = (char *)data_ptr_ + base_offset; +} + +void Chunk::convert_from_number_to_index(int num, int *result) +{ + int i,j, product=1; + + for(i=0;i<rank_;i++) + { + product=1; + for(j=i+1; j< rank_;j++) product *= size_[j]; + result[i] = num / product; + num -= num/product * product; + } +} + + +/* This method calculates the rank, base, stride of the chunk * + * (subchunk), given the dimensions of the array (chunk) and its * + * layout, distribution and the chunk (subchunk index) */ +void Chunk::calculate_base_size_stride(int rank, int* old_base, + int* old_size, int* old_stride, + ArrayLayout *layout, Distribution *dist, + Block_Distribution block_dist, int id) +{ + int *chunk_index=NULL; + int idx=0, layout_idx=0; + int default_size, rem; + + chunk_index = layout->convert_from_number_to_index(id); + rank_ = rank; + size_ = (int *) malloc(sizeof(int)*rank); + base_ = (int *) malloc(sizeof(int)*rank); + stride_ = (int *) malloc(sizeof(int)*rank); + + + /* Verify if it is possible to distribute the array (subchunk) */ + if (!(layout->valid_index(chunk_index))) + { + printf("Invalid chunk index %d in 
compute_base_size_stride\n", id); + exit(1); + } + if (!(layout->valid_distribution(rank, dist))) + { + printf("Unable to distribute array in compute_base_size_stride\n"); + exit(2); + } + + for(idx=0; idx < rank; idx++) + { + switch(dist[idx]) + { + case NONE: + base_[idx] = old_base[idx]; + size_[idx] = old_size[idx]; + stride_[idx] = old_stride[idx]*1; + break; + + case CYCLIC: + base_[idx] = old_base[idx] + chunk_index[layout_idx]*old_stride[idx]; + size_[idx] = (old_size[idx] - chunk_index[layout_idx] + + layout->size(layout_idx)-1)/ layout->size(layout_idx); + stride_[idx] = layout->size(layout_idx) * old_stride[idx]; + layout_idx++; + break; + + case BLOCK: + switch(block_dist) + { + case HPF: + default_size = (old_size[idx] + layout->size(layout_idx)-1) + /layout->size(layout_idx); + base_[idx] = old_base[idx] + default_size * + chunk_index[layout_idx] *old_stride[idx]; + size_[idx] = default_size; + stride_[idx] = old_stride[idx]*1; + /* The last chunk may be smaller */ + if (chunk_index[layout_idx] ==(layout->size(layout_idx)-1)) + { + size_[idx] = old_size[idx] - + (default_size * chunk_index[layout_idx]); + } + break; + + case NAS: + default_size = old_size[idx] / layout->size(layout_idx); + rem = old_size[idx] % layout->size(layout_idx); + if (chunk_index[layout_idx] < rem) + { + base_[idx] = old_base[idx] + (chunk_index[layout_idx] + + chunk_index[layout_idx]*default_size) + *old_stride[idx]; + size_[idx] = default_size + 1; + } + else + { + base_[idx] = old_base[idx] + (rem + + chunk_index[layout_idx]*default_size) + *old_stride[idx]; + size_[idx] = default_size; + } + stride_[idx] = old_stride[idx] * 1; + break; + + + default: + printf("Unsupported Block Distribution specified\n"); + exit(3); + break; + } + layout_idx++; + break; + + default: + printf("Unsupported Distribution specified\n"); + exit(3); + break; + } + } + + free(chunk_index); + return; +} + +Array* Chunk::array(){return array_;} + +Boolean Chunk::am_subchunk(){return am_subchunk_;} + 
+void Chunk::copy_base_size_stride(int *base, int *size, int *stride) +{ + for(int i=0; i< rank_; i++){ + base[i] = base_[i]; + size[i] = size_[i]; + stride[i] = stride_[i]; + } + } + + + +/* This assumes that all the strides are 1 - i.e no cyclic */ +void Chunk::make_datatype(int *overlap_base, int *overlap_size, + int *overlap_stride, void **ptr, + MPI_Datatype *return_data_type) +{ + + MPI_Datatype *tmp_types = (MPI_Datatype *) malloc(sizeof(MPI_Datatype) * rank_); + int i,j , offset = 1; + int base_offset = 0; + int *size, *base; + Boolean allocate; + + // If there is a ghost region + int *array_size = array_->size(); + int bound; + if (stencil_width_ > 0) { + size = (int *)malloc(sizeof(int) * rank_); + base = (int *)malloc(sizeof(int) * rank_); + for (i=0; i<rank_; i++) { + bound = base_[i] + size_[i]; + base[i] = max(base_[i] - stencil_width_, 0); + bound = min(bound + stencil_width_, array_size[i]); + size[i] = bound - base[i]; + } + allocate = YES; + //printf("##### stencil %d base %d %d %d size %d %d %d\n", stencil_width_, base[0], base[1], base[2], size[0], size[1], size[2]); + } else { + size = size_; + base = base_; + allocate = NO; + } + + MPI_Type_contiguous(element_size_, MPI_CHAR, &tmp_types[rank_-1]); + if (overlap_stride[rank_ -1] != 1) + { + printf("error - stride is %d", overlap_stride[rank_ -1]); + exit(10); + } + MPI_Type_vector(overlap_size[rank_-1], 1, 1, tmp_types[rank_-1], &tmp_types[rank_-2]); + for(i=rank_-1; i > 0; i--) + { + offset=1; + for(j=i;j <rank_; j++) offset *= size[j]; + if (overlap_stride[i-1] != 1) + { + printf("error - stride is %d\n", overlap_stride[i-1]); + exit(10); + } + if (i != 1){ + + MPI_Type_hvector(overlap_size[i-1],1,offset*element_size_, + tmp_types[i-1], + &tmp_types[i-2]); + } + else + MPI_Type_hvector(overlap_size[i-1],1,offset*element_size_, + tmp_types[i-1], + return_data_type); + } + MPI_Type_commit(return_data_type); + offset=1; + for(i=rank_-1;i >= 0; i--) + { + base_offset += (overlap_base[i] - 
base[i])*offset; + offset *= size[i]; + } + + *ptr = data_ptr_ + base_offset*element_size_; + free (tmp_types); + if (allocate) { + free(size); + free(base); + } +} + + +/* Old data buffer should be freed by someother function */ +void Chunk::set_data_ptr(char *data_ptr){ + data_ptr_ = data_ptr; +} + +void Chunk::set_stencil_width(int stencil_width){ + stencil_width_ = stencil_width; +} + +Chunk::Chunk(Array *array, int *base, int *size) +{ + array_ = array; + rank_ = array->rank(); + element_size_ = array->element_size(); + chunk_id_ = 0; + am_subchunk_ = NO; + + base_ = copy_int_list(rank_, base); + size_ = copy_int_list(rank_, size); + stride_ = (int *)malloc(sizeof(int) * rank_); + for (int i=0; i<rank_; i++) stride_[i] = 1; + data_status_ = NO_ALLOC; data_ptr_ = NULL; +} diff --git a/src/Panda/Chunk.h b/src/Panda/Chunk.h new file mode 100644 index 0000000..523a7d1 --- /dev/null +++ b/src/Panda/Chunk.h @@ -0,0 +1,68 @@ +#ifndef Chunk_dot_h +#define Chunk_dot_h + +#include "mpi.h" +#include "List.h" +#include "ArrayLayout.h" + +class Array; + + +class Chunk : public Template, public Linkable { + protected: + int *base_; + int *stride_; + int chunk_id_; /* This should be unique */ + int element_size_; + Array* array_; + Chunk* chunk_; + char *data_ptr_; + int stencil_width_; + DataStatus data_status_; + Boolean am_subchunk_; + + + void compute_first_last_chunk(int, int*, + ArrayLayout*,Distribution*, + Block_Distribution, int*, + int*); + void do_init(Array*,int,int, DataStatus); + void do_init(Chunk*,int,DataStatus); + void clear(); + + public: + Chunk(); + Chunk(Array*,int*,int*); + Chunk(Array*,int,int,DataStatus); + Chunk(Chunk*, int, DataStatus); + void init(Array*,int,int,DataStatus); + void init(Chunk*,int,DataStatus); + virtual ~Chunk(); + void chunk_overlaps(Array *, int*, int*, int); + int total_size_in_bytes(); + int total_size_in_elements(); + int chunk_id(); + void *data_ptr(); + void set_data_ptr(char *); + void set_stencil_width(int); + int* 
base(); + int* stride(); + int* size(); + int element_size(); + void base_offset(int*, void**); + void compute_overlap(Chunk*,int*,int*,int*); + void convert_from_number_to_index(int,int*); + void calculate_base_size_stride(int, int*, int*, int*, + ArrayLayout*, Distribution*, + Block_Distribution, int); + Array* array(); + Boolean am_subchunk(); + void copy_base_size_stride(int*,int*, int*); + void make_datatype(int*,int*,int*,void**,MPI_Datatype*); + +}; + +#endif + + + diff --git a/src/Panda/Collective_IO.C b/src/Panda/Collective_IO.C new file mode 100644 index 0000000..118afe6 --- /dev/null +++ b/src/Panda/Collective_IO.C @@ -0,0 +1,25 @@ +#include "definitions.h" +#include "Collective_IO.h" + +Collective_IO::Collective_IO(){} + +Collective_IO::~Collective_IO() +{ +} + +Boolean Collective_IO::continue_io() +{ + printf("This function should not be executed\n"); + return YES; +} + +void Collective_IO::start_to_finish(Boolean part_time_io, Array *array) +{ + printf("This function should not be executed\n"); +} + +void Collective_IO::compute_node_io_loop(Array *array) +{ + printf("This function should not be executed\n"); +} + diff --git a/src/Panda/Collective_IO.h b/src/Panda/Collective_IO.h new file mode 100644 index 0000000..aa351a7 --- /dev/null +++ b/src/Panda/Collective_IO.h @@ -0,0 +1,18 @@ +#ifndef Collective_IO_dot_h +#define Collective_IO_dot_h + +#include "List.h" +class Array; + +class Collective_IO : public Linkable{ + public: + Collective_IO(); + virtual ~Collective_IO(); + virtual Boolean continue_io(); + virtual void start_to_finish(Boolean, Array*); + virtual void compute_node_io_loop(Array*); +}; + +#endif + + diff --git a/src/Panda/List.C b/src/Panda/List.C new file mode 100644 index 0000000..8861a6f --- /dev/null +++ b/src/Panda/List.C @@ -0,0 +1,175 @@ +#include "definitions.h" +#include "List.h" + +Cell::Cell() +{ + item_ = NULL; + next_ = NULL; + prev_ = NULL; +} + +Cell::Cell(Linkable *new_item) +{ + item_ = new_item; + next_ = NULL; + prev_ 
= NULL; +} + +Cell::Cell(Linkable *new_item, Cell *prev) +{ + item_ = new_item; + prev_ = prev; + next_ = NULL; +} + +Cell::Cell(Linkable *new_item, Cell *next, Cell *prev) +{ + item_ = new_item; + next_ = next; + prev_ = prev; +} + + +Cell::~Cell() +{ + next_ = NULL; + prev_ = NULL; + item_ = NULL; +} + + +Linkable* Cell::item(){return item_;} + +Cell* Cell::next(){return next_;} + +Cell* Cell::prev(){return prev_;} + +void Cell::set_next(Cell *next) {next_ = next;} + +void Cell::set_prev(Cell *prev) {next_ = prev;} + +/*----------------------------------------------------*/ + +List::List() +{ + head_ = NULL; + tail_ = NULL; + old_search_val_ = NULL; +} + +List::~List() +{ + Cell* ptr = head_; + Cell* tmp; + + while(ptr != NULL) + { + tmp = ptr->next(); + delete ptr; + ptr = tmp; + } + head_ = NULL; + tail_ = NULL; + old_search_val_ = NULL; +} + + +/* Simply add to the beginning of the list */ +void List::insert(Linkable* new_item) +{ + add_first(new_item); +} + + +/* Add to the end of the list */ +void List::add_last(Linkable *new_item) +{ + Cell* tmp; + + if ((tail_ == NULL) && (head_ == NULL)) + { + /* The list is empty */ + tmp = new Cell(new_item); + tail_ =tmp; + head_ = tmp; + return; + } + else if ((tail_ != NULL) && (head_ != NULL)) + { + tmp = new Cell(new_item , tail_); + tail_->set_next(tmp); + tail_ = tmp; + return; + } + else + { + printf("Error in List obj\n"); + } + +} + +/* Add to the beginning of the list */ +void List::add_first(Linkable *new_item) +{ + Cell *tmp; + if ((tail_ == NULL) && (head_ == NULL)) + { + /* The list is empty */ + tmp = new Cell(new_item); + head_ = tmp; + tail_ = tmp; + return; + } + else if ((tail_ != NULL) && (head_ != NULL)) + { + tmp = new Cell(new_item, head_); + head_ = tmp; + return; + } + else + { + printf("Error in List obj\n"); + } +} + + +/* Remove the item from the List */ +void List::remove(Linkable *new_item) +{ + Cell *ptr = head_, *prev, *next; + while (ptr) + { + if (ptr->item()==new_item) + { + prev = 
ptr->prev(); + next = ptr->next(); + if ((prev != NULL) && (next != NULL)) + { + prev->set_next(next); + next->set_prev(prev); + delete ptr; + } + else if (prev != NULL) + { + /* ptr must be the last item */ + prev->set_next(NULL); + tail_ = prev; + delete ptr; + } + else if (next != NULL) + { + /* ptr must be the first item */ + next->set_prev(NULL); + head_ = next; + delete ptr; + } + else + { + head_ = tail_ = NULL; + delete ptr; + } + return; + } + else ptr = ptr->next(); + } +} diff --git a/src/Panda/List.h b/src/Panda/List.h new file mode 100644 index 0000000..1d162d4 --- /dev/null +++ b/src/Panda/List.h @@ -0,0 +1,61 @@ +#ifndef Link_dot_h +#define Link_dot_h + + +/* This is the dummy base class for all items * + * to be placed in a linked list. It would have * + * been cleaner to use Templates but support for * + * templates varies with different compilers and * + * the problem of code blow up etc exists. * + */ +class Linkable +{ + public: + Linkable(){}; + virtual ~Linkable(){}; + +}; + + +/* The Cells contains a Linkable element and ptrs * + * to the next and previos cells * + */ +class Cell { + Linkable *item_; + Cell *next_; + Cell *prev_; + public: + Cell(); + Cell(Linkable*); + Cell(Linkable*,Cell*); + Cell(Linkable*,Cell*,Cell*); + ~Cell(); + Linkable* item(); + Cell* next(); + Cell* prev(); + void set_next(Cell*); + void set_prev(Cell*); +}; + + +/* The List class provides support for creating a * + * list and provides operations like inserting, * + * deleting elements to the beginning and the end * + * of the list * + */ +class List { + public: + Cell *head_; + Cell *tail_; + Cell *old_search_val_; /* result of the previous search */ + List(); + ~List(); + void insert(Linkable*); + void add_last(Linkable*); + void add_first(Linkable*); + void remove(Linkable*); +}; + +#endif + + diff --git a/src/Panda/MPIFS.C b/src/Panda/MPIFS.C new file mode 100644 index 0000000..e8b56ae --- /dev/null +++ b/src/Panda/MPIFS.C @@ -0,0 +1,971 @@ +#include 
"definitions.h" +#include "MPIFS.h" +#include "Collective_IO.h" +#include "Simple_IO.h" +#include "Array.h" +#include "Chunk.h" +#include "message.h" +#define Max_Open_Files 1000 + + +#include "external/IEEEIO/src/Arch.h" + +extern "C" { + IOFile IEEEopen(char *,char *); + int IOclose(IOFile); +} + +int BRANCHING_FACTOR=8; +int SUBCHUNK_SIZE = 1048576; +MPIFS* MPIFS_global_obj; + +/* Notes,Hacks,Assumptions: + * - io_app_info_ and app_info_ point to the same object on the I/O + * for the regular. This hack is to allow for code re-use in the + * part-time I/O case. + */ + +/* Constructor for the normal case - i.e no part-time I/O nodes */ +MPIFS::MPIFS(int node_type, int app_num, int relative_rank, int app_size, + int *world_ranks) +{ +#ifdef DEBUG + int abs_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &abs_rank); + printf("%d:node_type=%d, app_num=%d , relative_rank=%d, app_size=%d\n", + abs_rank, node_type, app_num, relative_rank, app_size); +#endif + do_init(node_type, app_num, relative_rank, app_size, world_ranks); +} + +/* Constructor for the normal case - i.e no part-time I/O nodes */ +MPIFS::MPIFS(int node_type, int app_num, int relative_rank, int app_size, + int *world_ranks, Boolean shared_flag) +{ + do_init(node_type, app_num, relative_rank, app_size, world_ranks); +} + + +/* Constructor for part-time I/O nodes */ +MPIFS::MPIFS(int node_type, int comp_rank, int comp_size, int *comp_world_ranks, + int io_rank, int io_size, int *io_world_ranks) +{ +#ifdef DEBUG + int abs_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &abs_rank); + printf("%d:node_type=%d, comp_rank=%d, comp_size=%d io_rank=%d io_size=%d\n", + abs_rank,node_type, comp_rank, comp_size, io_rank, io_size); +#endif + do_init(node_type, comp_rank, comp_size, comp_world_ranks, io_rank, io_size, + io_world_ranks); +} + +/* Initialize the file system object for the regular case (i.e no part-time I/O nodes) */ +void MPIFS::do_init(int node_type, int app_num, int relative_rank, int app_size, + int *world_ranks) +{ + 
MPI_Status status; + int tag, tmp; + + MPIFS_global_obj = this; + + if ((node_type != IO_NODE) && (node_type != COMPUTE_NODE)) + { + printf("Invalid node type in MPIFS::do_init(int,int,int,int,int*)\n"); + exit(10); + } + + /* Initialize the state */ + node_type_ = node_type; + + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_); + + app_num_ = app_num; + app_rank_ = relative_rank; + app_size_ = app_size; + app_info_ = new App_Info(app_num, app_size, world_ranks); + comm_ = (MPI_Comm *) malloc(sizeof(MPI_Comm)); + MPI_Comm_split(MPI_COMM_WORLD, app_num_, app_rank_, comm_); + num_apps_ = num_apps_alive_ = global_barrier_count_ = 0; + current_max_app_num_ = -1; + compute_apps_info_ = NULL; + mem_buf_ = NULL; + num_open_files_ = 0; + for (int i=0; i<Max_Open_Files; i++) open_file_names_[i] = NULL; + + /* Part-time i/o stuff - unneeded in this case */ + io_app_num_ = io_app_rank_ = io_app_size_ = -1; + io_comm_ = NULL; + io_app_info_ = NULL; + + /* Broadcast the rank of the master I/O node. The strategy is * + * to send the info to node 0 and have it broadcast it */ + master_io_node_ = -1; + if (node_type_ == IO_NODE) + master_io_node_ = app_info_->get_master(); + + if (world_rank_ == 0) + { + if (master_io_node_ != 0) + receive_message((void *)&master_io_node_, 1, MPI_INT, MPI_ANY_SOURCE, + 1000, MPI_COMM_WORLD, &status); + } + else if (world_rank_ == master_io_node_) + { + send_message((void *)&master_io_node_, 1 , MPI_INT, 0, 1000, + MPI_COMM_WORLD); + } + MPI_Bcast((void *)&master_io_node_, 1, MPI_INT, 0, MPI_COMM_WORLD); + /* Now all nodes know who the master I/O node is */ + + if (node_type_ == IO_NODE) + { + mem_buf_size_ = 2*SUBCHUNK_SIZE; /* Factor of 2 - just to be safe */ + mem_buf_ = (char *) malloc(sizeof(char)*mem_buf_size_); + + io_app_info_ = app_info_; + io_node_main_loop(); + } + else if (node_type_ == COMPUTE_NODE) + { + /* Send the info about the compute application to the * + * master I/O node (only master I/O node has to do it) */ + if 
(am_master_compute_node()) + { + tag = app_num_*100+APP_INFO*10+SPECIAL; + send_message((void *)world_ranks, app_size, MPI_INT, + master_io_node_, tag, MPI_COMM_WORLD); + receive_message((void *)&tmp, 1, MPI_INT, master_io_node_, + tag, MPI_COMM_WORLD, &status); + } + MPI_Barrier(*comm_); + + /* Create an intra-comm with the I/O nodes. This stuff is * + * used only for implemneting barriers etc */ + MPI_Comm *inter_comm = (MPI_Comm *) malloc(sizeof(MPI_Comm)); + MPI_Comm *intra_com = (MPI_Comm *) malloc(sizeof(MPI_Comm)); + MPI_Intercomm_create(*comm_, 0, MPI_COMM_WORLD, + master_io_node_, app_num, inter_comm); + MPI_Intercomm_merge(*inter_comm, 1, intra_com); + app_info_->set_intra_comm(intra_com); + } + else + { + printf("Unsupported node type\n"); + exit(1); + } +} + +void MPIFS::do_init(int node_type, int comp_rank, int comp_size, int *comp_world_ranks, + int io_rank, int io_size, int *io_world_ranks) +{ + MPI_Group global_group, comp_group, io_group; + + + MPIFS_global_obj = this; + if ((node_type != PART_TIME_COMPUTE) && (node_type != PART_TIME_IO)) + { + printf("Incorrect initialization for node_type %d\n", node_type); + exit(10); + } + + /* Convention that logical I/O app gets app_num=0 and compute app get app_num=1 */ + node_type_ = node_type; + + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_); + + app_num_ = 1; + app_rank_ = comp_rank; + app_size_ = comp_size; + master_io_node_ = io_world_ranks[0]; + comm_ = (MPI_Comm *)malloc(sizeof(MPI_Comm)); + MPI_Comm_group(MPI_COMM_WORLD, &global_group); + MPI_Group_incl(global_group, comp_size, comp_world_ranks, &comp_group); + MPI_Comm_create(MPI_COMM_WORLD, comp_group, comm_); + app_info_ = new App_Info(1, app_size_, comp_world_ranks); + + num_apps_ = 1; + num_apps_alive_ =1; + current_max_app_num_=-1; + global_barrier_count_ =0; + compute_apps_info_ = NULL; + mem_buf_ = NULL; + + io_app_num_ = 0; /* By convention */ + io_app_rank_ = io_rank; + io_app_size_ = io_size; + io_app_info_ = NULL; + + /* Everyone in 
MPI_COMM_WORLD must make this call */ + io_comm_ = (MPI_Comm*) malloc(sizeof(MPI_Comm)); + MPI_Comm_group(MPI_COMM_WORLD, &global_group); + MPI_Group_incl(global_group, io_size, io_world_ranks, &io_group); + MPI_Comm_create(MPI_COMM_WORLD, io_group, io_comm_); + + if (node_type_ == PART_TIME_IO) + { + mem_buf_size_ = 2*SUBCHUNK_SIZE; /* Factor of 2 - just to be safe */ + mem_buf_ = (char *) malloc(sizeof(char)*mem_buf_size_); + App_Info *app = new App_Info(1, comp_size, comp_world_ranks); + io_app_info_ = new App_Info(0, io_app_size_, io_world_ranks); + insert_compute_app(1, app); + } + +} + +MPIFS::~MPIFS() +{ + if (node_type_ == COMPUTE_NODE) + { + if (am_master_compute_node()) + send_message((void *)&app_num_, 1, MPI_INT, master_io_node_, + QUIT, MPI_COMM_WORLD); + } + + + if (app_info_) delete app_info_; + if (mem_buf_) free(mem_buf_); + + for (int i=0; i<num_open_files_; i++) { + free(open_file_names_[i]); + IOclose((IOFile)open_file_ptrs_[i]); + } + + if (compute_apps_info_){ + for(int i=0; i<=current_max_app_num_; i++) + if (compute_apps_info_[i]) delete compute_apps_info_[i]; + delete compute_apps_info_; + } + + app_info_ = io_app_info_ = NULL; + compute_apps_info_ =NULL; + if (comm_) + { + MPI_Comm_free(comm_); + free(comm_); + comm_= NULL; + } +} + + +Boolean MPIFS::am_compute_node() +{ + if (node_type_ == IO_NODE) return NO; + else return YES; +} + + +Boolean MPIFS::am_io_node() +{ + if ((node_type_ == IO_NODE) || (node_type_ == PART_TIME_IO)) + return YES; + else return NO; +} + +Boolean MPIFS::am_master_io_node() +{ + if (am_io_node() && (world_rank_ == io_app_info_->get_master())) + return YES; + else return NO; +} + +Boolean MPIFS::am_master_compute_node() +{ + if (am_compute_node() && (world_rank_ == app_info_->get_master())) + return YES; + else return NO; +} + + + + + +/* This is a highly restricted version of a broadcast function. The broadcast + * is performed using tree-structured communication, starting at relative + * node 0. 
The broadcast is implemented using tree-structured communication. + */ +void MPIFS::Broadcast(int node_type, void *buf, int count, + MPI_Datatype datatype, int tag) +{ + App_Info *app; + int my_rank = app_rank_, size; + int low, high, i, dest; + + if (node_type == COMPUTE_NODE) app = app_info_; + else app = io_app_info_; + size = app->app_size(); + + low = my_rank*BRANCHING_FACTOR+1; + high = (my_rank+1)*BRANCHING_FACTOR+1; + i = low; + +#ifdef DEBUG + printf("%d: Bcast low %d high %d size=%d\n", world_rank_,low, high,size); +#endif + /* Can use asynchronous sends */ + while ((i<size) && (i<high)) { + dest = app->world_rank(i); + send_message(buf, count, datatype, dest, tag, MPI_COMM_WORLD); + i++; + } +} + + +void MPIFS::io_node_main_loop() +{ + MPI_Status msg_status; + int msg_tag, msg_code, msg_src; + + while(1){ + wait_for_next_message(&msg_code, &msg_src, &msg_tag, &msg_status); + switch(msg_code){ + + case SPECIAL: + process_io_special_message(msg_src, msg_tag, &msg_status); + break; + + case ARRAYGROUP_SCHEMA: + start_collective_io(msg_src, msg_tag, &msg_status); + break; + + case ATTRIBUTE_SCHEMA: + start_attribute_io(msg_src, msg_tag, &msg_status); + break; + + case QUIT: + if (received_quit_message(msg_src, msg_tag, &msg_status)) + return; + break; + + default: + printf("Error - did not understand message code %d from %d with tag %d\n", + msg_code, msg_src, msg_tag); + break; + } + } +} + +void MPIFS::wait_for_next_message(int *msg_code, int *msg_src, int *msg_tag, + MPI_Status *msg_status) +{ + MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, msg_status); + *msg_tag = msg_status->MPI_TAG; + *msg_src = msg_status->MPI_SOURCE; + *msg_code = *msg_tag % 10; + return; +} + +void MPIFS::process_io_special_message(int msg_src, int msg_tag, + MPI_Status *status) +{ + int msg_code = (msg_tag / 10) % 10 ; /* Extract the second digit */ + int msg_len, app_num, *world_ranks; + MPI_Comm *inter_comm, *intra_com; + App_Info *app; + + switch (msg_code) + { + case 
APP_INFO: + MPI_Get_count(status, MPI_INT, &msg_len); + world_ranks = (int *) malloc(sizeof(int)*msg_len); + receive_message((void *)world_ranks, msg_len, MPI_INT, msg_src, + msg_tag, MPI_COMM_WORLD, status); + Broadcast(IO_NODE, world_ranks, msg_len, MPI_INT, msg_tag); + app_num = msg_tag / 100; + if (am_master_io_node()) + send_message((void *) &app_num, 1, MPI_INT, msg_src, + msg_tag, MPI_COMM_WORLD); + app = new App_Info(app_num, msg_len, world_ranks); + insert_compute_app(app_num, app); + /* Create the Intra communicator */ + inter_comm = (MPI_Comm *) malloc(sizeof(MPI_Comm)); + intra_com = (MPI_Comm *) malloc(sizeof(MPI_Comm)); + MPI_Intercomm_create(*comm_, 0, MPI_COMM_WORLD, + world_ranks[0], (msg_tag/100), inter_comm); + MPI_Intercomm_merge(*inter_comm, 0, intra_com); + app->set_intra_comm(intra_com); + free(world_ranks); + break; + + case APP_BARRIER: + /* This should be used very carefully when there are more than + * 1 compute application running + */ + receive_message((void *)&app_num,1, MPI_INT, msg_src, msg_tag, + MPI_COMM_WORLD, status); + Broadcast(IO_NODE, (void *) &app_num, 1, MPI_INT, msg_tag); + MPI_Barrier(*(find_compute_app(app_num)->intra_comm())); + break; + + case GLOBAL_BARRIER: + receive_message((void *)&app_num,1, MPI_INT, msg_src, msg_tag, + MPI_COMM_WORLD, status); + Broadcast(IO_NODE, (void *) &app_num, 1, MPI_INT, msg_tag); + global_barrier_count_++; + if (global_barrier_count_ == num_apps_alive_) + { + MPI_Barrier(MPI_COMM_WORLD); + global_barrier_count_ = 0; + } + break; + + case CLEANFILES: + receive_message((void *)&app_num,1, MPI_INT, msg_src, msg_tag, + MPI_COMM_WORLD,status); + Broadcast(IO_NODE, (void *) &app_num, 1, MPI_INT, msg_tag); + cleanfiles(app_num); + break; + + case FLUSHFILES: + receive_message((void *)&app_num,1, MPI_INT, msg_src, msg_tag, + MPI_COMM_WORLD, status); + Broadcast(IO_NODE, (void *) &app_num, 1, MPI_INT, msg_tag); + if (world_rank_ == 0) flushfiles(app_num); + + break; + + + case CREATEFILES: + 
receive_message((void *)&app_num,1, MPI_INT, msg_src, msg_tag, + MPI_COMM_WORLD, status); + Broadcast(IO_NODE, (void *) &app_num, 1, MPI_INT, msg_tag); + createfiles(app_num); + break; + + default: + printf("Unknown message code %d in proces_io_special\n", msg_code); + break; + } +} + + +void MPIFS::cleanfiles(int app_num) +{ + char buf[64]; + sprintf(buf, "rm -rf %s%d\n", FILEPREFIX, app_num); + if (world_rank_ == 0) system(buf); +} + +void MPIFS::createfiles(int app_num) +{ + char buf[64]; + sprintf(buf, "mkdir %s%d\n", FILEPREFIX, app_num); + if (world_rank_ == 0) system(buf); +} + +void MPIFS::flushfiles(int app_num) +{ + FILE *flushfp; + char filename[64]; + + sprintf(filename, "%s%d/%s.%d", FILEPREFIX, app_num,"flushfile",world_rank_); + if ((flushfp = fopen(filename,"wb+"))==NULL) + { + printf("Cannot open flush file on io node\n"); + exit(1); + }; + +#ifdef TARGETAIX + int size = 4*1024*1024; // on SP2 +#else + int size = 1*1024*1024; // on bunny +#endif + char * buffer = (char*) malloc(sizeof(char) * size); +#ifdef TARGETAIX + for(int i=0; i < 32; i++){ + fwrite(buffer, sizeof(char), size, flushfp); + } +#else + fwrite (buffer, sizeof(char), size, flushfp); +#endif + int filedesc; + filedesc = fileno(flushfp); + fsync(filedesc); + fclose(flushfp); + + if ((flushfp = fopen(filename,"r"))==NULL) + { + printf("Cannot open flush file on io node\n"); + exit(1); + }; + +#ifdef TARGETAIX + for(i=0; i < 32; i++){ + fread(buffer, sizeof(char), size, flushfp); + } +#else + fread(buffer, sizeof(char), size, flushfp); +#endif + + fclose(flushfp); + free(buffer); +} + + +void MPIFS::insert_compute_app(int app_num, App_Info *app) +{ + + if (app_num > current_max_app_num_) + { + if (compute_apps_info_) + compute_apps_info_ = (App_Info **)realloc(compute_apps_info_, + sizeof(App_Info*)*(app_num+1)); + else + compute_apps_info_ = (App_Info**)malloc(sizeof(App_Info*)* + (app_num + 1)); + + for(int i=current_max_app_num_+1; i <= app_num ;i++){ + compute_apps_info_[i] = NULL; 
+ } + current_max_app_num_ = app_num; + } + compute_apps_info_[app_num] = app; + num_apps_++; + num_apps_alive_++; +} + +App_Info* MPIFS::find_compute_app(int app_num) +{ + return compute_apps_info_[app_num]; +} + + +Boolean MPIFS::received_quit_message(int msg_src, int msg_tag, + MPI_Status *status) +{ + int app_num; + receive_message((void *) &app_num, 1, MPI_INT, msg_src, msg_tag, + MPI_COMM_WORLD, status); + Broadcast(IO_NODE, (void *) &app_num, 1, MPI_INT, msg_tag); + num_apps_alive_--; + if (num_apps_alive_ == 0) return YES; + else return NO; +} + +IOFile MPIFS::open_file(char *name, int op_type) +{ + IOFile fp; + + for (int i=0; i<num_open_files_; i++) + if (!strcmp(name, open_file_names_[i])) { + fp = open_file_ptrs_[i]; + free(name); + break; + } + + if (i == num_open_files_) { + char name1[1000], fpfx[100]; + FILE *fp1; + + fp1 = fopen("FILEPREFIX", "r"); + fscanf(fp1, "%s", fpfx); + fclose(fp1); + sprintf(name1, "%s/%s", fpfx, name); + + open_file_names_[num_open_files_] = name; + if ((op_type == RESTART) || (op_type == READ_TIMESTEP)) + fp = open_file_ptrs_[num_open_files_++] = IEEEopen(name1, "r"); + else if ((op_type == TIMESTEP) || (op_type == CHECKPOINT)) + fp = open_file_ptrs_[num_open_files_++] = IEEEopen(name1, "w"); + is_new_file_[i] = YES; + + } + + return fp; +} + +Boolean MPIFS::is_new_file(char *name) { + char name1[100]; + if (node_type() == PART_TIME_COMPUTE || node_type() == COMPUTE_NODE) return; + sprintf(name1, "%s.%d", name, my_rank(IO_NODE)); + + for (int i=0; i<num_open_files_; i++) + if (!strcmp(name1, open_file_names_[i])) { + if (is_new_file_[i] == YES) { + is_new_file_[i] = NO; + return YES; + } else return NO; + } + printf("Can't find the file\n"); + exit(0); + return NO; +} + + +void MPIFS::start_attribute_io(int msg_src, int msg_tag, MPI_Status *status) +{ + char *schema_buf; + int msg_len, i; + IOFile fp = 0; + + mpi_get_count(status, MPI_CHAR, &msg_len); + schema_buf = (char *) malloc(sizeof(char)*msg_len); + 
receive_message((void *)schema_buf, msg_len, MPI_CHAR, msg_src, + msg_tag, MPI_COMM_WORLD,status); + Broadcast(IO_NODE, (void *)schema_buf, msg_len, MPI_CHAR, msg_tag); + + char *ptr = schema_buf; + union int_to_char tmp; + for (i=0; i<4; i++) tmp.c[i] = *ptr++; + int op_type = tmp.i; + for (i=0; i<4; i++) tmp.c[i] = *ptr++; + int len = tmp.i; + char *fname = (char *)malloc(sizeof(char) * (len + 1)); + char *name1 = (char *)malloc(sizeof(char) * (len + 6)); + for (i=0; i<len; i++) fname[i] = *ptr++; + fname[i] = '\0'; + sprintf(name1, "%s.%d", fname, world_rank_); + fp = open_file(name1, op_type); + Attribute *attr = new Attribute(ptr, op_type); + if (op_type == TIMESTEP) attr->write_data(fp); + else if (op_type == READ_TIMESTEP) { + attr->read_data(fp); + if (am_master_io_node()) send_attr_data(attr); + } + delete attr; + free(schema_buf); +} + +void MPIFS::start_collective_io(int msg_src, int msg_tag, MPI_Status *status) +{ + int *schema_buf, msg_len, comp_app_num = (msg_tag / 10); + Collective_IO *new_io; + IOFile fp = 0; + + mpi_get_count(status, MPI_INT, &msg_len); + schema_buf = (int *) malloc(sizeof(int)*msg_len); + receive_message((void *)schema_buf, msg_len, MPI_INT, msg_src, + msg_tag, MPI_COMM_WORLD,status); + Broadcast(IO_NODE, (void *)schema_buf, msg_len, MPI_INT, msg_tag); + + int len = schema_buf[2]; + char *name = (char *)malloc(sizeof(char) * (len + 1)); + char *name1 = (char *)malloc(sizeof(char) * (len + 6)); + for (int i=0; i<len; i++) name[i] = schema_buf[3+i]; + name[i] = '\0'; + sprintf(name1, "%s.%d", name, world_rank_); + fp = open_file(name1, schema_buf[1]); + free(name); + + switch(schema_buf[0]){ + case SIMPLE_IO: + new_io = new Simple_IO(schema_buf, msg_len,world_rank_, comp_app_num, + compute_apps_info_[comp_app_num]->app_size(), + compute_apps_info_[comp_app_num], fp); + break; + + default: + printf("Error in start_collective_io - undefined strategy\n"); + exit(1); + break; + } + +#ifdef DEBUG + printf("Starting the collective IO for 
compute app %d\n",comp_app_num); +#endif + new_io->start_to_finish(NO, NULL); +#ifdef DEBUG + printf("Finished the collective I/O for compute app %d\n", comp_app_num); +#endif + release_compute_nodes(comp_app_num); +} + + +void MPIFS::part_time_io_node_loop(int *schema_buf, int msg_len, + Array *array) +{ + Collective_IO *new_io; + IOFile fp = 0; + + int len = schema_buf[2]; + char *name = (char *)malloc(sizeof(char) * (len + 1)); + char *name1 = (char *)malloc(sizeof(char) * (len + 6)); + for (int i=0; i<len; i++) name[i] = schema_buf[3+i]; + name[i] = '\0'; + sprintf(name1, "%s.%d", name, world_rank_); + fp = open_file(name1, schema_buf[1]); + free(name); + + switch(schema_buf[0]){ + case SIMPLE_IO: + new_io = new Simple_IO(schema_buf, msg_len, world_rank_, + 1, app_size_, app_info_, fp); + break; + + default: + printf("Error in part_time_io_node_loop - undefined strategy\n"); + exit(1); + break; + } + new_io->start_to_finish(YES, array); + release_compute_nodes(1); +} + +void MPIFS::compute_node_io_loop(Array *array) +{ + Simple_IO *simple; + + switch(array->io_strategy()){ + case SIMPLE_IO: + simple = new Simple_IO(); + simple->compute_node_io_loop(array); + break; + + default: + printf("Error in MPIFS::compute_node_io_loop - Undefined i/o strategy\n"); + exit(1); + break; + } + compute_side_io_done(); +} + + +int MPIFS::app_size(int node_type) +{ + if (node_type == COMPUTE_NODE) + { + if (node_type_ != IO_NODE) + return app_size_; + else { + printf("Error in MPIFS::app_size - wrong node_type\n"); + exit(10); + } + } + else if (node_type == IO_NODE) + { + if (node_type_ == IO_NODE) return app_size_; + else if (node_type_ == PART_TIME_IO) return io_app_size_; + else { + printf("Error in MPIFS::app_size - wrong node_type\n"); + exit(10); + } + } + else + { + printf("Error in MPIFS::app_size - wrong node_type\n"); + exit(10); + + } + return -1; +} + +int MPIFS::my_rank(int node_type) +{ + if (node_type == COMPUTE_NODE) + { + if (node_type_ != IO_NODE) + return 
app_rank_; + else { + printf("Error in MPIFS::my_rank - wrong node_type\n"); + exit(10); + } + } + else if (node_type == IO_NODE) + { + if (node_type_ == IO_NODE) return app_rank_; + else if (node_type_ == PART_TIME_IO) return io_app_rank_; + else { + printf("Error in MPIFS::my_rank - wrong node_type\n"); + exit(10); + } + } + else + { + printf("Error in MPIFS::my_rank - wrong node_type\n"); + exit(10); + + } + return -1; +} + +int MPIFS::node_type(){return node_type_;} + +void MPIFS::send_array_schema(Array *array) +{ + int *schema, schema_size; + if (am_master_compute_node()){ + array->pack(&schema, &schema_size); + send_message((void *)schema, schema_size, MPI_INT, master_io_node_, + app_num_*10+ARRAYGROUP_SCHEMA, MPI_COMM_WORLD); + } +} + +void MPIFS::receive_attr_data(Attribute *attr) +{ + int msg_len, i; + MPI_Status status; + char *ptr; + + MPI_Probe(MPI_ANY_SOURCE, ATTRIBUTE_DATA, MPI_COMM_WORLD, &status); + mpi_get_count(&status, MPI_CHAR, &msg_len); + void *data_buf = (void *) malloc(msg_len); + receive_message((void *)data_buf, msg_len, MPI_CHAR, status.MPI_SOURCE, + ATTRIBUTE_DATA, MPI_COMM_WORLD, &status); + Broadcast(COMPUTE_NODE, data_buf, msg_len, MPI_CHAR, ATTRIBUTE_DATA); + + ptr = (char *)data_buf; + union int_to_char tmp; + for (i=0; i<4; i++) tmp.c[i] = *ptr++; + attr->set_esize(tmp.i); + for (i=0; i<4; i++) tmp.c[i] = *ptr++; + attr->set_count(tmp.i); + attr->set_data_ptr(ptr); +} + +void MPIFS::send_attr_data(Attribute *attr) +{ + int i; + void *data_buf = (void *)malloc((attr->data_size() + 8)); + char *ptr = (char *)data_buf; + union int_to_char tmp; + tmp.i = attr->esize(); + for (i=0; i<4; i++) *ptr++ = tmp.c[i]; + tmp.i = attr->count(); + for (i=0; i<4; i++) *ptr++ = tmp.c[i]; + memcpy(ptr, attr->get_data_ptr(), attr->data_size()); + + int master_comp_node = compute_apps_info_[1]->get_master(); + send_message(data_buf, attr->data_size()+8, MPI_CHAR, + master_comp_node, ATTRIBUTE_DATA, MPI_COMM_WORLD); + free(data_buf); +} + +void 
MPIFS::receive_attr_schema() +{ + char *schema_buf; + int msg_len; + MPI_Status status; + + MPI_Probe(MPI_ANY_SOURCE, ATTRIBUTE_SCHEMA, MPI_COMM_WORLD, &status); + mpi_get_count(&status, MPI_CHAR, &msg_len); + schema_buf = (char *) malloc(sizeof(char) * msg_len); + receive_message((void *)schema_buf, msg_len, MPI_CHAR, status.MPI_SOURCE, + ATTRIBUTE_SCHEMA, MPI_COMM_WORLD, &status); + Broadcast(IO_NODE, (void *)schema_buf, msg_len, MPI_CHAR, ATTRIBUTE_SCHEMA); + free(schema_buf); +} + +void MPIFS::send_attr_schema(Attribute *attr, char *fname, int op_type) +{ + char *schema; + int schema_size; + + if (am_master_compute_node()){ + attr->pack(schema_size, schema, fname, op_type); + send_message((void *)schema, schema_size, MPI_CHAR, master_io_node_, + ATTRIBUTE_SCHEMA, MPI_COMM_WORLD); + } + free(schema); +} + +/* Called from the compute node side */ +void MPIFS::user_commands(int cmd) +{ + if (node_type_ == COMPUTE_NODE) + { + int tag = cmd*10+SPECIAL; + if (am_master_compute_node()){ + send_message((void *)&app_num_, 1, MPI_INT, master_io_node_, + tag, MPI_COMM_WORLD); + } + + if (cmd == APP_BARRIER){ + MPI_Barrier(*(app_info_->intra_comm())); + } else if (cmd == GLOBAL_BARRIER){ + MPI_Barrier(MPI_COMM_WORLD); + } + } + else { + /* Must be the part_time I/O case */ + switch(cmd){ + case APP_BARRIER: + MPI_Barrier(*comm_); + break; + case GLOBAL_BARRIER: + MPI_Barrier(MPI_COMM_WORLD); + break; + case CLEANFILES: + if (node_type_ == PART_TIME_IO) + cleanfiles(app_num_); + break; + case FLUSHFILES: + if (node_type_ == PART_TIME_IO) + flushfiles(app_num_); + break; + case CREATEFILES: + if (node_type_ == PART_TIME_IO) + createfiles(app_num_); + break; + default: + printf("Unknown message code %d\n", cmd); + break; + } + } +} + +/* This function is called by io nodes or part-time io nodes */ +void MPIFS::release_compute_nodes(int app_num) +{ + App_Info *app; + + if (node_type_ == PART_TIME_IO){ + MPI_Barrier(MPI_COMM_WORLD); + } else if ((node_type_ == IO_NODE) && 
(num_apps_ == 1)){ + MPI_Barrier(*comm_); + if (am_master_io_node()){ + app = find_compute_app(app_num); + send_message(&app_num, 1, MPI_INT, app->get_master(), COMP_QUIT, + MPI_COMM_WORLD); + } + } else { + printf("Error in release_compute_nodes - wrong node type \n"); + exit(11); + } +} + +/* This function is called by the compute nodes after they have done their part */ +void MPIFS::compute_side_io_done() +{ + int app_num; + MPI_Status status; + + if (node_type_ == PART_TIME_COMPUTE){ + MPI_Barrier(MPI_COMM_WORLD); + } else if (node_type_ == COMPUTE_NODE){ + if (am_master_compute_node()) + receive_message((void *)&app_num, 1, MPI_INT, master_io_node_, COMP_QUIT, + MPI_COMM_WORLD, &status); + MPI_Barrier(*comm_); + } else { + printf("Error in compute_side_io_doen - wrong node type\n"); + exit(11); + } +} + +App_Info* MPIFS::io_app_info(){ + return io_app_info_; +} + +int MPIFS::master_io_node(){ + return master_io_node_; +} + +int MPIFS::mem_buf_size() +{ + return mem_buf_size_; +} + +char *MPIFS::mem_buf() +{ + return mem_buf_; +} + +void MPIFS::set_mem_buf_size(int size) +{ + mem_buf_size_ = size; +} + +void MPIFS::set_mem_buf(char *buf) +{ + mem_buf_ = buf; +} diff --git a/src/Panda/MPIFS.h b/src/Panda/MPIFS.h new file mode 100644 index 0000000..4fbedfe --- /dev/null +++ b/src/Panda/MPIFS.h @@ -0,0 +1,95 @@ +#ifndef MPIFS_dot_h +#define MPIFS_dot_h + +#include "definitions.h" +#include "VirtFS.h" +#include "mpi.h" +#include "App_Info.h" +#include "List.h" +#include "Attribute.h" + + +class Collective_IO; +class Array; + + +class MPIFS : public VirtFS { + int node_type_; /* compute,io,part_time .. 
*/ + int world_rank_; /* rank in MPI_COMM_WORLD */ + int app_num_; /* for io-nodes this should be 0 */ + int app_rank_; /* rank within the applications */ + int app_size_; /* size of the application */ + int master_io_node_; + MPI_Comm *comm_; + App_Info *app_info_; /* rank --> world mapping */ + + /* Information used by the IO nodes */ + int num_apps_; /* # of compute apps */ + int num_apps_alive_; + int current_max_app_num_; + int global_barrier_count_; + App_Info **compute_apps_info_; + char *mem_buf_; + int mem_buf_size_; + int num_open_files_; + IOFile open_file_ptrs_[1000]; + char *open_file_names_[1000]; + Boolean is_new_file_[1000]; + + /* Information required for part-time nodes */ + int io_app_num_; + int io_app_rank_; + int io_app_size_; + MPI_Comm *io_comm_; + App_Info *io_app_info_; + + void do_init(int,int,int,int,int*); + void do_init(int,int,int,int*, int,int,int*); + void wait_for_next_message(int*,int*,int*, MPI_Status*); + void process_io_special_message(int,int, MPI_Status*); + void cleanfiles(int); + void createfiles(int); + void flushfiles(int); + void insert_compute_app(int , App_Info*); + App_Info* find_compute_app(int); + Boolean received_quit_message(int,int,MPI_Status*); + + + public: + MPIFS(int,int,int, int,int*); + MPIFS(int,int,int,int,int*, Boolean); + MPIFS(int,int,int,int*,int,int,int*); + virtual ~MPIFS(); + Boolean am_master_compute_node(); + Boolean am_compute_node(); + Boolean am_master_io_node(); + Boolean am_io_node(); + void Broadcast(int,void*, int,MPI_Datatype,int); + void io_node_main_loop(); + void start_collective_io(int,int, MPI_Status*); + void start_attribute_io(int,int,MPI_Status*); + void part_time_io_node_loop(int*,int, Array*); + void compute_node_io_loop(Array*); + + int app_size(int); + int my_rank(int); + void send_array_schema(Array*); + void send_attr_schema(Attribute*, char*, int); + void receive_attr_schema(); + void send_attr_data(Attribute *); + void receive_attr_data(Attribute *); + int node_type(); + 
/* Currently the sequential case is unsupported */

/* Default constructor: plain UNIX file system, no parallel I/O layer.
 * No MPIFS object is created; file_system_ stays NULL. */
Panda::Panda()
{
  file_system_type_ = UNIX_SYSTEM;
  file_system_ = NULL;
}

/* This is the interface for regular Panda (i.e no part-time io nodes).
 * node_type selects compute vs. I/O role; world_ranks maps the
 * application-relative ranks onto MPI_COMM_WORLD ranks. */
Panda::Panda(int node_type, int app_num , int relative_rank,int app_size,
             int* world_ranks)
{
  global_system_type_ = file_system_type_ = MPI_SYSTEM;
  file_system_ = new MPIFS(node_type, app_num, relative_rank, app_size,
                           world_ranks);
}

/* This is the interface for regular Panda (i.e no part-time io nodes).
 * Same as above, with an extra shared_flag forwarded to MPIFS
 * (shared-file mode; semantics defined by the MPIFS constructor). */
Panda::Panda(int node_type, int app_num , int relative_rank,int app_size,
             int* world_ranks, Boolean shared_flag)
{
  global_system_type_ = file_system_type_ = MPI_SYSTEM;
  file_system_ = new MPIFS(node_type, app_num, relative_rank, app_size,
                           world_ranks, shared_flag);
}

/* This is the interface for part-time io nodes: each node belongs both to
 * the compute application (comp_* arguments) and, for the first io_size
 * ranks, to the I/O sub-application (io_* arguments). */
Panda::Panda(int node_type, int comp_rank, int comp_size, int *comp_world_ranks,
             int io_rank, int io_size, int *io_world_ranks)
{
  global_system_type_ = file_system_type_ = MPI_SYSTEM;
  file_system_ = new MPIFS(node_type, comp_rank, comp_size, comp_world_ranks,
                           io_rank, io_size, io_world_ranks);
}
The constructor assumes that MPIRUN library has been + * installed and you have distinct applications at the mpirun level + */ + +/* +Panda::Panda(int node_type) +{ + int app_size, app_rank, *world_ranks, leader; + + file_system_type_ = MPI_SYSTEM; + if ((node_type == COMPUTE_NODE) || (node_type == IO_NODE)){ + MPI_Comm_size(MPIRUN_APP_COMM, &app_size); + MPI_Comm_rank(MPIRUN_APP_COMM, &app_rank); + leader = MPIRUN_APP_LEADERS[MPIRUN_APP_ID]; + world_ranks = (int *) malloc(sizeof(int)*app_size); + for(int i=0; i<app_size;i++) + world_ranks[i] = leader + i; + file_system_ = new MPIFS(node_type, MPIRUN_APP_ID, app_rank, app_size, + world_ranks); + free(world_ranks); + world_ranks = NULL; + } else { + printf("Error: Invalid constructor for this node_type %d\n", node_type); + exit(1); + } +} +*/ + +/* This is the simplest interface for the part-time i/o nodes. Here the number + * of i/o nodes is specified. The first <num_io_nodes> are designated as part + * time i/o nodes and the remaining as part-time compute. This requires mpirun + * library to be initialized and there should be only one mpirun application. 
+ */ +/* +Panda::Panda(int node_type, int num_io_nodes) +{ + int app_rank, app_size, *io_ranks, *world_ranks; + + file_system_type_ = MPI_SYSTEM; + if ((node_type == COMPUTE_NODE) || (node_type == IO_NODE)){ + printf("Error: Invalid constructor\n"); + exit(1); + } else if (MPIRUN_NUM_APPS == 1){ + MPI_Comm_size(MPIRUN_APP_COMM, &app_size); + MPI_Comm_rank(MPIRUN_APP_COMM, &app_rank); + world_ranks = (int *)malloc(sizeof(int)*app_size); + io_ranks = (int *) malloc(sizeof(int)*num_io_nodes); + for(int i=0;i<app_size; i++) world_ranks[i] = i; + for(i=0;i<num_io_nodes;i++) io_ranks[i] =i; + if (app_rank < num_io_nodes){ + file_system_ = new MPIFS(PART_TIME_IO, app_rank, app_size, world_ranks, + app_rank, num_io_nodes, io_ranks); + } else { + file_system_ = new MPIFS(PART_TIME_COMPUTE, app_rank, app_size, world_ranks, + -1, num_io_nodes, io_ranks); + } + } else { + printf("Error: Part-time I/O nodes - More than one mpirun app running\n"); + exit(1); + } +} +*/ + +Panda::~Panda() +{ + if (file_system_) delete file_system_; + file_system_ = NULL; + +} + +void Panda::global_barrier() +{ + if (file_system_type_ == MPI_SYSTEM) + { + ((MPIFS *) file_system_)->user_commands(GLOBAL_BARRIER); + } +} + +void Panda::app_barrier() +{ + if (file_system_type_ == MPI_SYSTEM) + { + ((MPIFS *) file_system_)->user_commands(APP_BARRIER); + } +} + +void Panda::flushfiles() +{ + if (file_system_type_ == MPI_SYSTEM) + { + ((MPIFS *) file_system_)->user_commands(FLUSHFILES); + } +} + +void Panda::cleanfiles() +{ + if (file_system_type_ == MPI_SYSTEM) + { + ((MPIFS *) file_system_)->user_commands(CLEANFILES); + } +} + +void Panda::createfiles() +{ + if (file_system_type_ == MPI_SYSTEM) + { + ((MPIFS *) file_system_)->user_commands(CREATEFILES); + } +} diff --git a/src/Panda/Panda.h b/src/Panda/Panda.h new file mode 100644 index 0000000..c2515b3 --- /dev/null +++ b/src/Panda/Panda.h @@ -0,0 +1,31 @@ +#ifndef Panda_dot_h +#define Panda_dot_h + +#include "VirtFS.h" +#include "MPIFS.h" + + +class 
Panda { + int file_system_type_; + VirtFS *file_system_; + +public: + Panda(); + Panda(int, int, int,int, int*); + Panda(int, int, int,int, int*, Boolean); + Panda(int,int,int,int*,int,int,int*); + Panda(int); + Panda(int, int); + ~Panda(); + + /* stuff required only for testing purposes */ + void global_barrier(); + void app_barrier(); + void cleanfiles(); + void flushfiles(); + void createfiles(); +}; + +#endif + + diff --git a/src/Panda/Shared_IO.C b/src/Panda/Shared_IO.C new file mode 100644 index 0000000..8b5a5cd --- /dev/null +++ b/src/Panda/Shared_IO.C @@ -0,0 +1,237 @@ +#include "definitions.h" +#include "ArrayGroup.h" +#include "MPIFS.h" +#include "Chunk.h" +#include "App_Info.h" +#include "Array.h" +#include "message.h" +#include "Shared_IO.h" + + +extern MPIFS* MPIFS_global_obj; +extern int SUBCHUNK_SIZE; + +Shared_IO::Shared_IO(int *schema_string, int schema_size, int world_rank, + int comp_app_num,int comp_app_size , App_Info *app_info) +: Simple_IO(schema_string, schema_size, world_rank, comp_app_num, + comp_app_size, app_info) +{ + + compute_chunk_ = new Chunk(); + current_chunk_ = new Chunk(); + subchunk_ = new Chunk(); + current_array_id_ = -1; + if ((op_type_ == RESTART)||(op_type_ == GENERAL_READ)|| + (op_type_ == READ_TIMESTEP)) + read_op_ = YES; + else + read_op_ = NO; + + /* We need to set the following variables so that continue_io()* + * would start the I/O of the first subchunk automatically */ + contiguous_ = NO; + current_array_id_ = -1; + current_chunk_id_ = 0; + num_of_chunks_ = -1; /* This will cause get_next_chunk() to fail */ + current_subchunk_id_ = 0; + num_of_subchunks_ = -1; /* Causes get_next_subchunk() to fail */ + status_flag_ = START; + continue_io(); +} + +Shared_IO::~Shared_IO() +{ + if (subchunk_) delete subchunk_; + if (compute_chunk_) delete compute_chunk_; + subchunk_ = compute_chunk_ = NULL; +} + +Boolean Shared_IO::get_next_array(){ + current_array_id_++; + if (current_array_id_ < num_of_arrays_){ + make_subchunks_ = 
-1; + current_array_ = find_array(current_array_id_); + nat_chunked_ = current_array_->nat_chunked(); + sub_chunked_ = current_array_->sub_chunked(); + array_rank_ = current_array_->rank(); + + if (array_rank_ > max_rank_){ + realloc_schema_bufs(array_rank_); + } + num_of_chunks_ = current_array_->layout(IO_NODE)->total_elements(); + current_chunk_id_ = -1; + if (nat_chunked_ && !sub_chunked_) + contiguous_ = YES; /* No need to use derived datatypes */ + else + contiguous_ = NO; /* Have to use derived datatypes */ + + bytes_to_go_ = 0; + current_subchunk_id_ = -1; + return YES; + } else + return NO; +} + + +Boolean Shared_IO::get_next_chunk() +{ + int *ptr; + + if (!current_array_) return NO; + current_chunk_id_ = current_array_->get_next_index(current_chunk_id_, + my_io_rank_, + num_io_nodes_); + if (current_chunk_id_ < num_of_chunks_){ + current_chunk_->set_data_ptr(NULL); + current_chunk_->init(current_array_, current_chunk_id_, + IO_NODE, NO_ALLOC); + if (contiguous_){ + bytes_to_go_ = current_chunk_->total_size_in_bytes(); + current_chunk_->set_data_ptr(mem_buf_); + ptr = schema_bufs_[0]; + *ptr++ = current_array_id_; + *ptr++ = current_chunk_id_; + *ptr++ = (int) nat_chunked_; + *ptr++ = (int) contiguous_; + *ptr++ = op_type_; + *ptr++ = 0; + *ptr++ = 0; + compute_chunk_overlaps(current_array_, current_chunk_); + } + else { + if (!sub_chunked_ && (make_subchunks_ == -1)){ + current_array_->make_sub_chunks(current_chunk_); + make_subchunks_ = 1; + } + num_of_subchunks_ = current_array_->layout(SUB_CHUNK)->total_elements(); + current_subchunk_id_ = -1; + } + return YES; + } + else + return NO; +} + + +/* This should not be called for the contiguous_ case */ +Boolean Shared_IO::get_next_subchunk() +{ + current_subchunk_id_++; + if (current_subchunk_id_ < num_of_subchunks_){ + subchunk_->set_data_ptr(NULL); + subchunk_->init(current_chunk_, current_subchunk_id_, NO_ALLOC); + bytes_to_go_ = subchunk_->total_size_in_bytes(); + + if (bytes_to_go_ < mem_buf_size_) + 
realloc_mem_bufs(bytes_to_go_); + + subchunk_->set_data_ptr(mem_buf_); + return YES; + } + else + return NO; +} + + +void Shared_IO::start_subchunk_io() +{ + int *ptr; + + if (contiguous_){ + ptr = schema_bufs_[0]; + ptr[6] = min(SUBCHUNK_SIZE, bytes_to_go_); + + nb_send_message((void *)ptr, 7, MPI_INT, dest_ids_[0], + CHUNK_SCHEMA, MPI_COMM_WORLD, &schema_requests_[0]); + if (read_op_){ + read_data(mem_buf_, ptr[6]); + nb_send_message((void *)mem_buf_, ptr[6], MPI_CHAR, dest_ids_[0], + CHUNK_DATA_FROM_IO, MPI_COMM_WORLD, &requests_[0]); + } + else + nb_receive_message((void *)mem_buf_, ptr[6], MPI_CHAR, dest_ids_[0], + CHUNK_DATA_TO_IO, MPI_COMM_WORLD, &requests_[0]); + ptr[5] += ptr[6]; /* Offset of the next subchunk */ + bytes_to_go_ -= ptr[6]; + status_flag_ = WAITING; + + } else { + compute_chunk_overlaps(current_array_, subchunk_); + + compute_schemas(current_array_, subchunk_, compute_chunk_, + current_array_id_); + if (read_op_){ + read_data(subchunk_); + send_data_to_compute_nodes(subchunk_, NULL, NULL); + } + else + receive_data_from_compute_nodes(subchunk_, NULL, NULL); + status_flag_ = WAITING; + } +} + + +Boolean Shared_IO::test_subchunk_io() +{ + int flag; + MPI_Testall(num_overlaps_, requests_, &flag, statuses_); + if (flag) { + /* Free schema request objects - Do we need to do this */ + MPI_Waitall(num_overlaps_, schema_requests_,statuses_); + status_flag_ = START; + if (!read_op_) + if (contiguous_) + write_data(mem_buf_, schema_bufs_[0][6], 1); + else + write_data(subchunk_); + + if (!contiguous_) free_datatypes(); + return YES; + } + return NO; +} + + +/* Return YES, if I/O is complete */ +Boolean Shared_IO::continue_io() +{ + if (status_flag_ == START){ + if (!start_next_subchunk_io()) return YES; /* IO completed */ + } else if (status_flag_ == WAITING){ + if (test_subchunk_io()) + if (!start_next_subchunk_io()) return YES; /* IO done */ + } else { + printf("Error - Invalid status_flag value \n"); + exit(11); + } + return NO; +} + +/* Return yes 
/* Return yes if you can start the io of another subchunk.
 *
 * Drives the incremental (restartable) I/O state machine: find the next
 * unit of work — subchunk, then chunk, then array — and kick off its
 * transfer via start_subchunk_io().  Returns NO only when every array
 * has been exhausted, i.e. the whole shared I/O operation is complete.
 */
Boolean Shared_IO::start_next_subchunk_io()
{
  if (contiguous_){
    /* Contiguous (naturally chunked, no subchunking): the chunk is moved
     * in SUBCHUNK_SIZE slices tracked by bytes_to_go_; only advance to
     * the next chunk/array once the current chunk is fully transferred. */
    if (bytes_to_go_ <= 0){
      while(!get_next_chunk()){
        if (!get_next_array()) return NO;
      }
      /* Since we might be looking at another array */
      /* (the new array may not be contiguous, in which case its first
       *  subchunk must be set up before I/O can start) */
      if (!contiguous_) get_next_subchunk();
    }

    start_subchunk_io();
  } else {

    if (!get_next_subchunk()){
      /* We have finished this chunk */
      while(!get_next_chunk()){
        if (!get_next_array()) return NO;
      }
      /* the next array may have switched us into contiguous mode, in
       * which case no subchunk setup is needed */
      if (!contiguous_) get_next_subchunk();
    }

    start_subchunk_io();
  }
  return YES;
}
+ int IOreadInfo(IOFile,int *,int *,int *,int); + int IOreadAttributeInfo(IOFile, char *,int *, int *); + int IOreadAttribute(IOFile,int,void*); +} + +/* This constructor is needed by the compute node to create a dummy object. + * The dummy object is needed so that the compute node can execute the + * specialized compute node io loop + */ +Simple_IO::Simple_IO() +{ + dummy_ = YES; + schema_string_ = current_schema_ptr_ = NULL; + current_array_ =NULL; + current_chunk_ = NULL; + num_io_nodes_ = -1; + my_io_rank_ = -1; + compute_app_num_ = -1; + app_info_ = NULL; + part_time_io_ = NO; + compute_node_array_ =NULL; + overlap_chunk_ids_ = dest_ids_ = NULL; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_); + schema_requests_ = NULL; + requests_ =NULL; + statuses_ =NULL; + datatypes_ = NULL; + schema_bufs_ = NULL; + data_ptrs_ = NULL; + overlap_base_ = overlap_size_ =overlap_stride_ =NULL; + mem_buf_ = NULL; +} + +Simple_IO::Simple_IO(int *schema_string, int schema_size, int world_rank, + int comp_app_num, int comp_app_size , App_Info *app_info, + IOFile fp) +{ + int schema_buf_size; + + dummy_ = NO; + schema_string_ = schema_string; + schema_size_ = schema_size; + current_schema_ptr_ = schema_string; + num_io_nodes_ = MPIFS_global_obj->app_size(IO_NODE); + my_io_rank_ = MPIFS_global_obj->my_rank(IO_NODE); + compute_app_num_ = comp_app_num; + app_info_ = app_info; + world_rank_ = world_rank; + + num_overlaps_ = 0; + max_overlaps_ = comp_app_size; + overlap_chunk_ids_ = (int *) malloc(sizeof(int)*max_overlaps_); + dest_ids_ = (int *) malloc(sizeof(int)*max_overlaps_); + schema_bufs_ = (int **) malloc(sizeof(int *) *max_overlaps_); + requests_ = (MPI_Request*)malloc(sizeof(MPI_Request)*max_overlaps_); + schema_requests_ = (MPI_Request*)malloc(sizeof(MPI_Request)*max_overlaps_); + statuses_ = (MPI_Status*) malloc(sizeof(MPI_Status)*max_overlaps_); + datatypes_ = (MPI_Datatype*)malloc(sizeof(MPI_Datatype)*max_overlaps_); + max_rank_ = 10; + overlap_base_ = (int *) 
malloc(sizeof(int)*max_rank_); + overlap_stride_ = (int *) malloc(sizeof(int)*max_rank_); + overlap_size_ = (int *) malloc(sizeof(int)*max_rank_); + data_ptrs_ = (char **) malloc(sizeof(char*)*max_overlaps_); + part_time_io_ = NO; + compute_node_array_ = NULL; + mem_buf_size_ = MPIFS_global_obj->mem_buf_size(); + mem_buf_ = MPIFS_global_obj->mem_buf(); + + schema_buf_size = 6+ max_rank_*3; + for(int i=0; i < max_overlaps_; i++){ + data_ptrs_[i] = NULL; + schema_bufs_[i] = (int *) malloc(sizeof(int)*schema_buf_size); + } + + current_array_ = new Array(&schema_string); + + current_chunk_ = NULL; + num_of_chunks_ = 0; + num_of_subchunks_ = 0; + current_chunk_id_ = -1; + current_subchunk_id_ = -1; + file_ptr_ = NULL; + schema_file_ptr_ = NULL; + file_ptr_ = fp; +} + +Simple_IO::~Simple_IO() +{ + if (dummy_){ + } else { + + /* This is the object created for the I/O nodes */ + + if (current_array_) delete current_array_; + if (schema_string_) free(schema_string_); + if (overlap_chunk_ids_) free(overlap_chunk_ids_); + if (dest_ids_) free(dest_ids_); + if (requests_) free(requests_); + if (schema_requests_) free(schema_requests_); + if (statuses_) free(statuses_); + if (datatypes_) free(datatypes_); + if (overlap_base_) free(overlap_base_); + if (overlap_size_) free(overlap_size_); + if (overlap_stride_) free(overlap_stride_); + + if (schema_bufs_){ + for(int i=0;i < max_overlaps_; i++){ + if (schema_bufs_[i]) free(schema_bufs_[i]); + schema_bufs_[i] = NULL; + } + free(schema_bufs_); + } + + if (data_ptrs_) free(data_ptrs_); + + schema_bufs_ = NULL; + data_ptrs_ = NULL; + overlap_base_ = overlap_size_ = overlap_stride_ = NULL; + overlap_chunk_ids_ = dest_ids_ = NULL; + requests_ = NULL; + schema_requests_ = NULL; + statuses_ = NULL; + datatypes_ =NULL; + schema_string_ = NULL; + } +} + +void Simple_IO::realloc_buffers(int new_size) +{ + int schema_buf_size = 6+max_rank_*3; + + + overlap_chunk_ids_=(int *) realloc(overlap_chunk_ids_, new_size*sizeof(int)); + schema_bufs_ = 
(int **) realloc(schema_bufs_, new_size*sizeof(int*)); + dest_ids_ = (int *) realloc(overlap_chunk_ids_, new_size*sizeof(int)); + requests_ = (MPI_Request*)realloc(requests_, new_size*sizeof(MPI_Request)); + schema_requests_ = (MPI_Request*)realloc(schema_requests_, + new_size*sizeof(MPI_Request)); + statuses_ = (MPI_Status*)realloc(statuses_, new_size*sizeof(MPI_Status)); + datatypes_ = (MPI_Datatype*)realloc(datatypes_, + new_size*sizeof(MPI_Datatype)); + data_ptrs_ = (char **) realloc(data_ptrs_, new_size*sizeof(char*)); + for(int i=max_overlaps_;i<new_size;i++){ + schema_bufs_[i] = (int *)malloc(sizeof(int)*schema_buf_size); + data_ptrs_[i] = NULL; + } + max_overlaps_ = new_size; +} + +/* This is called only for the following cases * + * - natural chunking with user-specified subchunking * + * - reorganization (with or without user-specified chunking) */ +void Simple_IO::compute_chunk_overlaps(Array *array, Chunk *subchunk) +{ + int num_compute_chunks; + + if (nat_chunked_){ + num_overlaps_ = 1; + overlap_chunk_ids_[0] = current_chunk_id_; + } + else{ + num_compute_chunks = array->layout(COMPUTE_NODE)->total_elements(); + if (num_compute_chunks > max_overlaps_) realloc_buffers(num_compute_chunks); + subchunk->chunk_overlaps(array, &num_overlaps_, + overlap_chunk_ids_, COMPUTE_NODE); + } + + for(int i=0; i < num_overlaps_;i++) { + dest_ids_[i]=app_info_->world_rank(array->which_node(overlap_chunk_ids_[i], + COMPUTE_NODE)); +} + +#ifdef DEBUG + printf("For subchunk_id %d of chunk %d\n", current_subchunk_id_, + current_chunk_id_); + printf("The overlapping compute chunk ids are \n"); + for(int k =0; k < num_overlaps_; k++) printf("%d ", overlap_chunk_ids_[k]); + printf("\n"); +#endif +} + + +/* This is called only for the following cases * + * - natural chunking with user-specified subchunking * + * - reorganization (with or without user-specified chunking) */ +void Simple_IO::compute_schemas(Array *array, Chunk *subchunk , + Chunk *compute_chunk) +{ + if 
(nat_chunked_ && !contiguous_ && !overlaped_){ + subchunk->copy_base_size_stride(overlap_base_, overlap_size_, + overlap_stride_); + send_schema_message(0); + make_datatype(subchunk, 0); + } + else if (!nat_chunked_) { + for (int i=0; i< num_overlaps_; i++){ + compute_chunk->init(array, overlap_chunk_ids_[i], COMPUTE_NODE, NO_ALLOC); + subchunk->compute_overlap(compute_chunk, overlap_base_, overlap_size_, + overlap_stride_); + send_schema_message(i); + make_datatype(subchunk, i); + } + } else { + printf("Error - In Simple_IO::compute_schemas\n"); + exit(1); + } +} + + +/* The chunk_id is in overlap_chunk_ids_[index], the dest is in * + * in dest_ids_[index]. The rank,base,stride and size info is in * + * overlap_base, overlap_size, overlap_stride, array_rank_ */ +void Simple_IO::send_schema_message(int index) +{ + int *ptr = schema_bufs_[index]; + int schema_size = 5+array_rank_*3; + + *ptr++ = overlap_chunk_ids_[index]; + *ptr++ = (int) nat_chunked_; + *ptr++ = (int) contiguous_; + *ptr++ = array_rank_; + *ptr++ = op_type_; + + for(int i=0; i < array_rank_; i++) *ptr++ = overlap_base_[i]; + for(i=0; i < array_rank_; i++) *ptr++ = overlap_size_[i]; + for(i=0; i < array_rank_; i++) *ptr++ = overlap_stride_[i]; + + if (part_time_io_ && (dest_ids_[index] == world_rank_)) + /* No need to send the message */ + schema_requests_[index] = MPI_REQUEST_NULL; + else + nb_send_message((void *)schema_bufs_[index], schema_size, MPI_INT, + dest_ids_[index], index*10+CHUNK_SCHEMA, MPI_COMM_WORLD, + &schema_requests_[index]); +} + +/* The overlap base, size, stride are in overlap_base, overlap_size, * + * and overlap_stride */ +void Simple_IO::make_datatype(Chunk *subchunk, int index) +{ + void *ptr; + subchunk->make_datatype(overlap_base_, overlap_size_, overlap_stride_, + &ptr, &datatypes_[index]); + data_ptrs_[index] = (char *) ptr; +} + +/* Again this function is called only for the following cases * + * - natural chunking with user-specified subchunking * + * - re-organization 
with/without user-specified chunking * + * The case of natural chunking (with no user-specified * + * subchunking) is handled seperately */ + +void Simple_IO::receive_data(Chunk *subchunk, int index, int &array_bytes_to_go) +{ + + if (part_time_io_ && (dest_ids_[index] == world_rank_)){ + /* Perform a mem copy of the required chunk */ + copy_data(subchunk, index, NO, array_bytes_to_go); + requests_[index] = MPI_REQUEST_NULL; + } else + nb_receive_message((void *)data_ptrs_[index], 1, datatypes_[index], + dest_ids_[index], index*10+CHUNK_DATA_TO_IO, + MPI_COMM_WORLD, &requests_[index]); +} + +/* Again this function is called only for the following cases * + * - natural chunking with user-specified subchunking * + * - re-organization with/without user-specified chunking * + * The case of natural chunking (with no user-specified * + * subchunking) is handled seperately */ +void Simple_IO::send_data(Chunk *subchunk, int index, int &array_bytes_to_go) +{ + if (part_time_io_ && (dest_ids_[index] == world_rank_)){ + /* Perform a memory copy of the required chunk */ + copy_data(subchunk, index, YES, array_bytes_to_go); + requests_[index] =MPI_REQUEST_NULL; + } else { + /* Send the required datatype using a non-blocking send */ + nb_send_message((void *)data_ptrs_[index], 1, datatypes_[index], + dest_ids_[index], index*10+CHUNK_DATA_FROM_IO, + MPI_COMM_WORLD, &requests_[index]); + } +} + +void Simple_IO::read_data(Chunk *subchunk) +{ + int size; + size = subchunk->total_size_in_bytes(); + read_data((char *)(subchunk->data_ptr()), size, subchunk->element_size()); +} + +void Simple_IO::read_data(char *buf, int size, int esize) +{ + int n,bytes_to_go=size,buf_size; + char *tmp_buf = buf; + + while(bytes_to_go > 0){ + buf_size = min(bytes_to_go, SUBCHUNK_SIZE); + n = IOreadStream(file_ptr_, (void *)tmp_buf, buf_size/esize); + if (n != buf_size){ + printf("Error reading data - write only %d instead of %d bytes\n", + n, buf_size); +// exit(1); + } + bytes_to_go -= buf_size; + 
tmp_buf += buf_size; + } +} + + +void Simple_IO::write_data(char *buf, int size, int esize) +{ + int n, bytes_to_go = size, buf_size; + char *tmp_buf = buf; + + while(bytes_to_go > 0){ + buf_size = min(bytes_to_go, SUBCHUNK_SIZE); + n = IOwriteStream(file_ptr_, (void *)tmp_buf, buf_size/esize); + if (n != buf_size){ + printf("Error writing data - write only %d instead of %d bytes\n", + n, buf_size); + exit(1); + } + tmp_buf += buf_size; + bytes_to_go -= buf_size; + } +} + +void Simple_IO::write_data(Chunk* subchunk) +{ + int size; + size = subchunk->total_size_in_bytes(); + write_data((char *)(subchunk->data_ptr()), size, subchunk->element_size()); +} + +void Simple_IO::free_datatypes() +{ + for(int i=0; i <num_overlaps_; i++) MPI_Type_free(&datatypes_[i]); +} + +void Simple_IO::send_data_to_compute_nodes(Chunk *subchunk, + int &array_bytes_to_go) +{ + for(int i=0; i< num_overlaps_; i++) + send_data(subchunk, i, array_bytes_to_go); +} + +void Simple_IO::receive_data_from_compute_nodes(Chunk *subchunk, + int &array_bytes_to_go) +{ + for (int i=0; i< num_overlaps_; i++) + receive_data(subchunk, i, array_bytes_to_go); +} + +void Simple_IO::wait_for_completion(int &array_bytes_to_go, + Array *compute_array) +{ + int flag=0; + + if (part_time_io_){ + /* This is to avoid deadlocks */ + while (!flag){ + MPI_Testall(num_overlaps_, requests_, &flag, statuses_); + if (array_bytes_to_go > 0) + process_compute_message(array_bytes_to_go, compute_array); + } + } else { + MPI_Waitall(num_overlaps_, requests_, statuses_); + } + /* Free the schema request objects - Do we need this*/ + MPI_Waitall(num_overlaps_, schema_requests_, statuses_); +} + +/* For part-io nodes, get the data using memory copy if the * + * data resides on the same node. 
/* For part-io nodes, get the data using memory copy if the  *
 * data resides on the same node.                            *
 *
 * Moves one overlap's worth of data between the I/O-side buffer
 * (data_ptrs_[index] viewed through datatypes_[index]) and the local
 * compute chunk, without any MPI message traffic: the data is packed
 * from the source datatype into a contiguous scratch buffer and then
 * unpacked into the destination datatype.
 *
 * flag == YES : I/O side -> compute chunk (read direction)
 * flag == NO  : compute chunk -> I/O side (write direction)
 *
 * schema_bufs_[index] layout (written by send_schema_message):
 *   [0] compute chunk id, [3] rank, [5..] base, size, stride vectors.
 * array_bytes_to_go is decremented by the bytes moved (when positive) so
 * the part-time compute loop knows when the whole array is done.        */
void Simple_IO::copy_data(Chunk *subchunk, int index, Boolean flag,
                          int &array_bytes_to_go)
{
  void *comp_data_ptr;
  MPI_Datatype comp_datatype;
  int position=0, buf_size;
  void *buf=NULL;
  /* decode the schema message that would otherwise have been sent */
  int *schema = schema_bufs_[index];
  int comp_chunk_id = schema[0];
  int comp_array_rank = schema[3];
  int *base = &schema[5];
  int *size = &schema[5+comp_array_rank*1];
  int *stride = &schema[5+comp_array_rank*2];
  int bytes_copied = num_elements(comp_array_rank, size)*
    subchunk->element_size();
  Array *comp_array = compute_node_array_;
  Chunk *comp_chunk = comp_array->find_chunk(comp_chunk_id);
  /* build the compute-side datatype describing the overlap region */
  comp_chunk->make_datatype(base, size,stride, &comp_data_ptr,
                            &comp_datatype);
  if (array_bytes_to_go > 0) array_bytes_to_go -= bytes_copied;

  if (flag){
    /* read direction: pack from the I/O datatype, unpack into compute */
    MPI_Pack_size(1, datatypes_[index], MPI_COMM_WORLD, &buf_size);
    buf = (void *) malloc(buf_size);
    MPI_Pack(data_ptrs_[index], 1, datatypes_[index], buf, buf_size,
             &position, MPI_COMM_WORLD);
    position =0;
    MPI_Unpack(buf, buf_size, &position, comp_data_ptr, 1, comp_datatype,
               MPI_COMM_WORLD);
    free(buf);
  } else {
    /* write direction: pack from compute, unpack into the I/O datatype */
    MPI_Pack_size(1, comp_datatype, MPI_COMM_WORLD, &buf_size);
    buf = (void *) malloc(buf_size);
    MPI_Pack(comp_data_ptr, 1, comp_datatype, buf, buf_size,
             &position, MPI_COMM_WORLD);
    position = 0;
    MPI_Unpack(buf, buf_size, &position, data_ptrs_[index], 1,
               datatypes_[index], MPI_COMM_WORLD);
    free(buf);
  }
  MPI_Type_free(&comp_datatype);  /* the I/O-side datatype is freed later
                                     by free_datatypes() */
}
/* Run one complete array I/O operation on an I/O node, start to finish.
 *
 * part_time      - YES when this node is a part-time I/O node and must
 *                  interleave compute-side message processing to avoid
 *                  deadlock.
 * compute_array  - the compute-side Array (part-time case only).
 *
 * For reads, the array schema is first recovered from the IEEEIO file and
 * broadcast to the compute application.  Then each chunk assigned to this
 * I/O node is streamed either contiguously (natural chunking, no
 * subchunking: raw byte slices, no derived datatypes) or subchunk by
 * subchunk using MPI derived datatypes built from the overlap schemas.
 */
void Simple_IO::start_to_finish(Boolean part_time, Array *compute_array)
{
  int make_subchunks, bytes_to_go;
  int array_bytes_to_go,*ptr;
  Boolean read_op;
  Chunk *chunk=NULL, *subchunk=NULL, *compute_chunk=NULL, *tmp_chunk;

  /* reads are RESTART / GENERAL_READ / READ_TIMESTEP; everything else
   * is treated as a write */
  op_type_ = current_array_->op_type();
  if ((op_type_ == RESTART)||(op_type_ == GENERAL_READ)||
      (op_type_ == READ_TIMESTEP))
    read_op = YES;
  else
    read_op = NO;

  part_time_io_ = part_time;
  compute_node_array_ = compute_array;

  if (read_op) {
    /* Recover rank/type/global size from the file and distribute the
     * schema to the compute application. */
    int numbertype, rank, index, datatype, length;
    int *dims = (int *)malloc(sizeof(int) * 10);
    IOreadInfo(file_ptr_, &numbertype, &rank, dims, 10);
    int *size = (int *)malloc(sizeof(int) * 10);

    index = IOreadAttributeInfo(file_ptr_, "global_size", &datatype, &length);
    if (index >=0 ) { // the attribute exists
      IOreadAttribute(file_ptr_, index, size);
      current_array_->init(rank, numbertype, size, IO_NODE);
    } else { printf("Error: no attribute, global_size\n"); exit(0); }
    free(dims);
    /* NOTE(review): `size` is never freed on this path (leak), and the
     * diagnostic below assumes rank >= 3 — confirm before relying on it */

printf("%d: read rank %d, numbertype %d, size (%d %d %d)\n", world_rank_,
       rank, numbertype, size[0], size[1], size[2]);

    int schema_size = 2 + rank;
    int *schema = (int *)malloc(sizeof(int) * schema_size);
    if (MPIFS_global_obj->am_master_io_node()) {
      /* master I/O node forwards the schema to the compute master */
      schema[0] = rank; schema[1] = numbertype;
      for (int i=0; i<rank; i++) schema[2+i] = size[i];
      send_message((void *)schema, schema_size, MPI_INT,
                   app_info_->get_master(),
                   ARRAYGROUP_SCHEMA, MPI_COMM_WORLD);
    }
    if (part_time_io_) {
      /* part-time nodes also act as compute nodes: receive the schema
       * and rebroadcast it to the whole compute side */
      MPI_Status status;
      receive_message(schema, schema_size, MPI_INT, MPI_ANY_SOURCE,
                      ARRAYGROUP_SCHEMA, MPI_COMM_WORLD, &status);
      MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *)schema,
                                  schema_size, MPI_INT, ARRAYGROUP_SCHEMA);

      compute_array->init(rank, numbertype, size, COMPUTE_NODE);
    }
    free(schema);
  }

  if (part_time_io_) array_bytes_to_go = compute_node_array_->array_info();

  /* To reduce costs associated with object creation and deletion, we  *
   * will create a dummy chunk,subchunk and compute chunk object and   *
   * re-initialize them whenever necessary.                            */
  tmp_chunk = chunk = new Chunk();
  current_chunk_ = chunk;
  subchunk = new Chunk();
  compute_chunk = new Chunk();

  make_subchunks = -1;

  nat_chunked_ = current_array_->nat_chunked();
  sub_chunked_ = current_array_->sub_chunked();
  overlaped_ = current_array_->overlaped();
  if (overlaped_) { contiguous_ = NO; nat_chunked_ = NO; }
  else {
    if (nat_chunked_ && !sub_chunked_)
      contiguous_ = YES;   /* No need to use derived datatypes */
    else contiguous_ = NO; /* Have to use derived datatypes */
  }

  array_rank_ = current_array_->rank();
  if (array_rank_ > max_rank_) realloc_schema_bufs(array_rank_);

  if (read_op) current_array_->read_schema_file(file_ptr_);

  num_of_chunks_ = current_array_->layout(IO_NODE)->total_elements();
  current_chunk_id_ = current_array_->get_next_index(chunk, -1, my_io_rank_,
                                                     num_io_nodes_,
                                                     num_of_chunks_);

#ifdef DEBUG
  printf("%d: current_chunk_id_=%d my_io_rank=%d num_io_nodes=%d\n",
         world_rank_, current_chunk_id_, my_io_rank_, num_io_nodes_);
#endif
  if (contiguous_){
    /* Natural chunked and no user-specified subchunking. Therefore we don't
     * need to used mpi-derived datatypes. */

    while (current_chunk_id_ < num_of_chunks_) {
      if (!read_op) {
        /* reserve file space; IEEEIO wants the dimensions reversed */
        int *tmp_size = (int *)malloc(sizeof(int) * array_rank_);
        for (int cnt = 0; cnt < array_rank_; cnt++)
          tmp_size[cnt] = chunk->size()[array_rank_ - cnt - 1];
        IOreserveChunk(file_ptr_, current_array_->ieee_size(),
                       array_rank_, tmp_size);
        //printf("##### called IOreserveChunk for n.c. %d %d %d %d %d\n", current_array_->ieee_size(), array_rank_, tmp_size[0], tmp_size[1], tmp_size[2]);

        free(tmp_size);
        if (num_of_chunks_ > 1) {
          /* NOTE(review): attribute length is hard-coded to 3 — presumably
           * rank-3 grids only; confirm for other ranks */
          IOwriteAttribute(file_ptr_,"chunk_origin", INT32, 3, chunk->base());
          IOwriteAttribute(file_ptr_, "chunk_size", INT32, 3, chunk->size());
        }
      }

      /* for part-time io case, if chunk resides on same node, perform the *
       * read/write operation directly.                                    */
      num_overlaps_ = 1;
      overlap_chunk_ids_[0] = current_chunk_id_;
      dest_ids_[0] = app_info_->world_rank(current_array_->which_node(
                                        current_chunk_id_, COMPUTE_NODE));

      if (part_time_io_ && (world_rank_ == dest_ids_[0])){
        direct_io(current_chunk_id_, read_op, array_bytes_to_go);
      } else {
        bytes_to_go = chunk->total_size_in_bytes();
        chunk->set_data_ptr(mem_buf_);

        /* Make the schema request */
        ptr = schema_bufs_[0];
        *ptr++ = current_chunk_id_;
        *ptr++ = (int)nat_chunked_;
        *ptr++ = (int)contiguous_;
        *ptr++ = op_type_;
        *ptr++ = 0; /* This is the offset */
        *ptr++ = 0; /* Size of the data */

        /* stream the chunk in SUBCHUNK_SIZE slices; ptr[4] tracks the
         * running offset, ptr[5] the size of the current slice */
        ptr = schema_bufs_[0];
        while(bytes_to_go > 0){
          ptr[5] = min(SUBCHUNK_SIZE, bytes_to_go);

          nb_send_message((void *)ptr, 6, MPI_INT, dest_ids_[0],
                          CHUNK_SCHEMA, MPI_COMM_WORLD, &schema_requests_[0]);
          if (read_op){
            read_data(mem_buf_, ptr[5], chunk->element_size());
            nb_send_message((void *)mem_buf_, ptr[5], MPI_CHAR, dest_ids_[0],
                            CHUNK_DATA_FROM_IO, MPI_COMM_WORLD, &requests_[0]);
          } else
            nb_receive_message((void *)mem_buf_, ptr[5], MPI_CHAR,
                               dest_ids_[0], CHUNK_DATA_TO_IO,
                               MPI_COMM_WORLD, &requests_[0]);
          /* Have to watch for deadlock over here */
          wait_for_completion(array_bytes_to_go, compute_node_array_);
          if (!read_op) write_data(mem_buf_, ptr[5], chunk->element_size());
          ptr[4] += ptr[5];
          bytes_to_go -= ptr[5];
        }
        chunk->set_data_ptr(NULL);
      }
      current_chunk_id_ = current_array_->get_next_index(chunk,
                                                         current_chunk_id_,
                                                         my_io_rank_,
                                                         num_io_nodes_,
                                                         num_of_chunks_);
    } /* End while */
  } /* End if (contiguous_) */
  else {
    /* We have no choice but to use MPI-derived datatypes */
    while(current_chunk_id_ < num_of_chunks_){
      if (!read_op) {
        int *tmp_size = (int *)malloc(sizeof(int) * array_rank_);
        for (int cnt = 0; cnt < array_rank_; cnt++)
          tmp_size[cnt] = chunk->size()[array_rank_ - cnt - 1];
        IOreserveChunk(file_ptr_, current_array_->ieee_size(),
                       array_rank_, tmp_size);
        //printf("##### called IOreserveChunk for r.o. %d %d %d %d %d\n", current_array_->ieee_size(), array_rank_, tmp_size[0], tmp_size[1], tmp_size[2]);

        free(tmp_size);
        if (num_of_chunks_ > 1) {
          IOwriteAttribute(file_ptr_,"chunk_origin", INT32, 3, chunk->base());
          IOwriteAttribute(file_ptr_, "chunk_size", INT32, 3, chunk->size());
        }
      }

      /* If the array is not subchunked, then subchunk the array into      *
       * SUBCHUNK_SIZE chunks. This is to reduce the size of the           *
       * messages and the memory requirements. The current version makes a *
       * dumb assumption, that if the user specifies the subchunks,        *
       * then the size of those subchunks is less than SUBCHUNK_SIZE.      *
       * It's a dumb assumption and needs to be fixed.                     */

      if (!sub_chunked_ && (make_subchunks == -1)){
        current_array_->make_sub_chunks(chunk);
        make_subchunks = 1;
      }
      num_of_subchunks_ =current_array_->layout(SUB_CHUNK)->total_elements();

      for (current_subchunk_id_=0; current_subchunk_id_ < num_of_subchunks_;
           current_subchunk_id_++){
        subchunk->init(chunk, current_subchunk_id_, NO_ALLOC);
        bytes_to_go = subchunk->total_size_in_bytes();

        /* grow the staging buffer if this subchunk does not fit */
        if (bytes_to_go > mem_buf_size_) realloc_mem_bufs(bytes_to_go);
        subchunk->set_data_ptr(mem_buf_);

        compute_chunk_overlaps(current_array_, subchunk);
        compute_schemas(current_array_, subchunk, compute_chunk);

        if (read_op){
          read_data(subchunk);
          send_data_to_compute_nodes(subchunk, array_bytes_to_go);
        } else receive_data_from_compute_nodes(subchunk, array_bytes_to_go);
        wait_for_completion(array_bytes_to_go, compute_node_array_);
        if (!read_op) write_data(subchunk);

        free_datatypes();
        subchunk->set_data_ptr(NULL);
      }
      current_chunk_id_ = current_array_->get_next_index(chunk,
                                                         current_chunk_id_,
                                                         my_io_rank_,
                                                         num_io_nodes_,
                                                         num_of_chunks_);
    } /* End while loop */
  } /* End if else */

#ifdef DEBUG
  printf("%d:Finished the I/O\n", world_rank_);
#endif
  if (part_time_io_){
    /* Since the I/O side is finished jump into the compute loop */
    while (array_bytes_to_go > 0)
      process_compute_message(array_bytes_to_go, compute_node_array_);
#ifdef DEBUG
    printf("%d:Finished the compute side of the part-time io\n", world_rank_);
#endif
  }

  /* Delete chunk, subchunk, compute_chunk */
  if (tmp_chunk) delete tmp_chunk;
  if (subchunk) delete subchunk;
  if (compute_chunk) delete compute_chunk;
  chunk=subchunk=compute_chunk=NULL;
}
+ MPI_Status status; + int *schema, schema_size; + + MPI_Probe(MPI_ANY_SOURCE, ARRAYGROUP_SCHEMA, MPI_COMM_WORLD, &status); + mpi_get_count(&status, MPI_INT, &schema_size); + schema = (int *)malloc(sizeof(int) * schema_size); + receive_message((void *)schema, schema_size, MPI_INT, status.MPI_SOURCE, + ARRAYGROUP_SCHEMA, MPI_COMM_WORLD, &status); + MPIFS_global_obj->Broadcast(COMPUTE_NODE, (void *)schema, + schema_size, MPI_INT, ARRAYGROUP_SCHEMA); + + int *size = (int *)malloc(sizeof(int) * schema[0]); + for (int i=0; i<schema[0]; i++) size[i] = schema[2+i]; +printf("%d: read rank %d, numbertype %d, size (%d %d %d)\n", world_rank_, + schema[0], schema[1], size[0], size[1], size[2]); + array->init(schema[0], schema[1], size, COMPUTE_NODE); + free(schema); + } + + int array_bytes_to_go = array->array_info(); + while (array_bytes_to_go > 0) + process_compute_message(array_bytes_to_go, array); +} + +void Simple_IO::process_compute_message(int &arrays_bytes_to_go, + Array *array) +{ + int msg_code, msg_tag, msg_src; + MPI_Status status; + int data_size; + + any_new_message(&msg_code, &msg_src, &msg_tag, &status); + switch(msg_code){ + case CHUNK_SCHEMA: + /* Do something about it */ + process_chunk_schema_request(msg_src,msg_tag, arrays_bytes_to_go, + &status, array); + break; + + case CHUNK_DATA_FROM_IO: + MPI_Get_count(&status, MPI_CHAR, &data_size); + printf("Received chunk_data before chunk schema from %d of size %d\n", + msg_src, data_size); + MPI_Probe(msg_src, (msg_tag/10)*10+CHUNK_SCHEMA, MPI_COMM_WORLD, &status); + printf("Received the corressponding chunk schema message\n"); + process_chunk_schema_request(msg_src, (msg_tag/10)*10+CHUNK_SCHEMA, + arrays_bytes_to_go, + &status, array); + break; + + case NO_MESSAGE: + /* Do nothing */ + break; + default: + /* This message is not for me */ + printf("In process compute message - unknown code %d\n", msg_code); + break; + } +} + +void Simple_IO::process_chunk_schema_request(int msg_src, int msg_tag, + int 
&array_bytes_to_go, + MPI_Status *status, Array *array) +{ + int *schema_buf, schema_size; + int chunk_id, op_type, array_rank, *base, *size, *stride, *ptr; + int data_size, elt_size, offset; + Boolean contiguous; + MPI_Datatype datatype; + Chunk *chunk; + void *data_ptr; + + MPI_Get_count(status, MPI_INT, &schema_size); + schema_buf = (int *) malloc(sizeof(int)*schema_size); + receive_message((void *)schema_buf, schema_size, MPI_INT, msg_src, + msg_tag, MPI_COMM_WORLD, status); + + ptr = schema_buf; + chunk_id = *ptr++; + ptr++; + contiguous = (Boolean) *ptr++; + chunk = array->find_chunk(chunk_id); + + if (contiguous){ + op_type = *ptr++; + offset = *ptr++; + data_size = *ptr++; + data_ptr = chunk->data_ptr(); + data_ptr = (char *)((char *) data_ptr + offset); + + if ((op_type == RESTART) || (op_type == READ_TIMESTEP) || + (op_type == GENERAL_READ)) + receive_message((void *) data_ptr, + data_size, + MPI_CHAR, msg_src, + (msg_tag/10*10)+CHUNK_DATA_FROM_IO, + MPI_COMM_WORLD, status); + else + send_message((void *)data_ptr, data_size,MPI_CHAR, msg_src, + (msg_tag/10)*10+CHUNK_DATA_TO_IO, + MPI_COMM_WORLD); + + } + else{ + array_rank = *ptr++; + op_type = *ptr++; + base = &ptr[0]; + size = &ptr[array_rank*1]; + stride = &ptr[array_rank*2]; + elt_size = chunk->element_size(); + data_size = num_elements(array_rank, size)*elt_size; + + chunk->make_datatype(base,size,stride, &data_ptr, &datatype); + if ((op_type == RESTART) || (op_type == READ_TIMESTEP) || + (op_type == GENERAL_READ)) + receive_message(data_ptr, 1, datatype,msg_src, + (msg_tag/10)*10+CHUNK_DATA_FROM_IO, + MPI_COMM_WORLD, status); + else + send_message(data_ptr, 1, datatype, msg_src, + (msg_tag/10)*10+CHUNK_DATA_TO_IO, + MPI_COMM_WORLD); + MPI_Type_free(&datatype); + } + + array_bytes_to_go -= data_size; + free(schema_buf); +} diff --git a/src/Panda/Simple_IO.h b/src/Panda/Simple_IO.h new file mode 100644 index 0000000..4df4831 --- /dev/null +++ b/src/Panda/Simple_IO.h @@ -0,0 +1,91 @@ +#ifndef 
Simple_IO_dot_h +#define Simple_IO_dot_h + +#include "Collective_IO.h" + + +class ArrayGroup; +class Array; +class Chunk; +class App_Info; + +//#include "../IEEEIO/IEEEIO.h" +//#include "../IEEEIO/IOProtos.h" + +class Simple_IO : public Collective_IO +{ + protected: + Boolean dummy_; /* Do the instance variables mean anything */ + int *schema_string_; + int schema_size_; + int *current_schema_ptr_; + Array *current_array_; + Chunk *current_chunk_; + int num_of_subchunks_; + int current_subchunk_id_; + int num_of_chunks_; + int current_chunk_id_; + IOFile file_ptr_; + FILE *schema_file_ptr_; + int num_io_nodes_; + int my_io_rank_; + int compute_app_num_; + App_Info *app_info_; + Boolean part_time_io_; + Array *compute_node_array_; + int op_type_; + Boolean nat_chunked_; + Boolean sub_chunked_; + Boolean overlaped_; + Boolean contiguous_; + int world_rank_; + + int num_overlaps_; + int max_overlaps_; + int *overlap_chunk_ids_; + int *dest_ids_; + int **schema_bufs_; + MPI_Request *schema_requests_; + MPI_Request *requests_; + MPI_Status *statuses_; + MPI_Datatype *datatypes_; + int max_rank_; + int array_rank_; + int *overlap_base_; + int *overlap_stride_; + int *overlap_size_; + char **data_ptrs_; + char *mem_buf_; + int mem_buf_size_; + + + void realloc_buffers(int); + void compute_chunk_overlaps(Array*,Chunk*); + void compute_schemas(Array*,Chunk*,Chunk*); + virtual void send_schema_message(int); + void make_datatype(Chunk*,int); + void receive_data(Chunk*,int, int&); + void send_data(Chunk*, int, int&); + void read_data(Chunk*); + void read_data(char*,int,int); + void write_data(char*,int,int); + void write_data(Chunk*); + void copy_data(Chunk*,int,Boolean,int&); + void direct_io(int,Boolean,int&); + void free_datatypes(); + void wait_for_completion(int&,Array*); + void send_data_to_compute_nodes(Chunk*, int&); + void receive_data_from_compute_nodes(Chunk*, int&); + void realloc_schema_bufs(int); + void realloc_mem_bufs(int); + void 
process_compute_message(int&,Array*); + void process_chunk_schema_request(int,int,int&,MPI_Status*,Array*); + public: + Simple_IO(); + Simple_IO(int*,int,int,int, int , App_Info*, IOFile); + virtual ~Simple_IO(); + virtual void start_to_finish(Boolean part_time_io,Array*); + virtual void compute_node_io_loop(Array*); +}; + +#endif diff --git a/src/Panda/StopWatch.h b/src/Panda/StopWatch.h new file mode 100644 index 0000000..e38c5d7 --- /dev/null +++ b/src/Panda/StopWatch.h @@ -0,0 +1,34 @@ +#ifndef StopWatch_dot_h +#define StopWatch_dot_h + +#include <stdio.h> +#include <mpi.h> + +class StopWatch +{ + private: + double start_t,finish_t; + char description[200]; + + public: + StopWatch () { start_t = finish_t = -1; } + ~StopWatch() { }; + void start() { start_t = MPI_Wtime(); } + void stop (char *desc) + { + finish_t = MPI_Wtime(); + if (start_t == -1.0) + fprintf(stderr, "StopWatch: must start before stop\n"); + else + sprintf(description, "%s elapsed time: %f, (%f, %f)\n" + ,desc + ,finish_t-start_t + ,start_t, finish_t); + start_t = finish_t = -1; + } + char *get_description() { return description;} + +}; + + +#endif diff --git a/src/Panda/Template.C b/src/Panda/Template.C new file mode 100644 index 0000000..5600e2f --- /dev/null +++ b/src/Panda/Template.C @@ -0,0 +1,40 @@ +#include "definitions.h" +#include "Template.h" + +Template::Template(int Rank, int *sizearray) +{ + rank_ = Rank; + if (sizearray) size_ = copy_int_list(Rank, sizearray); +} + +Template::Template() +{ + rank_ = 0; + size_ = NULL; +} + +Template::~Template() +{ + if (size_ != NULL) free(size_); + size_ = NULL; +} + +int Template::rank() +{ + return rank_; +} + +int* Template::size() +{ + return size_; +} + +int Template::total_elements() +{ + return num_elements(rank_, size_); +} + +int Template::size(int i) +{ + return size_[i]; +} diff --git a/src/Panda/Template.h b/src/Panda/Template.h new file mode 100644 index 0000000..ff9483a --- /dev/null +++ b/src/Panda/Template.h @@ -0,0 +1,22 @@ 
/* Abstract base for virtual file-system back ends.  It carries no state
 * of its own; the virtual destructor exists so that derived objects can
 * be deleted safely through a VirtFS pointer. */
class VirtFS
{
  public:
    VirtFS() {}
    virtual ~VirtFS() {}
};
processor topology - dist */ + int disk_rank_; /* io processor topology - rank */ + int* disk_layout_; /* io processor topology - mesh */ + Distribution* disk_dist_; /* io processor topology - dist */ + char* data_; /* data pointer belonging to me */ + int stencil_width_; /* stencil width */ + struct ArrayInfo *next_; /* next element */ +} ArrayInfo; + +Panda *global_bear = NULL; +extern MPIFS *MPIFS_global_obj; + +int Panda_Create(int ioproc_every, int is_part_time_mode) +{ + int i, my_app_size, my_rank, *world_ranks; + int io_nodes; + +/* if (io_nodes > 1) { + printf("Warning: Write Chunks instead of Write arrays.\n"); + printf("There might be errors in Attributes write\n"); + }*/ + + MPI_Comm_size(MPI_COMM_WORLD, &my_app_size); + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + world_ranks = (int *) malloc(sizeof(int)*my_app_size); + for (i=0; i<my_app_size; i++) world_ranks[i] = i; + io_nodes = (my_app_size - 1) / ioproc_every + 1; + + if (is_part_time_mode) { + if (my_rank < io_nodes) { /* part-time io nodes */ + global_bear = new Panda(PART_TIME_IO, my_rank, my_app_size, world_ranks, + my_rank, io_nodes, world_ranks); + //printf("##### Panda proc %d/%d PART_TIME_IO\n", my_rank, my_app_size); + } else { /* part-time compute nodes */ + global_bear = new Panda(PART_TIME_COMPUTE, my_rank, my_app_size, + world_ranks, -1, io_nodes, world_ranks); + //printf("##### Panda proc %d/%d PART_TIME_COMPUTE\n", my_rank, my_app_size); + } + } else { +// printf("Warning: Full-time I/O nodes is not integrated with Cactus yet, "); +// printf("due to the communicator problem. 
Panda's part is done, though\n"); + if (my_rank < io_nodes) { /* full-time io nodes */ + global_bear = new Panda(IO_NODE, 0, my_rank, io_nodes, world_ranks); + delete global_bear; + free(world_ranks); + return 1; + } else { /* compute nodes */ + for (i=0; i<(my_app_size-io_nodes); i++) world_ranks[i] += io_nodes; + global_bear = new Panda(COMPUTE_NODE, 1, my_rank-io_nodes, + my_app_size-io_nodes, world_ranks); + } + } + free(world_ranks); + return 0; +} + +void Panda_Finalize() +{ + if (global_bear) delete global_bear; +} + + +void PandaTimestep(ArrayInfo *ptr) +{ +/* Test if Panda_Create() has been called */ + if (global_bear == NULL) { + printf("Panda object is not created yet - Use Panda_Create(...)\n"); + return; + } + +/* Create array information */ + ArrayLayout *mem_layout, *disk_layout; + Array *array; + + mem_layout = new ArrayLayout(ptr->mem_rank_, ptr->mem_layout_); + disk_layout = new ArrayLayout(ptr->disk_rank_, ptr->disk_layout_); + + array = new Array(ptr->name_, ptr->rank_, ptr->size_, ptr->esize_, + mem_layout, ptr->mem_dist_, + disk_layout, ptr->disk_dist_, + ptr->data_, ptr->stencil_width_); + + global_bear->app_barrier(); + printf("---------------- Panda Timestep -------------------\n"); + //printf("name %s rank %d size %d %d %d esize %d mem_layout %d %d %d disk_layout %d stencil_width_ %d\n", ptr->name_, ptr->rank_, ptr->size_[0], ptr->size_[1], ptr->size_[2], ptr->esize_, ptr->mem_layout_[0], ptr->mem_layout_[1], ptr->mem_layout_[2], ptr->disk_layout_[0], ptr->stencil_width_); + + array->timestep(); + + delete mem_layout; + delete disk_layout; + delete array; +} + +void *PandaReadTimestep(ArrayInfo *ptr) +{ +/* Test if Panda_Create() has been called */ + if (global_bear == NULL) { + printf("Panda object is not created yet - Use Panda_Create(...)\n"); + return NULL; + } + +/* Create array information */ + ArrayLayout *mem_layout, *disk_layout; + Array *array; + + mem_layout = new ArrayLayout(ptr->mem_rank_, ptr->mem_layout_); + disk_layout = 
NULL; + array = new Array(ptr->name_, ptr->rank_, ptr->size_, ptr->esize_, + mem_layout, ptr->mem_dist_, + disk_layout, ptr->disk_dist_, + ptr->data_, ptr->stencil_width_); + + printf("---------------- Panda ReadTimestep -------------------\n"); + global_bear->app_barrier(); + array->read_timestep(); + void *data = (void *)array->get_data_ptr(); + array->set_data_ptr(NULL); + + delete mem_layout; + delete array; + return data; +} + +void Panda_WriteAttribute(char *fname, char *name, int esize, + int count, void *data) +{ + Attribute *attr = new Attribute(); + attr->write(fname, name, esize, count, data); + delete attr; +} + +void *Panda_ReadAttribute(char *fname, char *name, int *type, int *count) +{ + Attribute *attr = new Attribute(); + attr->read(fname, name); + void *data = attr->get_data_ptr(); + attr->set_data_ptr(NULL); + *type = attr->esize(); + *count = attr->count(); + delete attr; + return data; +} + +Boolean PandaIsNewFile(char *fname) +{ + return MPIFS_global_obj->is_new_file(fname); +} diff --git a/src/Panda/c_interface.h b/src/Panda/c_interface.h new file mode 100644 index 0000000..b167f6f --- /dev/null +++ b/src/Panda/c_interface.h @@ -0,0 +1,28 @@ +#ifndef _included_C_Interface_h +#define _included_C_Interface_h + +#include "external/IEEEIO/src/IEEEIO.h" + + +typedef enum { NONE, + BLOCK, + GENERAL, + CYCLIC + } Distribution; +typedef struct ArrayInfo { + char* name_; /* array name */ + int rank_; /* rank */ + int* size_; /* glbal size of the array */ + int esize_; /* size of each element */ + int mem_rank_; /* compute processor topology - rank */ + int* mem_layout_; /* compute processor topology - mesh */ + Distribution* mem_dist_; /* compute processor topology - dist */ + int disk_rank_; /* io processor topology - rank */ + int* disk_layout_; /* io processor topology - mesh */ + Distribution* disk_dist_; /* io processor topology - dist */ + char* data_; /* data pointer belonging to me */ + int stencil_width_; /* stencil width */ + struct 
ArrayInfo *next_; /* next element */ +} ArrayInfo; + +#endif diff --git a/src/Panda/compute_test.C b/src/Panda/compute_test.C new file mode 100644 index 0000000..fc61f34 --- /dev/null +++ b/src/Panda/compute_test.C @@ -0,0 +1,350 @@ +/***************************************************************** + * This is a sample program that shows how the panda library * + * is going to be used by the application programs. * + * The example command line format is in test7.script. * + * This example shows the interface with only disk layout * + * info but no stride or subchunking schema. The value for * + * those schemas use the default ones. * + * The current test varies the size of arrays. However, the * + * wrapper function allows the number of the nodes to be * + * changed as well. * + * The first iteration loads all the code in memory. * + * The second run does the simulated disk simulation. * + * From the third run on, the values are the real writes. * + *****************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "definitions.h" +#include "StopWatch.h" +#include "ArrayGroup.h" +#include "ArrayLayout.h" +#include "Array.h" +#include "Panda.h" + +int Num_of_Arrays = 1; +int Num_Simulate_Read = 0; +int Num_Read = 0; +int Num_Simulate_Write = 2; +int Num_Write = 2 ; +int interleave = 0; +Panda *global_bear; +extern int SUBCHUNK_SIZE; +int STRATEGY = 1; + +void test_timestep(ArrayGroup *t1, int arraysize, Array **arrays) +{ + StopWatch timer; + int i; + int flag=0; + char time_message[100]; + +#ifdef VERIFYBF + for (int j=0; j<Num_of_Arrays; j++) arrays[j]->set_byte_pattern(); + t1->set_verify(); +#endif + + + global_bear->cleanfiles(); + global_bear->createfiles(); + + + for (i=0; i<Num_Simulate_Write+Num_Write; ++i) { + if (i < Num_Simulate_Write){ + t1->set_simulate_mode(); + flag=0; + } + else { + t1->reset_simulate_mode(); + flag=1; + } + + + global_bear->app_barrier(); 
+ t1->set_io_strategy(STRATEGY); + timer.start(); + t1->timestep(); + timer.stop(":"); + sprintf(time_message,"%s Write: SIZE: %d, Time %i %s", + (flag==0? "Simulated":"Real"), + arraysize, i, timer.get_description()); + printf("%s", time_message); + + if (Num_Read + Num_Simulate_Read == 0 || i < Num_Simulate_Write + Num_Write-1 ) { + global_bear->cleanfiles(); + global_bear->createfiles(); + + } + } +} + +void test_readtimestep(ArrayGroup *r1, int arraysize, Array **arrays) +{ + StopWatch timer; + int i; + int flag; + char time_message[100]; +#ifdef VERIFYBF + for (int j=0; j<Num_of_Arrays; j++) arrays[j]->reset_byte_pattern(); +#endif + + if (Num_Write + Num_Simulate_Write == 0) { + global_bear->cleanfiles(); + global_bear->createfiles(); + } + + + + for (i=0; i<Num_Simulate_Read+Num_Read; ++i) { + if (i < Num_Simulate_Read) { r1->set_simulate_mode(); flag=0; } + else {r1->reset_simulate_mode(); + flag=1; + global_bear->flushfiles(); + } + + + global_bear->app_barrier(); + r1->set_io_strategy(STRATEGY); + timer.start(); + r1->read_timestep(); + timer.stop(":"); + + sprintf(time_message,"%s Read: SIZE: %d, Time %i %s ", + (flag==0? 
"Simulated":"Real"), + arraysize, i, timer.get_description()); + printf("%s", time_message); + } +#ifdef VERIFYBF + for(i=0;i<Num_of_Arrays;i++) + if (arrays[i]->verify_byte_pattern()) + printf("Byte pattern verified for array %d\n", i); + else + printf("Byte pattern incorrect for array %d\n", i); +#endif + global_bear->cleanfiles(); +} + + +int gemein(Panda *bear, int io_nodes, int arrayrank, int *arraysize, int esize, + int mrank, int *mlayout, int drank, int *dlayout, + Distribution *mem_dist, Distribution *disk_dist, int cost_model) +{ + ArrayLayout *mem1; // Memory array layout + ArrayLayout *disk1; // Disk array layout + int i; + Array **arrays; + arrays = (Array **)malloc(sizeof(Array*)*Num_of_Arrays); + +// Set up memory and disk layouts + mem1 = new ArrayLayout (mrank,mlayout); + disk1 = new ArrayLayout(drank,dlayout); + +// Create an Array for computation. + char *name; + name = (char *)malloc(sizeof(char)*(strlen("z1Array")+5)); + char temp[5]; + for (i=0; i< Num_of_Arrays; i++) { + strcpy(name,"z1Array"); + sprintf(temp, "%d", i); + strcat(name, temp); + arrays[i] = new Array(name,arrayrank,arraysize,esize, + mem1,mem_dist,disk1, disk_dist); + } + free(name); + + if (Num_Simulate_Write + Num_Write > 0) { + ArrayGroup *t1 = new ArrayGroup("z4timestep"); + for (i= 0; i<Num_of_Arrays; i++) t1->insert(arrays[i]); + test_timestep(t1, arraysize[arrayrank-1], arrays); + delete t1; + if (Num_Simulate_Read + Num_Read > 0) { + ArrayGroup *r1 = new ArrayGroup("z4timestep"); + for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]); + test_readtimestep(r1, arraysize[arrayrank-1], arrays); + delete r1; + } + } else { + + ArrayGroup *r1 = new ArrayGroup("z4timestep"); + for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]); + test_readtimestep(r1, arraysize[arrayrank-1], arrays); + delete r1; + } + + // delete all objects created + + for (i=0; i<Num_of_Arrays; i++) delete arrays[i]; + free(arrays); + delete disk1; + delete mem1; + return(0); +} + +char my_getopt(char 
*str) +{ + char command[23][15]; + + strcpy(command[0], "-Total_nodes"); + strcpy(command[1], "-Io_nodes"); + strcpy(command[2], "-upper"); + strcpy(command[3], "-Arraysize"); + strcpy(command[4], "-Esize"); + strcpy(command[5], "-Mlayout"); + strcpy(command[6], "-Dlayout"); + strcpy(command[7], "-mem_dist"); + strcpy(command[8], "-disk_dist"); + strcpy(command[9], "-num_arrays"); + strcpy(command[10], "-read_simulate"); + strcpy(command[11], "-Read"); + strcpy(command[12], "-write_simulate"); + strcpy(command[13], "-Write"); + strcpy(command[14], "-interleave"); + strcpy(command[15], "-Cost_model"); + strcpy(command[16], "-size_message"); + strcpy(command[17], "-Xfactor"); + + for (int i= 0; i< 18; i++) + if (!strncmp(str, command[i], 2)) return command[i][1]; + printf("undefined input %s, quit!\n",str); + exit(0); +} + +void parse_cl(int argc, char **argv, int &total_nodes, int &io_nodes, + int &upper_bound, int &lower_bound, int &arrayrank, int*& arraysize, + int &esize, int &mrank, int*& mlayout, int& drank, int*& dlayout, + Distribution*& mem_dist, Distribution*& disk_dist, int &cost_model_mode) +{ + char opt; + int k; + + for (int i=1; i<argc; ) { + opt = my_getopt(argv[i++]); + switch(opt) { + case 'X': + STRATEGY = atoi(argv[i++]); + break; + case 'T': + total_nodes = atoi(argv[i++]); + break; + case 'I': + io_nodes = atoi(argv[i++]); + break; + case 'u': + upper_bound = atoi(argv[i++]); + break; + case 'A': + arrayrank = atoi(argv[i++]); + arraysize = (int *) malloc(sizeof(int)* arrayrank); + mem_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank); + disk_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank); + for (k = 0; k < arrayrank; k++) arraysize[k] = atoi(argv[i++]); + lower_bound = arraysize[k-1]; + break; + case 'E': + esize = atoi(argv[i++]); + break; + case 'M': + mrank = atoi(argv[i++]); + mlayout = (int *) malloc(sizeof(int)* mrank); + for (k = 0; k < mrank; k++) mlayout[k] = atoi(argv[i++]); + break; + case 'D': + drank = 
atoi(argv[i++]); + dlayout = (int *) malloc(sizeof(int)* drank); + for (k = 0; k < drank; k++) dlayout[k] = atoi(argv[i++]); + break; + case 'm': + for (k = 0; k < arrayrank; k++) mem_dist[k] = (Distribution)atoi(argv[i++]); + break; + case 'd': + for (k = 0; k < arrayrank; k++) disk_dist[k] = (Distribution)atoi(argv[i++]); + break; + case 'n': + Num_of_Arrays = atoi(argv[i++]); + break; + case 'r': + Num_Simulate_Read = atoi(argv[i++]); + break; + case 'R': + Num_Read = atoi(argv[i++]); + break; + case 'w': + Num_Simulate_Write = atoi(argv[i++]); + break; + case 'W': + Num_Write = atoi(argv[i++]); + break; + case 'i': + interleave = atoi(argv[i++]); + break; + case 'C': + cost_model_mode = atoi(argv[i++]); + break; + case 's': + SUBCHUNK_SIZE = atoi(argv[i++]); + break; + /* For Panda internal library stuff */ + + } + } +} + +int main(int argc, char **argv) +{ + int total_nodes; // The number of total nodes (comp + io) + int io_nodes; // The number of io nodes + int upper_bound; // The upper bound of the last dimension of the array + int lower_bound; // The starting number of the last dimension of the array + int arrayrank ; // The array rank. + int *arraysize; // The number of elements along each array dimention + int esize ; // element size of each array element + int mrank ; // Compute node mesh rank + int *mlayout; // Compute node mesh layout + int drank ; // IO node mesh rank + int cost_model_mode; // Whether the cost model is included. + int *dlayout; // IO node mesh layout + Distribution *mem_dist; // The memory array distribution along each dimention + // There are three possible distributions (BLOCK, + // NONE, CYCLIC). + Distribution *disk_dist; // The disk array distribution along each dimention + int my_rank, my_app_size, *world_ranks, leader; + + + MPI_Init(&argc, &argv); + +// For Parallel architecture (IBM SP2 like), +// Initialize the MPI environment. Only compute nodes will return from +// this call, the io nodes will not return from the call. 
All the io nodes + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &my_app_size); + + parse_cl(argc, argv, total_nodes, io_nodes, upper_bound, lower_bound, arrayrank, + arraysize, esize, mrank, mlayout, drank, dlayout, mem_dist, disk_dist, cost_model_mode); + + leader = io_nodes; + world_ranks = (int *) malloc(sizeof(int)*my_app_size); + for(int i=0;i< my_app_size; i++) + world_ranks[i] = leader+i; + + Panda * bear = new Panda(COMPUTE_NODE, 1, my_rank, my_app_size, + world_ranks); + global_bear = bear; + + for (int size=lower_bound; size <= upper_bound; size*=2) { + arraysize[arrayrank-1] = size; + gemein(bear,io_nodes, arrayrank, arraysize, esize, + mrank, mlayout, drank, dlayout, mem_dist, disk_dist, cost_model_mode); + } + free(arraysize); + free(mlayout); + free(dlayout); + free(mem_dist); + free(disk_dist); + free(world_ranks); + delete bear; + MPI_Finalize(); + return(0); +} diff --git a/src/Panda/configure b/src/Panda/configure new file mode 100644 index 0000000..f34cfdd --- /dev/null +++ b/src/Panda/configure @@ -0,0 +1,75 @@ +#!/bin/sh +# this is a script that is intended to guide the procession of +# our makefiles in an independent way across multiple OS platforms +# and multiple hardware platform...(first for sun) +echo "checking out target machine:" +X="os-detected" +MY_OS="" + +if [ `uname -a | fgrep -i sun | wc -l` -ne 0 ] ; then + MY_OS="sunos" +fi + +if [ `uname -a | fgrep -i aix | wc -l` -ne 0 ] ; then + MY_OS="aix" +fi + +if [ `uname -a | fgrep -i hp-ux | wc -l` -ne 0 ] ; then + MY_OS="hp-ux" +fi + +if [ `uname -a | fgrep -i irix | wc -l` -ne 0 ] ; then + MY_OS="irix" +fi + +/bin/rm -fr makefile + +case $MY_OS in + "sunos") + echo " detected SunOS..." + echo $MY_OS > $X + echo "include makefile.sun.mpich" > makefile + ;; + "aix") + echo " detected AIX..." + echo $MY_OS > $X + echo "include makefile.ibm.mpif" > makefile + ;; + "irix") + echo " detected IRIX..." 
+ echo $MY_OS > $X + echo "include makefile.sgi.mpich" > makefile + ;; + "hp-ux") + echo " detected HP-UX..." + echo $MY_OS > $X + echo "include makefile.hpux.mpich" > makefile + ;; + *) + echo "Hey, I don't know this operating system..." + echo $MY_OS > $X + echo "include makefile.unix.posix" > makefile + ;; +esac + +cat makefile.proto >> makefile + +case $MY_OS in + "irix") # those folks busted "which" + FOUND_MPI=`which -f mpirun |wc |awk '{print $2}'` + ;; + *) + FOUND_MPI=`which mpirun |wc |awk '{print $2}'` + ;; +esac + +if [ $FOUND_MPI -ne 0 ] ; then + echo " found MPI..." +fi +FP='/scratch-modi4/'`whoami`'/' +mkdir $FP >/dev/null 2>&1 +echo " user temp directory $FP exists..." +echo 'FILEPREFIXVAL=\"'$FP'\"' > fileprefix +echo "the file \"makefile\" is now configured for target." + +exit 0 diff --git a/src/Panda/definitions.h b/src/Panda/definitions.h new file mode 100644 index 0000000..32c4da0 --- /dev/null +++ b/src/Panda/definitions.h @@ -0,0 +1,186 @@ +#ifndef definitions_dot_h +#define definitions_dot_h + +#include<stdio.h> +#include<stdlib.h> +#include<string.h> + +#include "cctk.h" + +extern "C" {int fsync(int f);} + + + +#define START 0 +#define WAITING 1 + +/* Different I/O strategies */ +#define SIMPLE_IO 1 +#define CSDIO_IO 2 + +/* The different possible nodetypes */ +#define COMPUTE_NODE 0 +#define IO_NODE 1 +#define PART_TIME_COMPUTE 2 +#define PART_TIME_IO 3 +#define SUB_CHUNK 4 +#define PART_TIME 5 + + +/* Unix or MPI based file system */ +#define MPI_SYSTEM 0 +#define UNIX_SYSTEM 1 + +/* Different kinds of collective I/O operations */ +#define RESTART 0 +#define READ_TIMESTEP 1 +#define GENERAL_READ 2 +#define CHECKPOINT 3 +#define TIMESTEP 4 +#define GENERAL_WRITE 5 + + +/* Tags to indicate the type of the message */ + +/* #define NO_MESSAGE 10 + #define SPECIAL 9 + #define ARRAYGROUP_SCHEMA 8 + #define CHUNK_DATA_TO_IO 7 + #define APP_IO_DONE 6 + #define QUIT 5 + #define COMP_QUIT 4 + #define CHUNK_SCHEMA 3 + #define CHUNK_DATA_FROM_IO 2 + 
#define CHUNK_SCHEMA_DATA 1 +*/ +/* Modified it to make it compatible with my thesis */ +#define CHUNK_SCHEMA 1 +#define CHUNK_DATA_FROM_IO 2 +#define CHUNK_DATA_TO_IO 3 + +#define COMP_QUIT 4 +#define QUIT 5 +#define ATTRIBUTE_SCHEMA 6 +#define ATTRIBUTE_DATA 7 + +#define ARRAYGROUP_SCHEMA 8 +#define SPECIAL 9 +#define NO_MESSAGE 10 + +/* Tags to indicate the type of special operatiosn required */ +#define APP_INFO 1 +#define APP_BARRIER 2 +#define GLOBAL_BARRIER 3 +#define CLEANFILES 4 +#define FLUSHFILES 5 +#define CREATEFILES 6 + +typedef enum { UNSET, + Regular, + Irregular + } Distribution_Type; + +typedef enum { NONE, + BLOCK, + GENERAL, + CYCLIC + } Distribution; + +typedef enum { HPF, + NAS, + GENERAL_BLOCK + } Block_Distribution; + +typedef enum { ROUND_ROBIN, + REGULAR + } ChunkAllocPolicy; + + +typedef enum { NO = 0, + YES = 1 + } Boolean; + + +typedef enum { ALLOC, + NO_ALLOC, + SHARED + } DataStatus; + + + +inline int max(int a, int b) +{ + if (a > b) return a; + else return b; +} + +inline int min(int a, int b) +{ + if (a < b) return a; + else return b; +} + + +inline int* copy_int_list(int s, int *l) +{ + int *ret_list = (int *) malloc(sizeof(int)*s); + for(int i=0;i<s;i++) + ret_list[i] = l[i]; + return ret_list; +} + + + +inline Distribution* copy_distribution(int num, Distribution *ptr) +{ + Distribution *ret_list = (Distribution *)malloc(sizeof(Distribution)*num); + + for(int i=0; i < num; i++) + ret_list[i] = ptr[i]; + + return ret_list; +} + + +inline Boolean equal_distribution(int size, Distribution* dist1, Distribution* dist2) +{ + for(int i=0; i < size; i++) + { + if (dist1[i] != dist2[i]) + return NO; + } + return YES; +} + +inline void pack_distribution(int **schema_buf, int rank, Distribution *in_dist) +{ + Distribution *dist = in_dist; + int* ptr = *schema_buf; + + for(int i=0;i<rank;i++) + *ptr++ = (int) dist[i]; + *schema_buf = ptr; +} + +inline Distribution* new_distribution(int **schema_buf, int rank) +{ + Distribution *dist = 
(Distribution*) malloc(sizeof(Distribution)*rank); + int *ptr = *schema_buf; + + for(int i=0;i<rank;i++) + dist[i] = (Distribution) *ptr++; + + *schema_buf = ptr; + return dist; + +} + +inline int num_elements(int r, int *size) +{ + int total=1; + for(int i=0;i<r;i++) total *= size[i]; + return total; +} + + +#endif diff --git a/src/Panda/fulltime.C b/src/Panda/fulltime.C new file mode 100644 index 0000000..dd195f0 --- /dev/null +++ b/src/Panda/fulltime.C @@ -0,0 +1,410 @@ +/***************************************************************** + * This is a sample program that shows how the panda library * + * is going to be used by the application programs. * + * The example command line format is in test7.script. * + * This example shows the interface with only disk layout * + * info but no stride or subchunking schema. The value for * + * those schemas use the default ones. * + * The current test varies the size of arrays. However, the * + * wrapper function allows the number of the nodes to be * + * changed as well. * + * The first iteration loads all the code in memory. * + * The second run does the simulated disk simulation. * + * From the third run on, the values are the real writes. 
* + *****************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "definitions.h" +#include "StopWatch.h" +#include "ArrayGroup.h" +#include "ArrayLayout.h" +#include "Array.h" +#include "Panda.h" + +int Num_of_Arrays = 1; +int Num_Simulate_Read = 0; +int Num_Read = 0; +int Num_Simulate_Write = 2; +int Num_Write = 2 ; +int interleave = 0; +Panda *global_bear; +extern int SUBCHUNK_SIZE; +int STRATEGY = 1; +int BLK; + +int CYCLIC_ON_MEM = 0; + +void test_timestep(ArrayGroup *t1, int arraysize, Array **arrays) +{ + StopWatch timer; + int i; + int flag=0; + char time_message[100]; + +#ifdef VERIFYBF + for (int j=0; j<Num_of_Arrays; j++) arrays[j]->set_byte_pattern(); + t1->set_verify(); +#endif + + + global_bear->cleanfiles(); + global_bear->createfiles(); + + + for (i=0; i<Num_Simulate_Write+Num_Write; ++i) { + if (i < Num_Simulate_Write){ + t1->set_simulate_mode(); + flag=0; + } + else { + t1->reset_simulate_mode(); + flag=1; + } + + + global_bear->app_barrier(); + t1->set_io_strategy(STRATEGY); + timer.start(); + t1->timestep(); + timer.stop(":"); + sprintf(time_message,"%s Write: SIZE: %d, BLK: %d, Time %i %s", + (flag==0? 
"Simulated":"Real"), + arraysize, BLK, i, timer.get_description()); + printf("%s", time_message); + + if (Num_Read + Num_Simulate_Read == 0 || i < Num_Simulate_Write + Num_Write-1 ) { + global_bear->cleanfiles(); + global_bear->createfiles(); + + } + } +} + +void test_readtimestep(ArrayGroup *r1, int arraysize, Array **arrays) +{ + StopWatch timer; + int i; + int flag; + char time_message[100]; +#ifdef VERIFYBF + for (int j=0; j<Num_of_Arrays; j++) arrays[j]->reset_byte_pattern(); +#endif + + if (Num_Write + Num_Simulate_Write == 0) { + global_bear->cleanfiles(); + global_bear->createfiles(); + } + + + + for (i=0; i<Num_Simulate_Read+Num_Read; ++i) { + if (i < Num_Simulate_Read) { r1->set_simulate_mode(); flag=0; } + else {r1->reset_simulate_mode(); + flag=1; + global_bear->flushfiles(); + } + + + global_bear->app_barrier(); + r1->set_io_strategy(STRATEGY); + timer.start(); + r1->read_timestep(); + timer.stop(":"); + + sprintf(time_message,"%s Read: SIZE: %d, Time %i %s ", + (flag==0? "Simulated":"Real"), + arraysize, i, timer.get_description()); + printf("%s", time_message); + } +#ifdef VERIFYBF + for(i=0;i<Num_of_Arrays;i++) + if (arrays[i]->verify_byte_pattern()) + printf("Byte pattern verified for array %d\n", i); + else + printf("Byte pattern incorrect for array %d\n", i); +#endif + global_bear->cleanfiles(); +} + + +int gemein(Panda *bear, int io_nodes, int arrayrank, int *arraysize, int esize, + int mrank, int *mlayout, int drank, int *dlayout, + Distribution *mem_dist, int* blk_size, Distribution *disk_dist, int cost_model) +{ + ArrayLayout *mem1; // Memory array layout + ArrayLayout *disk1; // Disk array layout + int i; + Array **arrays; + arrays = (Array **)malloc(sizeof(Array*)*Num_of_Arrays); + +// Set up memory and disk layouts + mem1 = new ArrayLayout (mrank,mlayout); + disk1 = new ArrayLayout(drank,dlayout); + +// Create an Array for computation. 
+ char *name; + name = (char *)malloc(sizeof(char)*(strlen("z1Array")+5)); + char temp[5]; + for (i=0; i< Num_of_Arrays; i++) { + strcpy(name,"z1Array"); + sprintf(temp, "%d", i); + strcat(name, temp); + arrays[i] = new Array(name,arrayrank,arraysize,esize, + mem1,mem_dist,disk1, disk_dist); + } + free(name); + + if (Num_Simulate_Write + Num_Write > 0) { + ArrayGroup *t1 = new ArrayGroup("z4timestep"); + for (i= 0; i<Num_of_Arrays; i++) t1->insert(arrays[i]); + test_timestep(t1, arraysize[arrayrank-1], arrays); + delete t1; + if (Num_Simulate_Read + Num_Read > 0) { + ArrayGroup *r1 = new ArrayGroup("z4timestep"); + for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]); + test_readtimestep(r1, arraysize[arrayrank-1], arrays); + delete r1; + } + } else { + + ArrayGroup *r1 = new ArrayGroup("z4timestep"); + for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]); + test_readtimestep(r1, arraysize[arrayrank-1], arrays); + delete r1; + } + + // delete all objects created + + for (i=0; i<Num_of_Arrays; i++) delete arrays[i]; + free(arrays); + delete disk1; + delete mem1; + return(0); +} + +char my_getopt(char *str) +{ + char command[24][15]; + + strcpy(command[0], "-Total_nodes"); + strcpy(command[1], "-Io_nodes"); + strcpy(command[2], "-upper"); + strcpy(command[3], "-Arraysize"); + strcpy(command[4], "-Esize"); + strcpy(command[5], "-Mlayout"); + strcpy(command[6], "-Dlayout"); + strcpy(command[7], "-mem_dist"); + strcpy(command[8], "-disk_dist"); + strcpy(command[9], "-num_arrays"); + strcpy(command[10], "-read_simulate"); + strcpy(command[11], "-Read"); + strcpy(command[12], "-write_simulate"); + strcpy(command[13], "-Write"); + strcpy(command[14], "-interleave"); + strcpy(command[15], "-Cost_model"); + strcpy(command[16], "-size_message"); + strcpy(command[17], "-Xfactor"); + strcpy(command[18], "-K"); + + for (int i= 0; i< 24; i++) + if (!strncmp(str, command[i], 2)) return command[i][1]; + printf("undefined input %s, quit!\n",str); + exit(0); +} + +void 
parse_cl(int argc, char **argv, int &total_nodes, int &io_nodes, + int &upper_bound, int &lower_bound, int &arrayrank, int*& arraysize, + int &esize, int &mrank, int*& mlayout, int& drank, int*& dlayout, + Distribution*& mem_dist, int*& blk_size, + Distribution*& disk_dist, int &cost_model_mode, int &upper_blk) +{ + char opt; + int k; + + for (int i=1; i<argc; ) { + opt = my_getopt(argv[i++]); + switch(opt) { + case 'X': + STRATEGY = atoi(argv[i++]); + break; + case 'T': + total_nodes = atoi(argv[i++]); + break; + case 'I': + io_nodes = atoi(argv[i++]); + break; + case 'u': + upper_bound = atoi(argv[i++]); + break; + case 'A': + arrayrank = atoi(argv[i++]); + arraysize = (int *) malloc(sizeof(int)* arrayrank); + mem_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank); + blk_size = (int *) malloc(sizeof(int)*arrayrank); + disk_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank); + for (k = 0; k < arrayrank; k++) arraysize[k] = atoi(argv[i++]); + lower_bound = arraysize[k-1]; + break; + case 'E': + esize = atoi(argv[i++]); + break; + case 'M': + mrank = atoi(argv[i++]); + mlayout = (int *) malloc(sizeof(int)* mrank); + for (k = 0; k < mrank; k++) mlayout[k] = atoi(argv[i++]); + break; + case 'D': + drank = atoi(argv[i++]); + dlayout = (int *) malloc(sizeof(int)* drank); + for (k = 0; k < drank; k++) dlayout[k] = atoi(argv[i++]); + break; + case 'm': + for (k = 0; k < arrayrank; k++) + { + mem_dist[k] = (Distribution)atoi(argv[i++]); + } + break; + case 'd': + for (k = 0; k < arrayrank; k++) + { + disk_dist[k] = (Distribution)atoi(argv[i++]); + } + break; + + case 'n': + Num_of_Arrays = atoi(argv[i++]); + break; + case 'r': + Num_Simulate_Read = atoi(argv[i++]); + break; + case 'R': + Num_Read = atoi(argv[i++]); + break; + case 'w': + Num_Simulate_Write = atoi(argv[i++]); + break; + case 'W': + Num_Write = atoi(argv[i++]); + break; + case 'i': + interleave = atoi(argv[i++]); + break; + case 'C': + cost_model_mode = atoi(argv[i++]); + break; + case 
's': + SUBCHUNK_SIZE = atoi(argv[i++]); + break; + + case 'K': + upper_blk = atoi(argv[i++]); + break; + + /* For Panda internal library stuff */ + + + } + } + + printf("####### io nodes=%d \n", io_nodes); +} + +int main(int argc, char **argv) +{ + int total_nodes; // The number of total nodes (comp + io) + int io_nodes; // The number of io nodes + int upper_bound; // The upper bound of the last dimension of the array + int lower_bound; // The starting number of the last dimension of the array + int arrayrank ; // The array rank. + int *arraysize; // The number of elements along each array dimention + int esize ; // element size of each array element + int mrank ; // Compute node mesh rank + int *mlayout; // Compute node mesh layout + int drank ; // IO node mesh rank + int cost_model_mode; // Whether the cost model is included. + int *dlayout; // IO node mesh layout + Distribution *mem_dist; // The memory array distribution along each dimention + // There are three possible distributions (BLOCK, + // NONE, CYCLIC). + int *blk_size; + Distribution *disk_dist; // The disk array distribution along each dimention + int my_rank, my_app_size, *world_ranks, leader; + + int upper_blk; // upper bound of the block size + int lower_blk; + Panda *bear; + + + MPI_Init(&argc, &argv); + +// For Parallel architecture (IBM SP2 like), +// Initialize the MPI environment. Only compute nodes will return from +// this call, the io nodes will not return from the call. 
All the io nodes + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &my_app_size); + leader = 0; + world_ranks = (int *) malloc(sizeof(int)*my_app_size); + + parse_cl(argc, argv, total_nodes, io_nodes, upper_bound, lower_bound, + arrayrank, arraysize, esize, mrank, mlayout, drank, dlayout, mem_dist, + blk_size, disk_dist, cost_model_mode, upper_blk); + + int q = total_nodes/io_nodes; + + for (int i=0; i<io_nodes; i++) + world_ranks[i] = i*q; + for (int j=0; j<io_nodes; j++) + for (int k=1; k< q; k++) + world_ranks[i++] = j*q + k; + +/* + world_ranks[0] = 0; + world_ranks[1] = 3; + world_ranks[2] = 1; + world_ranks[3] = 2; + world_ranks[4] = 4; + world_ranks[5] = 5; + + printf("myrank= %d, io_nodes=%d, total_nodes=%d \n", + my_rank, io_nodes, total_nodes); +*/ + printf("world ranks \n"); + for (i=0; i<my_app_size; i++) + printf(" %d", world_ranks[i]); + printf("\n\n"); + + if (my_rank % q == 0) + { // io nodes + bear = new Panda(IO_NODE, 0, my_rank/q, io_nodes, world_ranks); + global_bear = bear; + } + else + { // compute nodes + bear = new Panda(COMPUTE_NODE, 1, + my_rank/q*(q-1)+(my_rank-1)%q, + my_app_size-io_nodes, world_ranks+io_nodes); + global_bear = bear; + + for (int size=lower_bound; size <= upper_bound; size*=2) + { + arraysize[arrayrank-1] = size; + gemein(bear,io_nodes, arrayrank, arraysize, esize, + mrank, mlayout, drank, dlayout, mem_dist, blk_size, + disk_dist, cost_model_mode); + } + + } + + free(arraysize); free(mlayout); free(dlayout); free(mem_dist); + free(blk_size); + free(disk_dist); + free(world_ranks); + delete bear; + + MPI_Finalize(); + return(0); +} diff --git a/src/Panda/io_main.C b/src/Panda/io_main.C new file mode 100644 index 0000000..69b0a63 --- /dev/null +++ b/src/Panda/io_main.C @@ -0,0 +1,83 @@ +#include "definitions.h" +#include "StopWatch.h" +#include "Panda.h" +#include "ArrayGroup.h" + +extern MPIFS* MPIFS_global_obj; +extern int BRANCHING_FACTOR; +extern int SUBCHUNK_SIZE; +Boolean shared_flag = NO; 
+ + +char my_getopt(char *str) +{ + char command[8][15]; + + strcpy(command[0], "-chunks"); + strcpy(command[1], "-xmax_messages"); + strcpy(command[2], "-tags"); + strcpy(command[3], "-branching_factor"); + strcpy(command[4], "-ymax_memory"); + strcpy(command[5], "-flag"); + strcpy(command[6], "-size_message"); + strcpy(command[7], "-Shared"); + + for (int i= 0; i< 8; i++) + if (!strncmp(str, command[i], 2)) return command[i][1]; + printf("undefined input %s, quit!\n",str); + exit(0); +} + +void parse_cl(int argc, char **argv) +{ + char opt; + + for(int i=1; i< argc; ){ + opt = my_getopt(argv[i++]); + switch(opt) { + case 'b' : + BRANCHING_FACTOR = atoi(argv[i++]); + break; + case 's': + SUBCHUNK_SIZE = atoi(argv[i++]); + break; + case 'S': + shared_flag = (Boolean) atoi(argv[i++]); + break; + } + } +} + +main(int argc, char **argv) +{ + int *world_ranks, my_rank, leader, app_size; + MPI_Init(&argc, &argv); + Panda *bear; + char cmd[100]; + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + sprintf(cmd , "rm -rf %s", FILEPREFIX); + //if (my_rank == 0) + system(cmd); + sprintf(cmd , "mkdir %s", FILEPREFIX); + //if (my_rank == 0) + system(cmd); + MPI_Comm_size(MPI_COMM_WORLD, &app_size); + world_ranks = (int *) malloc(sizeof(int)*app_size); + leader = 0; + + for(int i=0;i< app_size; i++) + world_ranks[i] = leader+i; + parse_cl(argc, argv); + if (shared_flag){ + bear = new Panda(IO_NODE, 0, my_rank, app_size, + world_ranks, YES); + } + else { + bear = new Panda(IO_NODE, 0, my_rank, app_size, world_ranks); + } + delete bear; +// sprintf(cmd , "rm -rf %s", FILEPREFIX); if (my_rank == 0) system(cmd); + MPI_Finalize(); +} + diff --git a/src/Panda/make.code.defn b/src/Panda/make.code.defn new file mode 100644 index 0000000..dd9edc3 --- /dev/null +++ b/src/Panda/make.code.defn @@ -0,0 +1,77 @@ +SRCS = App_Info.C Array.C ArrayDistribution.C ArrayLayout.C Attribute.C Chunk.C Collective_IO.C List.C MPIFS.C Panda.C Simple_IO.C Template.C VirtFS.C c_interface.C + +SUBDIRS = + +# The 
9000 names of the cygwin tools and T3E... +TMPUN := $(shell uname) +ifeq ($(TMPUN), CYGWIN32_95) +UNAME = CYGWIN +else +ifeq ($(TMPUN), CYGWIN32_NT) +UNAME = CYGWIN +else +ifeq ($(TMPUN), CYGWIN_NT-4.0) +UNAME = CYGWIN +else +UNAME := $(shell uname | perl -pe 's/(sn\d\d\d\d|jsimpson)/UNICOS\/mk/') +endif +endif +endif + +# 64 Bit Irix +ifeq ($(UNAME), IRIX64) + +CXXFLAGS += -DANSI -DFILEPREFIX -ptused + +endif + +# 32 Bit Irix +ifeq ($(UNAME), IRIX) + +CXXFLAGS += -DANSI -ptused + +endif + +# HP +ifeq ($(UNAME), HP-UX) + +CXXFLAGS += -DANSI -DHP + +endif + +# Alpha +ifeq ($(UNAME), OSF1) + +CXXFLAGS += -DANSI + +endif + +# Linux +ifeq ($(UNAME), Linux) + +CXXFLAGS += -DANSI + +endif + +# Macintosh /PowerMach-MachTen +ifeq ($(UNAME), machten) + +CXXFLAGS += -DANSI + +endif + +# Cygwin / Win32 +ifeq ($(UNAME), CYGWIN) + +CFLAGS += -DANSI -DWIN32 +CXXFLAGS += -DANSI -DWIN32 + +endif + +# T3E +ifeq ($(UNAME), UNICOS/mk) + +CXXFLAGS += -DANSI -DT3E -hinstantiate=used + +endif + diff --git a/src/Panda/makefile.hpux.mpich b/src/Panda/makefile.hpux.mpich new file mode 100644 index 0000000..6ad74c5 --- /dev/null +++ b/src/Panda/makefile.hpux.mpich @@ -0,0 +1,19 @@ +# makefile part for hpux (yong 8/3/95) +include fileprefix +MPICH_HOME = /extra/ying/mpich +MPIRUN_HOME = /extra/ying/mpirun +INCLUDE_DIR = -I$(MPICH_HOME)/include -I$(MPIRUN_HOME)/include +WGEN_DIR = /extra/ying/mpich/profiling/wrappergen +LIBS = -L$(MPIRUN_HOME)/lib -lmpirun -L$(MPICH_HOME)/lib/$(ARCH)/$(COMM) -lmpi -lm -lV3 +#LIBS = -L$(MPIRUN_HOME)/lib -lmpirun -L$(MPICH_HOME)/lib/$(ARCH)/$(COMM) -lmpi -lpmpi -lm -lV3 +#MPILIB = $(MPICH_HOME)/lib/$(ARCH)/$(COMM)/libmpi.a +DEVICE = ch_p4 +COMM = ch_p4 +ARCH = hpux +AR = /usr/gnu/bin/ar # for aix, but also pretty standard +CC = gcc +OPTFLAGS = -g -Wall +CFLAGS = -DMPID_NO_FORTRAN -DHAS_XDR=1 \ + -DHAVE_STDLIB_H=1 \ + -DHAVE_SYSTEM=1 $(OPTFLAGS) $(INCLUDE_DIR) -DMPI_$(ARCH) \ + -DTARGETHPUX -DFILEPREFIX=$(FILEPREFIXVAL) diff --git a/src/Panda/makefile.ibm.mpif 
b/src/Panda/makefile.ibm.mpif new file mode 100644 index 0000000..a51b052 --- /dev/null +++ b/src/Panda/makefile.ibm.mpif @@ -0,0 +1,11 @@ +# makefile part for aix with our MPIFS filesystem on MPIF (jozwiak 030795) +include fileprefix +INCLUDE_DIR = -I/usr/local/include/ibm-mpi +LIBS = -lm -L/usr/local/lib/ibm-mpi -lmpirun +AR = /bin/ar # for aix +CC = mpCC +#CC = ./mpifxlC +OPTFLAGS = -g -DCOST_MODEL +CFLAGS = $(OPTFLAGS) $(INCLUDE_DIR) \ + -DTARGETAIX -DFILEPREFIX=$(FILEPREFIXVAL) \ + -DWRAPPERTEST -DNAS_MPIF diff --git a/src/Panda/makefile.proto b/src/Panda/makefile.proto new file mode 100644 index 0000000..17805b3 --- /dev/null +++ b/src/Panda/makefile.proto @@ -0,0 +1,96 @@ +# makefile on 3-7-95 for C++ version of panda + +# REMOVE # for the intended build (NOTE: # is a comment, unlike for C) +# include makefile.ibm.mpif +# include makefile.ibm.mpich # this one is flakey, use mpif +# include makefile.sun.mpich +# include makefile.unix.posix + +ARCHIVE = libeegads.a +OFILES = Array.o Chunk.o Simple_IO.o Panda.o \ + ArrayLayout.o List.o Collective_IO.o \ + MPIFS.o Attribute.o ArrayDistribution.o \ + Template.o VirtFS.o App_Info.o c_interface.o +CFILES = + +all: $(ARCHIVE) + +$(ARCHIVE): $(OFILES) + $(AR) crv $(ARCHIVE) $(OFILES) + +Array.o: Array.C Array.h Template.h List.h MPIFS.h ArrayLayout.h definitions.h + $(CC) $(CFLAGS) -c Array.C +ArrayGroup.o: ArrayGroup.C ArrayGroup.h ArrayGroup.h MPIFS.h definitions.h + $(CC) $(CFLAGS) -c ArrayGroup.C +List.o: List.C List.h definitions.h + $(CC) $(CFLAGS) -c List.C +ArrayLayout.o: ArrayLayout.C ArrayLayout.h Template.h definitions.h + $(CC) $(CFLAGS) -c ArrayLayout.C +Template.o: Template.C Template.h definitions.h + $(CC) $(CFLAGS) -c Template.C +VirtFS.o: VirtFS.C VirtFS.h + $(CC) -c $(CFLAGS) VirtFS.C +MPIFS.o: MPIFS.C MPIFS.h VirtFS.h Array.h Collective_IO.h Simple_IO.h definitions.h App_Info.h message.h + $(CC) -c $(CFLAGS) MPIFS.C +Panda.o: Panda.C Panda.h VirtFS.h MPIFS.h definitions.h + $(CC) -c $(CFLAGS) 
Panda.C +Chunk.o: Chunk.C Chunk.h ArrayLayout.h Array.h definitions.h + $(CC) -c $(CFLAGS) Chunk.C +Collective_IO.o: Collective_IO.C Collective_IO.h definitions.h + $(CC) -c $(CFLAGS) Collective_IO.C +Simple_IO.o: Simple_IO.C Simple_IO.h Collective_IO.h Array.h MPIFS.h definitions.h message.h + $(CC) -c $(CFLAGS) Simple_IO.C +CSDIO.o: CSDIO.C CSDIO.h Simple_IO.h Collective_IO.h Array.h MPIFS.h definitions.h message.h + $(CC) -c $(CFLAGS) CSDIO.C +Shared_IO.o: Shared_IO.C Shared_IO.h Simple_IO.h Collective_IO.h Array.h MPIFS.h definitions.h message.h + $(CC) -c $(CFLAGS) Shared_IO.C +CSDIO_Shared.o: CSDIO_Shared.C CSDIO_Shared.h CSDIO.h Simple_IO.h Collective_IO.h ArrayGroup.h Array.h MPIFS.h definitions.h message.h + $(CC) -c $(CFLAGS) CSDIO_Shared.C +App_Info.o: App_Info.C App_Info.h definitions.h + $(CC) -c $(CFLAGS) App_Info.C +c_interface.o: c_interface.C c_interface.h + $(CC) -c $(CFLAGS) c_interface.C +Attribute.o: Attribute.C Attribute.h + $(CC) -c $(CFLAGS) Attribute.C +ArrayDistribution.o: ArrayDistribution.C ArrayDistribution.h + $(CC) -c $(CFLAGS) ArrayDistribution.C + + +## Hey, Kent, how should we verify a build is indeed correct? +## it seems that there is sort of a chicken and egg problem +## here: we need a manually verified set of correct runs +## against which to test later builds and test runs... +## i set up the little bit below so that one can do a +## `make test' to verify a corrrect build ... + +oneexe: oneexe.C $(ARCHIVE) + $(CC) $(CFLAGS) oneexe.C -o oneexe -L. -leegads $(LIBS) + +io_main: io_main.C $(ARCHIVE) + $(CC) $(CFLAGS) io_main.C -o io_main -L. -leegads $(LIBS) + +compute_test: compute_test.C $(ARCHIVE) + $(CC) $(CFLAGS) compute_test.C -o compute_test -L. -leegads $(LIBS) + +part_test: part_test.C $(ARCHIVE) + $(CC) $(CFLAGS) part_test.C -o part_test -L. -leegads $(LIBS) + +shared_test: shared_test.C $(ARCHIVE) + $(CC) $(CFLAGS) shared_test.C -o shared_test -L. 
-leegads $(LIBS) + +cleantests: ; + - /bin/rm -f core + - /bin/rm -f $(TESTDIR) io_main compute_test part_test shared_test oneexe + sync + +clean: cleantests + - /bin/rm -f $(OFILES) $(ARCHIVE) +# - /bin/rm -f *~ PI* os-detected a.out mpi_test core *.o +# - /bin/rm -f mputil.mp*.c makefile fileprefix mpirun.* + - /bin/rm -f makefile fileprefix + sync + +configure: ; @echo "already configured, or this makefile wouldn't be here" + @echo "to reconfigure, make clean, then sh configure" + + diff --git a/src/Panda/makefile.sgi.mpich b/src/Panda/makefile.sgi.mpich new file mode 100644 index 0000000..f9071df --- /dev/null +++ b/src/Panda/makefile.sgi.mpich @@ -0,0 +1,10 @@ +# makefile part for aix with our MPIFS filesystem on MPIF (jozwiak 030795) +include fileprefix +Cactus_HOME = ../../.. +INCLUDE_DIR = -I/usr/include -I$(Cactus_HOME)/lib/IEEEIO +LIBS = -lmpi -L$(Cactus_HOME)/irix6/obj -lieeeio +AR = /usr/bin/ar # for aix +CC = CC +OPTFLAGS = -g +CFLAGS = $(OPTFLAGS) $(INCLUDE_DIR) \ + -DFILEPREFIX=$(FILEPREFIXVAL) diff --git a/src/Panda/makefile.sun.mpich b/src/Panda/makefile.sun.mpich new file mode 100644 index 0000000..4d00846 --- /dev/null +++ b/src/Panda/makefile.sun.mpich @@ -0,0 +1,18 @@ +# makefile part for bunny with our MPIFS filesystem on MPICH (jozwiak 030795) +include fileprefix +MPIR_HOME = /home2/panda/MPI/mpich +INCLUDE_DIR = -I$(MPIR_HOME)/include +LIBS = -L/home2/panda/MPI/mpich/lib/sun4/ch_p4 -lmpirun -lmpi -lm +MPILIB = $(MPIR_HOME)/lib/$(ARCH)/$(COMM)/libmpi.a +DEVICE = ch_p4 +COMM = ch_p4 +ARCH = sun4 +AR = /usr/5bin/ar # for sunos (bsd) +CC = gcc +OPTFLAGS = -g -Wall +CFLAGS = -DMPID_NO_FORTRAN -DHAS_XDR=1 \ + -DHAVE_STDLIB_H=1 -DNAS_MPIF\ + -DHAVE_SYSTEM=1 $(OPTFLAGS) $(INCLUDE_DIR) -DMPI_$(ARCH) \ + -DTARGETSUNOS -DFILEPREFIX=$(FILEPREFIXVAL) \ + -DWRAPPERTEST -DMPICH +# -DVERIFYBF -DDEBUG diff --git a/src/Panda/message.h b/src/Panda/message.h new file mode 100644 index 0000000..f76998f --- /dev/null +++ b/src/Panda/message.h @@ -0,0 +1,81 @@ 
+#ifndef message_dot_h +#define message_dot_h + + +inline void send_message(void *buf, int count, MPI_Datatype data_type, + int dest, int tag, MPI_Comm comm) +{ + MPI_Send(buf,count,data_type,dest,tag,comm); +#ifdef DEBUG + printf("Sending message to %d of size %d with tag %d\n", + dest, count, tag); +#endif +} + +inline void nb_send_message(void *buf, int count, MPI_Datatype data_type, + int dest, int tag, MPI_Comm comm, MPI_Request *request) +{ + MPI_Isend(buf,count,data_type,dest,tag,comm, request); +#ifdef DEBUG + printf("Sending nonblocking message to %d of size %d with tag %d\n", + dest, count, tag); +#endif +} + + +inline void receive_message(void *buf, int count, MPI_Datatype datatype, + int src, int tag, MPI_Comm comm, MPI_Status *status) +{ + MPI_Recv(buf,count,datatype, src,tag,comm,status); +#ifdef DEBUG + printf("Received message from %d of size %d with tag %d\n", + src, count, tag); +#endif +} + + +inline void nb_receive_message(void *buf, int count, MPI_Datatype datatype, + int src, int tag, MPI_Comm comm, MPI_Request *request) +{ + MPI_Irecv(buf,count,datatype, src,tag,comm,request); +#ifdef DEBUG + printf("Post a non-blocking receive for %d of size %d with tag %d\n", + src, count, tag); +#endif +} + + +inline void mpi_test(MPI_Request *request, int *flag, MPI_Status *status) +{ + MPI_Test(request, flag, status); +} + + +inline void mpi_get_count(MPI_Status *status, MPI_Datatype datatype, int *len) +{ + MPI_Get_count(status, datatype, len); +} + + +inline void any_new_message(int *msg_code, int *msg_src, + int *msg_tag,MPI_Status *msg_status) +{ + int flag; + + MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, msg_status); + if (!flag){ + *msg_code = NO_MESSAGE; + *msg_src = -1; + *msg_tag = -1; + return; + } + else{ + /* There some message waiting for us */ + *msg_tag = msg_status->MPI_TAG; + *msg_src = msg_status->MPI_SOURCE; + *msg_code = msg_status->MPI_TAG % 10; + return; + } +} + +#endif diff --git a/src/Panda/oneexe.C 
b/src/Panda/oneexe.C new file mode 100644 index 0000000..f9b6b07 --- /dev/null +++ b/src/Panda/oneexe.C @@ -0,0 +1,91 @@ +#include <stdio.h> +#include <stdlib.h> +#include "mpi.h" +#include "IO.h" +#include "c_interface.h" + +extern "C" { int Panda_Create(int, char **, int, int); } +extern "C" { void Panda_Finalize(); } +extern "C" { void Panda_WriteAttribute(char *, char *, int, int, void *); } +extern "C" { void *Panda_ReadAttribute(char *, char *, int *, int *); } +extern "C" { void PandaTimestep(struct ArrayInfo *); } +extern "C" { char *PandaReadTimestep(struct ArrayInfo *); } + +int main(int argc, char **argv) +{ + int my_rank, i, j, k; + ArrayInfo ainfo; + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); +// Panda_Create(argc, argv, 2, 1); + if (Panda_Create(argc, argv, 2, 0)) { MPI_Finalize(); return 1; } + + // Timestep-write +/* int size[3] = {16, 4, 4}; + int mem_layout[3] = {2, 1, 1}; + Distribution mem_dist[3] = {BLOCK, BLOCK, BLOCK}; + int disk_layout[1] = {2}; + Distribution disk_dist[3] = {BLOCK, NONE, NONE}; + int *data = (int *)malloc(sizeof(int) * 128); + ainfo.name_ = "./panda.out"; + ainfo.rank_ = 3; + ainfo.size_ = size; + ainfo.esize_ = INT32; + ainfo.mem_rank_ = 3; + ainfo.mem_layout_ = mem_layout; + ainfo.mem_dist_ = mem_dist; + ainfo.disk_rank_ = 1; + ainfo.disk_layout_ = disk_layout; + ainfo.disk_dist_ = disk_dist; + ainfo.data_ = (char*)data; + + for (i=0; i<8; i++) + for (j=0; j<4; j++) + for (k=0; k<4; k++) data[i*16+j*4+k] = i*16+j*4+k + my_rank; + + ainfo.stencil_width_ = 0; + PandaTimestep(&ainfo); + Panda_WriteAttribute("./panda.out", "global_size", INT32, 3, size); + + printf("%d - ", my_rank); + for (i=0; i<8; i++) + for (j=0; j<4; j++) + for (k=0; k<4; k++) printf("%d ", data[i*16+j*4+k]); + printf("\n"); fflush(stdout); + free(data); */ + + // ReadTimeste-write + int mem_layout[3] = {2, 1, 1};; + Distribution mem_dist[3] = {BLOCK, BLOCK, BLOCK}; + ainfo.name_ = "./panda.out"; + ainfo.rank_ = 3; + ainfo.size_ = 
NULL; + ainfo.esize_ = 0; + ainfo.mem_rank_ = 3; + ainfo.mem_layout_ = mem_layout; + ainfo.mem_dist_ = mem_dist; + ainfo.disk_rank_ = 0; + ainfo.disk_layout_ = NULL; + ainfo.disk_dist_ = NULL; + ainfo.data_ = NULL; + + int *data = (int *)PandaReadTimestep(&ainfo); + + printf("%d - ", my_rank); + for (i=0; i<8; i++) + for (j=0; j<4; j++) + for (k=0; k<4; k++) printf("%d ", data[i*16+j*4+k]); + printf("\n"); fflush(stdout); + free(data); + + int type, count; + int *data1 = (int *)Panda_ReadAttribute("./panda.out", "global_size", + &type, &count); + printf("%d: data type %d, count %d, contents: ", my_rank, type, count); + for (i=0; i<count; i++) printf("%d ", data1[i]); + printf("\n"); + free(data1); + + Panda_Finalize(); + MPI_Finalize(); +} diff --git a/src/Panda/os-detected b/src/Panda/os-detected new file mode 100644 index 0000000..4f378d7 --- /dev/null +++ b/src/Panda/os-detected @@ -0,0 +1 @@ +irix diff --git a/src/Panda/part_test.C b/src/Panda/part_test.C new file mode 100644 index 0000000..03a7c2c --- /dev/null +++ b/src/Panda/part_test.C @@ -0,0 +1,385 @@ +/***************************************************************** + * This is a sample program that shows how the panda library * + * is going to be used by the application programs. * + * The example command line format is in test7.script. * + * This example shows the interface with only disk layout * + * info but no stride or subchunking schema. The value for * + * those schemas use the default ones. * + * The current test varies the size of arrays. However, the * + * wrapper function allows the number of the nodes to be * + * changed as well. * + * The first iteration loads all the code in memory. * + * The second run does the simulated disk simulation. * + * From the third run on, the values are the real writes. 
* + *****************************************************************/ + +#include <stdio.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "definitions.h" +#include "StopWatch.h" +#include "ArrayGroup.h" +#include "ArrayLayout.h" +#include "Array.h" +#include "Panda.h" + +int Num_of_Arrays = 1; +int Num_Simulate_Read = 0; +int Num_Read = 0; +int Num_Simulate_Write = 2; +int Num_Write = 2 ; +int interleave = 0; +Panda *global_bear; +int world_rank; + +extern int BRANCHING_FACTOR; +extern int SUBCHUNK_SIZE; +int STRATEGY = 1; + +void test_timestep(ArrayGroup *t1, int arraysize, Array **arrays) +{ + StopWatch timer; + int i; + int flag=0; + char time_message[100]; + +#ifdef VERIFYBF + for (int j=0; j<Num_of_Arrays; j++) arrays[j]->set_byte_pattern(); + t1->set_verify(); +#endif + + + global_bear->cleanfiles(); + global_bear->createfiles(); + + for (i=0; i<Num_Simulate_Write+Num_Write; ++i) { + if (i < Num_Simulate_Write){ + t1->set_simulate_mode(); + flag=0; + } + else { + t1->reset_simulate_mode(); + flag=1; + global_bear->cleanfiles(); + global_bear->createfiles(); + } + + + global_bear->app_barrier(); + t1->set_io_strategy(STRATEGY); + timer.start(); + t1->timestep(); + timer.stop(":"); + sprintf(time_message,"%s Write: SIZE: %d, Time %i %s", + (flag==0? 
"Simulated":"Real"), + arraysize, i, timer.get_description()); + printf("%s", time_message); + + if (Num_Read + Num_Simulate_Read == 0 || i < Num_Simulate_Write + Num_Write-1 ) { + global_bear->cleanfiles(); + global_bear->createfiles(); + + } + } +} + +void test_readtimestep(ArrayGroup *r1, int arraysize, Array **arrays) +{ + StopWatch timer; + int i; + int flag; + char time_message[100]; +#ifdef VERIFYBF + for (int j=0; j<Num_of_Arrays; j++) arrays[j]->reset_byte_pattern(); +#endif + + if (Num_Write + Num_Simulate_Write == 0) { + global_bear->cleanfiles(); + global_bear->createfiles(); + } + + + + for (i=0; i<Num_Simulate_Read+Num_Read; ++i) { + if (i < Num_Simulate_Read) { r1->set_simulate_mode(); flag=0; } + else {r1->reset_simulate_mode(); + flag=1; + global_bear->flushfiles(); + } + + + global_bear->app_barrier(); + r1->set_io_strategy(STRATEGY); + timer.start(); + r1->restart(); + timer.stop(":"); + + sprintf(time_message,"%s Read: SIZE: %d, Time %i %s ", + (flag==0? "Simulated":"Real"), + arraysize, i, timer.get_description()); + printf("%s", time_message); + + } +#ifdef VERIFYBF + for(i=0;i<Num_of_Arrays;i++) + if (arrays[i]->verify_byte_pattern()) + printf("%d:Byte pattern verified for array %d\n", world_rank, i); + else + printf("%d:Byte pattern incorrect for array %d\n",world_rank,i); +#endif + global_bear->cleanfiles(); +} + + +int gemein(Panda *bear, int io_nodes, int arrayrank, int *arraysize, int esize, + int mrank, int *mlayout, int drank, int *dlayout, + Distribution *mem_dist, Distribution *disk_dist, int cost_model) +{ + ArrayLayout *mem1; // Memory array layout + ArrayLayout *disk1; // Disk array layout + int i; + Array **arrays; + arrays = (Array **)malloc(sizeof(Array*)*Num_of_Arrays); + +// Set up memory and disk layouts + mem1 = new ArrayLayout (mrank,mlayout); + disk1 = new ArrayLayout(drank,dlayout); + +// Create an Array for computation. 
+ char *name; + name = (char *)malloc(sizeof(char)*(strlen("z1Array")+5)); + char temp[5]; + for (i=0; i< Num_of_Arrays; i++) { + strcpy(name,"z1Array"); + sprintf(temp, "%d", i); + strcat(name, temp); + arrays[i] = new Array(name,arrayrank,arraysize,esize, + mem1,mem_dist,disk1, disk_dist); + } + free(name); + + if (Num_Simulate_Write + Num_Write > 0) { + ArrayGroup *t1 = new ArrayGroup("z4timestep"); + for (i= 0; i<Num_of_Arrays; i++) t1->insert(arrays[i]); + test_timestep(t1, arraysize[arrayrank-1], arrays); + delete t1; + if (Num_Simulate_Read + Num_Read > 0) { + ArrayGroup *r1 = new ArrayGroup("z4timestep"); + for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]); + test_readtimestep(r1, arraysize[arrayrank-1], arrays); + delete r1; + } + } else { + + ArrayGroup *r1 = new ArrayGroup("z4timestep"); + for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]); + test_readtimestep(r1, arraysize[arrayrank-1], arrays); + delete r1; + } + + // delete all objects created + + for (i=0; i<Num_of_Arrays; i++) delete arrays[i]; + free(arrays); + delete disk1; + delete mem1; + return(0); +} + +char my_getopt(char *str) +{ + char command[25][15]; + + strcpy(command[0], "-Total_nodes"); + strcpy(command[1], "-Io_nodes"); + strcpy(command[2], "-upper"); + strcpy(command[3], "-Arraysize"); + strcpy(command[4], "-Esize"); + strcpy(command[5], "-Mlayout"); + strcpy(command[6], "-Dlayout"); + strcpy(command[7], "-mem_dist"); + strcpy(command[8], "-disk_dist"); + strcpy(command[9], "-num_arrays"); + strcpy(command[10], "-read_simulate"); + strcpy(command[11], "-Read"); + strcpy(command[12], "-write_simulate"); + strcpy(command[13], "-Write"); + strcpy(command[14], "-interleave"); + strcpy(command[15], "-Cost_model"); + strcpy(command[16], "-chunks"); + strcpy(command[17], "-xmax_messages"); + strcpy(command[18], "-tags"); + strcpy(command[19], "-branching_factor"); + strcpy(command[20], "-ymax_memory"); + strcpy(command[21], "-flag"); + strcpy(command[22], "-size_message"); + 
strcpy(command[23], "-Xfactor"); + strcpy(command[24], "-Optimize"); + + for (int i= 0; i< 25; i++) + if (!strncmp(str, command[i], 2)) return command[i][1]; + printf("undefined input %s, quit!\n",str); + return NULL; +} + +void parse_cl(int argc, char **argv, int &total_nodes, int &io_nodes, + int &upper_bound, int &lower_bound, int &arrayrank, int*& arraysize, + int &esize, int &mrank, int*& mlayout, int& drank, int*& dlayout, + Distribution*& mem_dist, Distribution*& disk_dist, int &cost_model_mode) +{ + char opt; + int k; + + for (int i=1; i<argc; ) { + opt = my_getopt(argv[i++]); + switch(opt) + { + case 'X': + STRATEGY = atoi(argv[i++]); + break; + case 'T': + total_nodes = atoi(argv[i++]); + break; + case 'I': + io_nodes = atoi(argv[i++]); + break; + case 'u': + upper_bound = atoi(argv[i++]); + break; + case 'A': + arrayrank = atoi(argv[i++]); + arraysize = (int *) malloc(sizeof(int)* arrayrank); + mem_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank); + disk_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank); + for (k = 0; k < arrayrank; k++) arraysize[k] = atoi(argv[i++]); + lower_bound = arraysize[k-1]; + break; + case 'E': + esize = atoi(argv[i++]); + break; + case 'M': + mrank = atoi(argv[i++]); + mlayout = (int *) malloc(sizeof(int)* mrank); + for (k = 0; k < mrank; k++) mlayout[k] = atoi(argv[i++]); + break; + case 'D': + drank = atoi(argv[i++]); + dlayout = (int *) malloc(sizeof(int)* drank); + for (k = 0; k < drank; k++) dlayout[k] = atoi(argv[i++]); + break; + case 'm': + for (k = 0; k < arrayrank; k++) mem_dist[k] = (Distribution)atoi(argv[i++]); + break; + case 'd': + for (k = 0; k < arrayrank; k++) disk_dist[k] = (Distribution)atoi(argv[i++]); + break; + case 'n': + Num_of_Arrays = atoi(argv[i++]); + break; + case 'r': + Num_Simulate_Read = atoi(argv[i++]); + break; + case 'R': + Num_Read = atoi(argv[i++]); + break; + case 'w': + Num_Simulate_Write = atoi(argv[i++]); + break; + case 'W': + Num_Write = atoi(argv[i++]); + 
break; + case 'i': + interleave = atoi(argv[i++]); + break; + case 'C': + cost_model_mode = atoi(argv[i++]); + break; + case 'b' : + BRANCHING_FACTOR = atoi(argv[i++]); + break; + case 's': + SUBCHUNK_SIZE = atoi(argv[i++]); + break; + } + } +} + + +int main(int argc, char **argv) +{ + int total_nodes; // The number of total nodes (comp + io) + int io_nodes; // The number of io nodes + int upper_bound; // The upper bound of the last dimension of the array + int lower_bound; // The starting number of the last dimension of the array + int arrayrank ; // The array rank. + int *arraysize; // The number of elements along each array dimention + int esize ; // element size of each array element + int mrank ; // Compute node mesh rank + int *mlayout; // Compute node mesh layout + int drank ; // IO node mesh rank + int cost_model_mode; // Whether the cost model is included. + int *dlayout; // IO node mesh layout + Distribution *mem_dist; // The memory array distribution along each dimention + // There are three possible distributions (BLOCK, + // NONE, CYCLIC). + Distribution *disk_dist; // The disk array distribution along each dimention + int my_rank, my_app_size, *world_ranks, leader; + char sys_command[100]; + + MPI_Init(&argc, &argv); + +// For Parallel architecture (IBM SP2 like), +// Initialize the MPI environment. Only compute nodes will return from +// this call, the io nodes will not return from the call. 
All the io nodes + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &my_app_size); + leader = 0; + world_ranks = (int *) malloc(sizeof(int)*my_app_size); + for(int i=0;i< my_app_size; i++) + world_ranks[i] = leader+i; + + + + Panda *bear; + int my_io_rank = my_rank; + int *io_ranks; + + parse_cl(argc, argv, total_nodes, io_nodes, upper_bound, lower_bound, + arrayrank, arraysize, esize, mrank, mlayout, drank, dlayout, + mem_dist, disk_dist, cost_model_mode); + + io_ranks = world_ranks; + + + if (my_io_rank<io_nodes) + { + global_bear = new Panda(PART_TIME_IO, my_rank, my_app_size, world_ranks, + my_io_rank, io_nodes, io_ranks); + bear = global_bear; + } + else + { + global_bear = new Panda(PART_TIME_COMPUTE, my_rank, my_app_size, world_ranks, + -1, io_nodes, io_ranks); + bear = global_bear; + } + for (int size=lower_bound; size <= upper_bound; size*=2) { + arraysize[arrayrank-1] = size; + gemein(bear,io_nodes, arrayrank, arraysize, esize, + mrank, mlayout, drank, dlayout, mem_dist, + disk_dist, cost_model_mode); + } + + free(mlayout); + free(dlayout); + free(mem_dist); + free(disk_dist); + free(world_ranks); + delete bear; + + MPI_Finalize(); + return(0); +} diff --git a/src/Panda/shared_test.C b/src/Panda/shared_test.C new file mode 100644 index 0000000..00ebaa1 --- /dev/null +++ b/src/Panda/shared_test.C @@ -0,0 +1,353 @@ +/***************************************************************** + * This is a sample program that shows how the panda library * + * is going to be used by the application programs. * + * The example command line format is in test7.script. * + * This example shows the interface with only disk layout * + * info but no stride or subchunking schema. The value for * + * those schemas use the default ones. * + * The current test varies the size of arrays. However, the * + * wrapper function allows the number of the nodes to be * + * changed as well. 
* + * The first iteration loads all the code in memory. * + * The second run does the simulated disk simulation. * + * From the third run on, the values are the real writes. * + *****************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "definitions.h" +#include "StopWatch.h" +#include "ArrayGroup.h" +#include "ArrayLayout.h" +#include "Array.h" +#include "Panda.h" +#include "mpirun.h" + +int Num_of_Arrays = 1; +int Num_Simulate_Read = 0; +int Num_Read = 0; +int Num_Simulate_Write = 2; +int Num_Write = 2 ; +int interleave = 0; +Panda *global_bear; + +extern int BRANCHING_FACTOR; +extern int SUBCHUNK_SIZE; +int STRATEGY = 1; + +void test_timestep(ArrayGroup *t1, int arraysize, Array **arrays) +{ + StopWatch timer; + int i; + int flag=0; + char time_message[100]; + +#ifdef VERIFYBF + for (int j=0; j<Num_of_Arrays; j++) arrays[j]->set_byte_pattern(); + t1->set_verify(); +#endif + + + global_bear->cleanfiles(); + global_bear->createfiles(); + + + for (i=0; i<Num_Simulate_Write+Num_Write; ++i) { + if (i < Num_Simulate_Write){ + t1->set_simulate_mode(); + flag=0; + } + else { + t1->reset_simulate_mode(); + flag=1; + } + + + global_bear->global_barrier(); + t1->set_io_strategy(STRATEGY); + timer.start(); + t1->timestep(); + timer.stop(":"); + sprintf(time_message,"App_id %d: %s Write: SIZE: %d, Time %i %s", + MPIRUN_APP_ID, (flag==0? 
"Simulated":"Real"), + arraysize, i, timer.get_description()); + printf("%s", time_message); + + if (Num_Read + Num_Simulate_Read == 0 || i < Num_Simulate_Write + Num_Write-1 ) { + global_bear->cleanfiles(); + global_bear->createfiles(); + + } + } +} + +void test_readtimestep(ArrayGroup *r1, int arraysize, Array **arrays) +{ + StopWatch timer; + int i; + int flag; + char time_message[100]; +#ifdef VERIFYBF + for (int j=0; j<Num_of_Arrays; j++) arrays[j]->reset_byte_pattern(); +#endif + + if (Num_Write + Num_Simulate_Write == 0) { + global_bear->cleanfiles(); + global_bear->createfiles(); + } + + + + for (i=0; i<Num_Simulate_Read+Num_Read; ++i) { + if (i < Num_Simulate_Read) { r1->set_simulate_mode(); flag=0; } + else {r1->reset_simulate_mode(); + flag=1; + global_bear->flushfiles(); + } + + + global_bear->global_barrier(); + r1->set_io_strategy(STRATEGY); + timer.start(); + r1->read_timestep(); + timer.stop(":"); + + sprintf(time_message,"App_id %d: %s Read: SIZE: %d, Time %i %s ", + MPIRUN_APP_ID, (flag==0? "Simulated":"Real"), + arraysize, i, timer.get_description()); + printf("%s", time_message); + } +#ifdef VERIFYBF + for(i=0;i<Num_of_Arrays;i++) + if (arrays[i]->verify_byte_pattern()) + printf("Byte pattern verified for array %d\n", i); + else + printf("Byte pattern incorrect for array %d\n", i); +#endif + global_bear->cleanfiles(); +} + + +int gemein(Panda *bear, int io_nodes, int arrayrank, int *arraysize, int esize, + int mrank, int *mlayout, int drank, int *dlayout, + Distribution *mem_dist, Distribution *disk_dist, int cost_model) +{ + ArrayLayout *mem1; // Memory array layout + ArrayLayout *disk1; // Disk array layout + int i; + Array **arrays; + arrays = (Array **)malloc(sizeof(Array*)*Num_of_Arrays); + +// Set up memory and disk layouts + mem1 = new ArrayLayout (mrank,mlayout); + disk1 = new ArrayLayout(drank,dlayout); + +// Create an Array for computation. 
+ char *name; + name = (char *)malloc(sizeof(char)*(strlen("z1Array")+5)); + char temp[5]; + for (i=0; i< Num_of_Arrays; i++) { + strcpy(name,"z1Array"); + sprintf(temp, "%d", i); + strcat(name, temp); + arrays[i] = new Array(name,arrayrank,arraysize,esize, + mem1,mem_dist,disk1, disk_dist); + } + free(name); + + if (Num_Simulate_Write + Num_Write > 0) { + ArrayGroup *t1 = new ArrayGroup("z4timestep"); + for (i= 0; i<Num_of_Arrays; i++) t1->insert(arrays[i]); + test_timestep(t1, arraysize[arrayrank-1], arrays); + delete t1; + if (Num_Simulate_Read + Num_Read > 0) { + ArrayGroup *r1 = new ArrayGroup("z4timestep"); + for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]); + test_readtimestep(r1, arraysize[arrayrank-1], arrays); + delete r1; + } + } else { + + ArrayGroup *r1 = new ArrayGroup("z4timestep"); + for (i= 0; i<Num_of_Arrays; i++) r1->insert(arrays[i]); + test_readtimestep(r1, arraysize[arrayrank-1], arrays); + delete r1; + } + + // delete all objects created + + for (i=0; i<Num_of_Arrays; i++) delete arrays[i]; + free(arrays); + delete disk1; + delete mem1; + return(0); +} + +char my_getopt(char *str) +{ + char command[18][15]; + + strcpy(command[0], "-Total_nodes"); + strcpy(command[1], "-Io_nodes"); + strcpy(command[2], "-upper"); + strcpy(command[3], "-Arraysize"); + strcpy(command[4], "-Esize"); + strcpy(command[5], "-Mlayout"); + strcpy(command[6], "-Dlayout"); + strcpy(command[7], "-mem_dist"); + strcpy(command[8], "-disk_dist"); + strcpy(command[9], "-num_arrays"); + strcpy(command[10], "-read_simulate"); + strcpy(command[11], "-Read"); + strcpy(command[12], "-write_simulate"); + strcpy(command[13], "-Write"); + strcpy(command[14], "-interleave"); + strcpy(command[15], "-Cost_model"); + strcpy(command[16], "-size_message"); + strcpy(command[17], "-Xfactor"); + + for (int i= 0; i< 18; i++) + if (!strncmp(str, command[i], 2)) return command[i][1]; + printf("undefined input %s, quit!\n",str); + exit(0); +} + +void parse_cl(int argc, char **argv, int 
&total_nodes, int &io_nodes, + int &upper_bound, int &lower_bound, int &arrayrank, int*& arraysize, + int &esize, int &mrank, int*& mlayout, int& drank, int*& dlayout, + Distribution*& mem_dist, Distribution*& disk_dist, int &cost_model_mode) +{ + char opt; + int k; + + for (int i=1; i<argc; ) { + opt = my_getopt(argv[i++]); + switch(opt) { + case 'X': + STRATEGY = atoi(argv[i++]); + break; + case 's': + SUBCHUNK_SIZE = atoi(argv[i++]); + break; + case 'T': + total_nodes = atoi(argv[i++]); + break; + case 'I': + io_nodes = atoi(argv[i++]); + break; + case 'u': + upper_bound = atoi(argv[i++]); + break; + case 'A': + arrayrank = atoi(argv[i++]); + arraysize = (int *) malloc(sizeof(int)* arrayrank); + mem_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank); + disk_dist = (Distribution *)malloc(sizeof(Distribution)*arrayrank); + for (k = 0; k < arrayrank; k++) arraysize[k] = atoi(argv[i++]); + lower_bound = arraysize[k-1]; + break; + case 'E': + esize = atoi(argv[i++]); + break; + case 'M': + mrank = atoi(argv[i++]); + mlayout = (int *) malloc(sizeof(int)* mrank); + for (k = 0; k < mrank; k++) mlayout[k] = atoi(argv[i++]); + break; + case 'D': + drank = atoi(argv[i++]); + dlayout = (int *) malloc(sizeof(int)* drank); + for (k = 0; k < drank; k++) dlayout[k] = atoi(argv[i++]); + break; + case 'm': + for (k = 0; k < arrayrank; k++) mem_dist[k] = (Distribution)atoi(argv[i++]); + break; + case 'd': + for (k = 0; k < arrayrank; k++) disk_dist[k] = (Distribution)atoi(argv[i++]); + break; + case 'n': + Num_of_Arrays = atoi(argv[i++]); + break; + case 'r': + Num_Simulate_Read = atoi(argv[i++]); + break; + case 'R': + Num_Read = atoi(argv[i++]); + break; + case 'w': + Num_Simulate_Write = atoi(argv[i++]); + break; + case 'W': + Num_Write = atoi(argv[i++]); + break; + case 'i': + interleave = atoi(argv[i++]); + break; + case 'C': + cost_model_mode = atoi(argv[i++]); + break; + } + } +} + +int main(int argc, char **argv) +{ + int total_nodes; // The number of total 
nodes (comp + io) + int io_nodes; // The number of io nodes + int upper_bound; // The upper bound of the last dimension of the array + int lower_bound; // The starting number of the last dimension of the array + int arrayrank ; // The array rank. + int *arraysize; // The number of elements along each array dimention + int esize ; // element size of each array element + int mrank ; // Compute node mesh rank + int *mlayout; // Compute node mesh layout + int drank ; // IO node mesh rank + int cost_model_mode; // Whether the cost model is included. + int *dlayout; // IO node mesh layout + Distribution *mem_dist; // The memory array distribution along each dimention + // There are three possible distributions (BLOCK, + // NONE, CYCLIC). + Distribution *disk_dist; // The disk array distribution along each dimention + int my_rank, my_app_size, *world_ranks, leader; + + + MPI_Init(&argc, &argv); + MPIRUN_Init(&argc, &argv); + +// For Parallel architecture (IBM SP2 like), +// Initialize the MPI environment. Only compute nodes will return from +// this call, the io nodes will not return from the call. 
All the io nodes + + MPI_Comm_rank(MPIRUN_APP_COMM, &my_rank); + MPI_Comm_size(MPIRUN_APP_COMM, &my_app_size); + leader = MPIRUN_APP_LEADERS[MPIRUN_APP_ID]; + world_ranks = (int *) malloc(sizeof(int)*my_app_size); + for(int i=0;i< my_app_size; i++) + world_ranks[i] = leader+i; + printf("MPIRUN_APP_ID = %d\n", MPIRUN_APP_ID); + Panda * bear = new Panda(COMPUTE_NODE, MPIRUN_APP_ID, my_rank, my_app_size, + world_ranks); + global_bear = bear; + + + + + parse_cl(argc, argv, total_nodes, io_nodes, upper_bound, lower_bound, arrayrank, + arraysize, esize, mrank, mlayout, drank, dlayout, mem_dist, disk_dist, cost_model_mode); + for (int size=lower_bound; size <= upper_bound; size*=2) { + arraysize[arrayrank-1] = size; + gemein(bear,io_nodes, arrayrank, arraysize, esize, + mrank, mlayout, drank, dlayout, mem_dist, disk_dist, cost_model_mode); + } + free(arraysize); + free(mlayout); + free(dlayout); + free(mem_dist); + free(disk_dist); + free(world_ranks); + delete bear; + MPI_Finalize(); + return(0); +} diff --git a/src/Startup.c b/src/Startup.c new file mode 100644 index 0000000..500c6bd --- /dev/null +++ b/src/Startup.c @@ -0,0 +1,77 @@ + /*@@ + @file Startup.c + @date 01 Oct 1999 + @author Jonghyun Lee + @desc Startup routines for IOPanda. 
+ @enddesc + @history + @endhistory + @@*/ + +#include <stdio.h> +#include <string.h> + +#include "cctk.h" +#include "cctk_Flesh.h" +#include "cctk_GHExtensions.h" +#include "cctk_parameters.h" +#include "CactusBase/IOUtil/src/ioGH.h" + +/* prototypes of functions to be registered */ +int IOPanda_Output3DGH (cGH *GH); +int IOPanda_TriggerOutput3D (cGH *GH, int); +int IOPanda_TimeFor3D (cGH *GH, int); +int IOPanda_Output3DVarAs (cGH *GH, const char *var, const char *alias); +void *IOPanda_SetupGH (tFleshConfig *config, int convergence_level, cGH *GH); +int IOPanda_InitGH (cGH *GH); +int IOPanda_RecoverGH (cGH *GH, const char *basename, int called_from); + + //void Panda_Create(int, int); +void Panda_Finalize(void); + + /*@@ + @routine IOPanda_Startup + @date Fri May 21 1999 + @author Thomas Radke + @desc + The startup registration routine for IOPanda. + Registers the GH extensions needed for IOPanda and + the registerable routines used for each method of IOPanda. + IOPanda does not overload any functions. 
+ @enddesc + @calls + @calledby + @history + + @endhistory + +@@*/ +void IOPanda_Startup (void) +{ + int IO_GHExtension; + int IOMethod; + + IO_GHExtension = CCTK_RegisterGHExtension ("IOPanda"); + CCTK_RegisterGHExtensionSetupGH (IO_GHExtension, IOPanda_SetupGH); + CCTK_RegisterGHExtensionInitGH (IO_GHExtension, IOPanda_InitGH); + + /* Register the 3D IOPandaIO routines as output methods */ + IOMethod = CCTK_RegisterIOMethod ("IOPandaIO_3D"); + CCTK_RegisterIOMethodOutputGH (IOMethod, IOPanda_Output3DGH); + CCTK_RegisterIOMethodOutputVarAs (IOMethod, IOPanda_Output3DVarAs); + CCTK_RegisterIOMethodTimeToOutput (IOMethod, IOPanda_TimeFor3D); + CCTK_RegisterIOMethodTriggerOutput (IOMethod, IOPanda_TriggerOutput3D); + +#if 0 + /* Register the IOPanda recovery routine to thorn IOUtil */ + if (IOUtil_RegisterRecover ("IOPanda recovery", IOPanda_RecoverGH) < 0) + CCTK_WARN (1, "Failed to register IOPanda recovery routine"); + Panda_Create(1, 1); +#endif + +} + +void IOPanda_Finalize(void) +{ + Panda_Finalize(); +} diff --git a/src/ioPandaGH.h b/src/ioPandaGH.h new file mode 100644 index 0000000..f4da8d7 --- /dev/null +++ b/src/ioPandaGH.h @@ -0,0 +1,32 @@ + /*@@ + @header ioPandaGH.h + @date 01 Oct 1999 + @author Jonghyun Lee + @desc The extensions to the GH structure from IOPanda. 
+ @history + @endhistory + @@*/ + +#include <string.h> + +#include "StoreNamedData.h" + + +typedef struct IOPandaGH { + + /* The number of times output */ + int *IO_3Dnum; + + /* How often to output */ + int IO_3Devery; + + /* Directory in which to output */ + char *outpfx_3D; + + /* The last iteration output */ + int *IO_3Dlast; + + /* filename database for opened files */ + pNamedData *fileList_3D; + +} pandaGH; diff --git a/src/make.code.defn b/src/make.code.defn new file mode 100644 index 0000000..174c78b --- /dev/null +++ b/src/make.code.defn @@ -0,0 +1,3 @@ +SRCS = Startup.c GHExtension.c Output3D.c DumpVar.c + +SUBDIRS = Panda diff --git a/src/make.configuration.defn b/src/make.configuration.defn new file mode 100644 index 0000000..64d3f73 --- /dev/null +++ b/src/make.configuration.defn @@ -0,0 +1,21 @@ +# make.configuration.defn for IOPanda + +# make sure that IOPanda was configured in with MPI and IEEEIO + +ifeq ($(strip $(HAVE_IEEEIO)), ) +$(NAME): MissingIEEEIO +.pseudo: MissingIEEEIO +MissingIEEEIO: + @echo "IOPanda: requires IEEEIO" + @echo "IOPanda: Please configure Cactus with thorn external/IEEEIO or remove IOPanda from Thornlist !" + exit 2 +endif + +ifeq ($(strip $(MPI_LIBS)), ) +$(NAME): MissingMPI +.pseudo: MissingMPI +MissingMPI: + @echo "IOPanda: requires MPI" + @echo "IOPanda: Please configure Cactus with MPI or remove IOPanda from Thornlist !" + exit 2 +endif |