aboutsummaryrefslogtreecommitdiff
path: root/doc
diff options
context:
space:
mode:
authortradke <tradke@4825ed28-b72c-4eae-9704-e50c059e567d>2001-03-14 15:08:40 +0000
committertradke <tradke@4825ed28-b72c-4eae-9704-e50c059e567d>2001-03-14 15:08:40 +0000
commitcc4660a5ddc40f9ccfcc81ba7b16c1d6e5b551fb (patch)
tree8f7e1079bb698cd1bb85516ea368ae085c00056f /doc
parente2886911a1d7c76faece02a4b84f6fd5c2fc21bd (diff)
Updated documentation on thorn IOHDF5, what it provides, how to use it etc.
Also added a section describing the file layout used in IOHDF5. Along with the example program CreateIOHDF5datafile.c, this should enable people to easily build their own data converters to feed external data into Cactus. git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGHIO/IOHDF5/trunk@43 4825ed28-b72c-4eae-9704-e50c059e567d
Diffstat (limited to 'doc')
-rw-r--r--doc/CreateIOHDF5datafile.c199
-rw-r--r--doc/documentation.tex198
2 files changed, 328 insertions, 69 deletions
diff --git a/doc/CreateIOHDF5datafile.c b/doc/CreateIOHDF5datafile.c
new file mode 100644
index 0000000..5e1d597
--- /dev/null
+++ b/doc/CreateIOHDF5datafile.c
@@ -0,0 +1,199 @@
+ /*@@
+ @file CreateIOHDF5datafile.c
+ @date Mon 12 Mar 2001
+ @author Thomas Radke
+ @desc
+ Example program to create an unchunked HDF5 datafile
+ with a single dataset which can be read as input data
+ into Cactus.
+ @enddesc
+ @version $Id$
+ @@*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <hdf5.h>
+
+
+/* the name of our sample data file */
+#define DATAFILENAME "x_3d.h5"
+
+/* the number of dimensions of our sample data array and its size */
+#define NDIM 3
+#define NSIZE 20
+
+/* maximum size of the dataset name */
+#define MAXNAMESIZE 100
+
+/* the name of the attributes' group describing how the data file was created
+ and what type of Cactus variable the sample dataset is (a grid function)
+ These definitions were taken from Cactus header files - for ease of use
+ we didn't include these here. */
+#define GLOBAL_ATTRIBUTES_GROUP "Global Attributes"
+#define CCTK_GF 2
+
+/* a simple macro to do an HDF5 call with return code checking
+ in case of an error it will issue an error message and exit */
+#define CHECK_ERROR(hdf5_call) \
+ do \
+ { \
+ int _error_code = hdf5_call; \
+ \
+ \
+ if (_error_code < 0) \
+ { \
+ fprintf (stderr, "ERROR: line %d: HDF5 call '%s' returned " \
+ "error code %d\n", \
+ __LINE__, #hdf5_call, _error_code); \
+ return (-1); \
+ } \
+ } while (0)
+
+
+ /*@@
+ @routine main
+ @date Mon 12 Mar 2001
+ @author Thomas Radke
+ @desc
+ Main routine creating a sample HDF5 datafile
+ @enddesc
+
+ @returntype int
+ @returndesc
+ 0 for success, negative return values indicate an error
+ @endreturndesc
+@@*/
+int main (void)
+{
+ void *data;
+ int i, elements;
+ int iteration, grouptype;
+ int timelevel, ntimelevels;
+ int nprocs, ioproc_every, unchunked;
+ hsize_t dims[NDIM];
+ char datasetname[MAXNAMESIZE], *varname, *groupname;
+ hid_t datafile, group, attr, dataset, dataspace, stringtype;
+
+
+ /* create a datafile (truncate if already exists) */
+ datafile = H5Fcreate (DATAFILENAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+ if (datafile < 0)
+ {
+ fprintf (stderr, "Could not create output file '%s'\n", DATAFILENAME);
+ return (-1);
+ }
+
+ /* set the dimensions of our sample data array
+ count the number of elements */
+ elements = 1;
+ for (i = 0; i < NDIM; i++)
+ {
+ dims[i] = NSIZE;
+ elements *= NSIZE;
+ }
+ /* allocate the data array
+ we are lazy here and only initialize it to zero */
+ data = calloc (elements, sizeof (double));
+
+
+ /**************************************************************************/
+ /* create a group with attributes describing how the data in this file */
+ /* was written */
+ /**************************************************************************/
+
+ CHECK_ERROR (dataspace = H5Screate (H5S_SCALAR));
+ CHECK_ERROR (group = H5Gcreate (datafile, GLOBAL_ATTRIBUTES_GROUP, 0));
+
+ /* we are writing unchunked data */
+ unchunked = 1;
+ CHECK_ERROR (attr = H5Acreate (group, "unchunked", H5T_NATIVE_INT,
+ dataspace, H5P_DEFAULT));
+ CHECK_ERROR (H5Awrite (attr, H5T_NATIVE_INT, &unchunked));
+ CHECK_ERROR (H5Aclose (attr));
+
+ /* the number of processors isn't really needed here
+ (only for chunked data) */
+ nprocs = 1;
+ CHECK_ERROR (attr = H5Acreate (group, "nprocs", H5T_NATIVE_INT,
+ dataspace, H5P_DEFAULT));
+ CHECK_ERROR (H5Awrite (attr, H5T_NATIVE_INT, &nprocs));
+ CHECK_ERROR (H5Aclose (attr));
+
+ /* the number of I/O processors isn't really needed here
+ (only for chunked data) */
+ ioproc_every = 1;
+ CHECK_ERROR (attr = H5Acreate (group, "ioproc_every", H5T_NATIVE_INT,
+ dataspace, H5P_DEFAULT));
+ CHECK_ERROR (H5Awrite (attr, H5T_NATIVE_INT, &ioproc_every));
+ CHECK_ERROR (H5Aclose (attr));
+ CHECK_ERROR (H5Gclose (group));
+ CHECK_ERROR (H5Sclose (dataspace));
+
+
+ /**************************************************************************/
+ /* write your data as a dataset into the file */
+ /* the dataset name template has the format */
+ /* "<full_varname> timelevel <timelevel> at iteration <iteration>" */
+ /**************************************************************************/
+
+ /* the name of the grid variable (as specified in the interface.ccl file) */
+ varname = "grid::x";
+ /* the timelevel of the variable (set to 0 if there's just one timelevel) */
+ timelevel = 0;
+ /* iteration isn't really used here */
+ iteration = 0;
+ sprintf (datasetname, "%s timelevel %d at iteration %d",
+ varname, timelevel, iteration);
+
+ /* dataspace is given by dims[] */
+ CHECK_ERROR (dataspace = H5Screate_simple (NDIM, dims, NULL));
+ CHECK_ERROR (dataset = H5Dcreate (datafile, datasetname, H5T_NATIVE_DOUBLE,
+ dataspace, H5P_DEFAULT));
+
+ /* write the data */
+ CHECK_ERROR (H5Dwrite (dataset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL,
+ H5P_DEFAULT, data));
+ CHECK_ERROR (H5Sclose (dataspace));
+
+
+ /**************************************************************************/
+ /* add the necessary attributes describing the dataset */
+ /* as a Cactus grid variable */
+ /**************************************************************************/
+ CHECK_ERROR (dataspace = H5Screate (H5S_SCALAR));
+ CHECK_ERROR (stringtype = H5Tcopy (H5T_C_S1));
+
+ /* the variable's group name (as specified in the interface.ccl file) */
+ groupname = "grid::coordinates";
+ CHECK_ERROR (H5Tset_size (stringtype, strlen (groupname)));
+ CHECK_ERROR (attr = H5Acreate (dataset, "groupname", stringtype,
+ dataspace, H5P_DEFAULT));
+ CHECK_ERROR (H5Awrite (attr, stringtype, groupname));
+ CHECK_ERROR (H5Aclose (attr));
+
+ /* the variable's group type (as specified in the interface.ccl file) */
+ grouptype = CCTK_GF;
+ CHECK_ERROR (attr = H5Acreate (dataset, "grouptype", H5T_NATIVE_INT,
+ dataspace, H5P_DEFAULT));
+ CHECK_ERROR (H5Awrite (attr, H5T_NATIVE_INT, &grouptype));
+ CHECK_ERROR (H5Aclose (attr));
+
+ /* the number of timelevels of the variable (as specified in the
+ interface.ccl file) */
+ ntimelevels = 1;
+ CHECK_ERROR (attr = H5Acreate (dataset, "ntimelevels", H5T_NATIVE_INT,
+ dataspace, H5P_DEFAULT));
+ CHECK_ERROR (H5Awrite (attr, H5T_NATIVE_INT, &ntimelevels));
+ CHECK_ERROR (H5Aclose (attr));
+ CHECK_ERROR (H5Sclose (dataspace));
+
+
+ /* close remaining HDF5 objects and free allocated resources */
+ CHECK_ERROR (H5Dclose (dataset));
+ CHECK_ERROR (H5Tclose (stringtype));
+ CHECK_ERROR (H5Fclose (datafile));
+ free (data);
+
+ return (0);
+}
diff --git a/doc/documentation.tex b/doc/documentation.tex
index 1a617ab..6c0b57e 100644
--- a/doc/documentation.tex
+++ b/doc/documentation.tex
@@ -7,86 +7,146 @@
\date{1999}
\maketitle
-\abstract{Thorn IOHDF5 provides an IO method for output of variables in HDF5,
-the Hierarchical Data Format version 5 ({\tt http://hdf.ncsa.uiuc.edu/whatishdf5.html}).\\
-It also implements checkpointing/recovery functionality using HDF5.}
+\abstract{
+Thorn IOHDF5 provides an I/O method to output variables in HDF5 file format.
+It also implements checkpointing/recovery functionality using HDF5.
+}
%
\section{Purpose}
%
-Thorn IOHDF5 makes HDF5 available to the Cactus user by registering an IO method
-called {\tt IOHDF5\_3D} with the IO interface in the flesh.\\
-This method creates three-dimensional output of 3D grid functions as well as
-plane output of scalar variables.
-Data is written in HDF5 file format and goes into files named
-{\tt "<varname>\_3D.h5"}. Such datafiles can be further processed by
-visualization tools like Amira or AVS.\\
-%
-\newline
-%
-You obtain output by an IO method by either
-%
+Thorn IOHDF5 uses the standard I/O library HDF5 (Hierarchical Data Format
+version 5, see {\tt http://hdf.ncsa.uiuc.edu/whatishdf5.html} for details)
+to output any type of grid variables (grid scalars, grid functions, and arrays
+of arbitrary dimension) in the HDF5 file format.\\
+
+Output is done by invoking the {\tt IOHDF5} I/O method which thorn IOHDF5
+registers with the flesh's I/O interface at startup.\\
+
+You obtain output by either
\begin{itemize}
- \item setting the appropriate IO parameters
- \item calling one the routines of the IO function interface provided by the flesh
+ \item setting the appropriate I/O parameters in your parameter files, eg.
+\begin{verbatim}
+ IOHDF5::outHDF5_every = 10
+ IOHDF5::out_vars = "wavetoy::phi"
+\end{verbatim}
+ \item calling one of the flesh's I/O interface routines in your thorn's
+ code, eg.
+\begin{verbatim}
+ CCTK_OutputVarByMethod (cctkGH, "wavetoy::phi", "IOHDF5");
+\end{verbatim}
\end{itemize}
-%
-%
-IOHDF5 also provides checkpointing/recovery functionality by registering
-%
+
+Data is written into files named {\tt "<varname>.h5"}.
+Such datafiles can be used for further postprocessing (eg. visualization)
+or fed back into Cactus via the filereader capabilities of thorn IOUtil.\\[3ex]
+
+
+{\bf Parallel File I/O}\\
+
+According to the output mode parameter settings ({\tt IO::out3D\_mode,
+IO::out3D\_unchunked, IO::out3D\_procs}) of thorn IOUtil, thorn IOHDF5
+will output distributed data either
\begin{itemize}
- \item a checkpoint method with the Runtime Function Repository in the flesh
- if checkpointing was requested for this thorn.
- This method is then scheduled at {\em CCTK\_CPINITIAL} and/or
- {\em CCTK\_CHECKPOINT} (to checkpoint initial and/or evolution data).\\
- Controled via checkpoint parameters, it decides when to save the current
- state of simulation by dumping the contents of all Cactus variables and
- parameters into a checkpoint file which is in HDF5 file format.\\
- To write this file the thorn's IO method {\tt IOHDF5\_3D} is used.
- \item a recovery method with the generic recovery function interface of thorn IOUtil.\\
- At recovery time this method is called by IOUtil's generic recovery
- routine. It gets passed a filename which is tested to identify a checkpoint
- file in HDF5 format. If successful the method will then restore the
- contents of all Cactus variables and parameters from the given checkpoint
- file.
+ \item in serial into a single unchunked file
+\begin{verbatim}
+ IO::out3D_mode = "onefile"
+ IO::out3D_unchunked = "yes"
+\end{verbatim}
+ \item in parallel, that is, into separate files containing chunks of the
+ individual processors' patches of the distributed array
+\begin{verbatim}
+ IO::out3D_mode = "proc | np"
+\end{verbatim}
\end{itemize}
-%
-For a description of IO and checkpoint/recovery parameters and the generic
-recovery function interface please see also the documentation of thorn IOUtil.\\
-For a description of IO function interface to invoke IO methods by application
-thorns please see the flesh documentation.
-%
-%
+The default is to output data in parallel, in order to get maximum I/O
+performance. If needed, you can recombine the resulting chunked datafiles
+into a single unchunked file using the recombiner utility program provided
+in {\tt IOHDF5/src/util/}.\\[3ex]
+
+
+{\bf Checkpointing \& Recovery}\\
+
+Thorn IOHDF5 can also be used for creating HDF5 checkpoint files and recovering
+from such files later on.\\
+
+Checkpoint routines are scheduled at several timebins so that you can save
+the current state of your simulation after the initial data phase,
+during evolution, or at termination.
+A recovery routine is registered with thorn IOUtil in order to restart
+a new simulation from a given HDF5 checkpoint.
+The very same recovery mechanism is used to implement a filereader
+functionality to feed back data into Cactus.\\
+
+Checkpointing and recovery are controlled by corresponding checkpoint/recovery
+parameters of thorn IOUtil (for a description of these parameters please refer
+to this thorn's documentation).
+
+
\section{Comments}
-Since IOHDF5 uses parameters and the recovery function interface from IOUtil
-it also needs this I/O skeleton thorn compiled into Cactus and activated.\\
-%
-%
-\newline
-{\bf Building Cactus with HDF5}\\
-%
-The Cactus distribution does not contain the HDF5 header files and library which
-is used by thorn IOHDF5. So you need to configure it as an external software
-package via:
-%
+{\bf Importing external data into Cactus with IOHDF5}\\
+
+In order to import external data into Cactus (eg. to initialize some variable)
+you first need to convert this data into an HDF5 datafile which then can be
+processed by the registered recovery routine of thorn IOHDF5.\\
+
+The following description explains the HDF5 file layout of an unchunked
+datafile which thorn IOHDF5 expects in order to restore Cactus variables
+from it properly. There is also a well-documented example C program provided
+({\tt IOHDF5/doc/CreateIOHDF5datafile.c}) which illustrates how to create
+a datafile with IOHDF5 file layout. This working example can be used as a
+template for building your own data converter program.\\
+
+\begin{enumerate}
+ \item Actual data is stored as multidimensional datasets in an IOHDF5 file.
+ There is no nested grouping structure, every dataset is located
+ in the root group.\\
+ A dataset's name must match the following naming pattern which
+ guarantees to generate unique names:
\begin{verbatim}
- make <configuration>-config HDF5=YES
- [HDF5_DIR=<path to HDF5 package>]
+ "<full variable name> timelevel <timelevel> at iteration <iteration>"
\end{verbatim}
-%
-The configuration script will look in some default places for an installed
-HDF5 package. If nothing is found this way you can explicitely specify it with
-the {\tt HDF5\_DIR} option.\\
-%
-Configure also checks which library version is contained within the HDF5
-package: it can be either serial or parallel. The latter version includes the parallel IO extensions of the MPI 2 standard. To make use of these extensions
-you need to configure Cactus with both HDF5 and MPI. Please make also sure then
-that the parallel HDF5 library was built with the same MPI version as is used
-for Cactus.\\
-%
-If Cactus was not configured to use HDF5 but has thorn IOHDF5 compiled in
-it will give a warning message each time a thorn's routine is called
-saying HDF5 I/O is not available.
+ IOHDF5's recovery routine parses a dataset's name according to this
+ pattern to determine the Cactus variable to restore, along with its
+ timelevel. The iteration number is just informative and not needed here.
+
+ \item The type of your data as well as its dimensions are already
+ inherent to the dataset itself as metainformation. But this is not
+ enough for IOHDF5 to safely match it against a specific Cactus variable.
+ For that reason, the variable's groupname, its grouptype, and the
+ total number of timelevels must be attached to every dataset
+ as attribute information.
+
+ \item Finally, the recovery routine needs to know how the datafile to
+ recover from was created:
+ \begin{itemize}
+ \item Does the file contain chunked or unchunked data ?
+ \item How many processors were used to produce the data ?
+ \item How many I/O processors were used to write the data ?
+ \end{itemize}
+ Such information is put as attributes into a group named\\
+ {\tt "Global Attributes"}. Since we assume unchunked data here
+ the processor information isn't relevant -- unchunked data can
+ be fed back into a Cactus simulation running on an arbitrary
+ number of processors.
+\end{enumerate}
+
+The example C program goes through all of these steps and creates a datafile
+{\tt x\_3d.h5} in IOHDF5 file layout which contains a single dataset named
+{\tt "grid::x timelevel 0 at iteration 0"}, with groupname
+{\tt "grid::coordinates"}, grouptype {\tt CCTK\_GF} (thus identifying the
+variable as a grid function), and the total number of timelevels set to 1.\\
+The global attributes are set to
+{\tt "unchunked" $=$ "yes", nprocs $=$ 1,} and {\tt ioproc\_every $=$ 1}.\\
+
+Once you built and ran the program you can easily verify this with
+\begin{verbatim}
+ h5dump x_3d.h5
+\end{verbatim}
+which lists all objects in the datafile along with their values.
+It will also dump the contents of the 3D dataset. Since it only contains zeros
+it would probably not make much sense to feed this datafile into Cactus for
+initializing your x coordinate grid function :-)
%
% Automatically created from the ccl files
% Do not worry for now.