aboutsummaryrefslogtreecommitdiff
path: root/src/RecoverGH.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/RecoverGH.c')
-rw-r--r--src/RecoverGH.c403
1 files changed, 403 insertions, 0 deletions
diff --git a/src/RecoverGH.c b/src/RecoverGH.c
new file mode 100644
index 0000000..9bdcf18
--- /dev/null
+++ b/src/RecoverGH.c
@@ -0,0 +1,403 @@
+ /*@@
+ @file RecoverGH.c
+ @date Tue Oct 10 2000
+ @author Thomas Radke
+ @desc
+ Contains the routines to recover from a streamed HDF5 checkpoint.
+
+ Currently can recover from:
+ (1) One file containing recombined data
+ (2) Multiple unrecombined files, where the current
+ number of processors and IO processors
+ match those used to write the files.
+ @enddesc
+ @version $Id$
+ @@*/
+
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cctk.h"
+#include "cctk_Parameters.h"
+#include "CactusPUGH/PUGH/src/include/pugh.h"
+#include "CactusBase/IOUtil/src/ioutil_CheckpointRecovery.h"
+#include "ioStreamedHDF5GH.h"
+
+
+/* the RCS/CVS keyword string identifying the revision of this file,
+   followed by the CCTK_FILEVERSION macro invocation with a file-specific tag
+   (according to the original note it provides a dummy function
+   so that rcsid is referenced) */
+static char *rcsid = "$Id$";
+CCTK_FILEVERSION(BetaThorns_IOStreamedHDF5_RecoverGH_c)
+
+
+/* prototype of the file-local helper routine which opens a streamed HDF5
+   file and fills in the file info structure;
+   its definition is found at the end of this file */
+static int IOStreamedHDF5_OpenFile (cGH *GH,
+ const char *basename,
+ int called_from,
+ fileinfo_t *fileinfo);
+
+
+ /*@@
+   @routine    IOStreamedHDF5_Recover
+   @date       Tue Oct 10 2000
+   @author     Thomas Radke
+   @desc
+               Recovers a GH from a streamed HDF5 checkpoint file.
+               This routine is registered with IOUtil
+               as IOStreamedHDF5's recovery routine.
+
+               The file info structure is kept in a function-static variable
+               so that a file which was opened for CP_RECOVER_PARAMETERS
+               can be reused by a following CP_RECOVER_DATA call.
+               After the file has been closed the structure is invalidated
+               so that it cannot be reused by accident.
+   @enddesc
+   @var        GH
+   @vdesc      Pointer to CCTK grid hierarchy
+   @vtype      cGH *
+   @vio        in
+   @endvar
+   @var        basename
+   @vdesc      the basename containing the 'host:port' of the sending process
+   @vtype      const char *
+   @vio        in
+   @endvar
+   @var        called_from
+   @vdesc      flag indicating where this routine was called from
+               (CP_RECOVER_PARAMETERS, CP_RECOVER_DATA, or FILEREADER_DATA)
+   @vtype      int
+   @vio        in
+   @endvar
+
+   @calls      IOStreamedHDF5_OpenFile
+               IOHDF5Util_RecoverParameters
+               IOHDF5Util_RecoverVariables
+               IOHDF5Util_RecoverGHextensions
+               CCTK_TimerStartI
+               CCTK_TimerStopI
+               IOUtil_PrintTimings
+
+   @returntype int
+   @returndesc
+               -1 if there is no valid HDF5 file,
+               or the returncode of
+               @seeroutine IOHDF5Util_RecoverParameters or
+               @seeroutine IOHDF5Util_RecoverVariables or
+               @seeroutine IOHDF5Util_RecoverGHextensions
+   @endreturndesc
+@@*/
+int IOStreamedHDF5_Recover (cGH *GH,
+                            const char *basename,
+                            int called_from)
+{
+  DECLARE_CCTK_PARAMETERS
+  int result;
+  int do_timing;
+  ioStreamedHDF5GH *myGH;
+  static fileinfo_t fileinfo; /* this is static because info is passed from
+                                 CP_RECOVER_PARAMETERS to CP_RECOVER_DATA */
+
+
+  /* The recovery timer is only stopped and printed for CP_RECOVER_DATA,
+     so only start it in that case. The GH extension may not exist
+     in which case no timing is done at all. */
+  myGH = (ioStreamedHDF5GH *) CCTK_GHExtension (GH, "IOStreamedHDF5");
+  do_timing = called_from == CP_RECOVER_DATA &&
+              myGH && myGH->print_timing_info;
+  if (do_timing)
+  {
+    CCTK_TimerStartI (myGH->timers[RECOVERY_TIMER]);
+  }
+
+  /* open the file if it wasn't already opened at CCTK_RECOVER_PARAMETERS */
+  /* FIXME Gab ... asymmetric levfac */
+  if (called_from == CP_RECOVER_PARAMETERS ||
+      called_from == FILEREADER_DATA ||
+      (GH && (GH->cctk_levfac[0] > 1 || GH->cctk_convlevel > 0)))
+  {
+    if (IOStreamedHDF5_OpenFile (GH, basename, called_from, &fileinfo) < 0)
+    {
+      /* don't leak the copy of the filename made when opening the file
+         (free (NULL) is a no-op if it was released already) */
+      free (fileinfo.filename);
+      fileinfo.filename = NULL;
+      return (-1);
+    }
+  }
+  else if (! fileinfo.is_HDF5_file)
+  {
+    /* This is the case for CP_RECOVER_DATA.
+       CCTK_RECOVER_PARAMETERS must have been called before
+       and set up the file info structure. */
+    return (-1);
+  }
+
+  /* if called at CCTK_RECOVER_PARAMETERS
+     just do this and return (keeping the file open) */
+  if (called_from == CP_RECOVER_PARAMETERS)
+  {
+    return (IOHDF5Util_RecoverParameters (&fileinfo));
+  }
+
+  /* Recover variables */
+  if (verbose)
+  {
+    CCTK_VInfo (CCTK_THORNSTRING, "Recovering %schunked data with ioproc %d, "
+                "ioproc_every %d", fileinfo.unchunked ? "un" : "",
+                fileinfo.ioproc, fileinfo.ioproc_every);
+  }
+  result = IOHDF5Util_RecoverVariables (GH, &fileinfo);
+
+  /* Recover GH extensions (only for a real recovery,
+     and only if the variables could be recovered) */
+  if (result == 0 && called_from == CP_RECOVER_DATA)
+  {
+    if (verbose)
+    {
+      CCTK_INFO ("Recovering GH extensions");
+    }
+    result = IOHDF5Util_RecoverGHextensions (GH, &fileinfo);
+  }
+
+  /* Close the file (only the IO processor has it open) */
+  if (CCTK_MyProc (GH) == fileinfo.ioproc)
+  {
+    if (verbose)
+    {
+      if (called_from == CP_RECOVER_DATA)
+      {
+        CCTK_VInfo (CCTK_THORNSTRING, "Closing checkpoint file '%s' after "
+                    "successful recovery", fileinfo.filename);
+      }
+      else
+      {
+        CCTK_VInfo (CCTK_THORNSTRING, "Closing data file '%s'",
+                    fileinfo.filename);
+      }
+    }
+    IOHDF5_ERROR (H5Fclose (fileinfo.file));
+  }
+
+  /* Free the allocated filename and invalidate the static file info:
+     the file is closed now, so a later call which doesn't reopen it
+     must neither use the stale file handle nor free the filename again. */
+  free (fileinfo.filename);
+  fileinfo.filename = NULL;
+  fileinfo.is_HDF5_file = 0;
+
+  /* stop recovery timer and print timing info */
+  if (do_timing)
+  {
+    const char *timer_description = "Time to recover:";
+
+
+    CCTK_TimerStopI (myGH->timers[RECOVERY_TIMER]);
+    IOUtil_PrintTimings ("Timing information for recovery in IOStreamedHDF5:",
+                         1, &myGH->timers[RECOVERY_TIMER], &timer_description);
+  }
+
+  return (result);
+}
+
+
+ /*@@
+   @routine    IOStreamedHDF5_RecoverParameters
+   @date       Tue Oct 10 2000
+   @author     Thomas Radke
+   @desc
+               Scheduled at CCTK_RECOVER_PARAMETERS to recover the parameters
+               from a streamed HDF5 checkpoint file.
+
+               The routine cannot be registered with IOUtil (as is done for
+               IOStreamedHDF5_Recover) because the registration mechanism
+               isn't activated yet at CCTK_RECOVER_PARAMETERS.
+               Instead the generic parameter recovery routine of IOUtil
+               is called directly and gets passed IOStreamedHDF5_Recover
+               as the callback function.
+
+               No arguments are passed to this routine, not even a GH,
+               because that doesn't exist yet at the time it is being called.
+   @enddesc
+
+   @calls      IOUtil_RecoverParameters
+
+   @returntype int
+   @returndesc
+               -1 if an invalid recovery mode was set,
+               or the returncode of
+               @seeroutine IOUtil_RecoverParameters
+   @endreturndesc
+@@*/
+int IOStreamedHDF5_RecoverParameters (void)
+{
+  DECLARE_CCTK_PARAMETERS
+
+
+  /* any recovery mode other than "auto" is handed over to IOUtil */
+  if (! CCTK_Equals (recover, "auto"))
+  {
+    return (IOUtil_RecoverParameters (IOStreamedHDF5_Recover, NULL, NULL));
+  }
+
+  /* the "auto" recovery mode is refused for streamed checkpoints */
+  CCTK_WARN (2, "'IO::recover = \"auto\"' selected which doesn't make sense "
+                "for streamed HDF5 checkpoint files");
+
+  return (-1);
+}
+
+
+/**************************************************************************/
+/* local routines */
+/**************************************************************************/
+ /*@@
+   @routine    IOStreamedHDF5_OpenFile
+   @date       Tue Oct 10 2000
+   @author     Thomas Radke
+   @desc
+               Opens a streamed HDF5 file given by its filename
+               (which has format 'host:port') and checks whether
+               we can recover from that file.
+               Only processor 0 opens the file. The resulting file
+               information is broadcasted to all other processors
+               so that everyone knows how to proceed.
+
+               On failure no resources are left behind: the file is closed
+               again if it was opened, and the copy of the filename is freed
+               (fileinfo->filename is set to NULL).
+               On success fileinfo->filename holds an allocated copy
+               of the basename which must be freed by the caller.
+   @enddesc
+
+   @var        GH
+   @vdesc      Pointer to CCTK grid hierarchy
+   @vtype      cGH *
+   @vio        in
+   @endvar
+   @var        basename
+   @vdesc      basename of the HDF5 file to recover from
+               For streamed files this should be of format 'host:port'.
+   @vtype      const char *
+   @vio        in
+   @endvar
+   @var        called_from
+   @vdesc      flag indicating where this routine was called from
+               (currently not evaluated by this routine)
+   @vtype      int
+   @vio        in
+   @endvar
+   @var        fileinfo
+   @vdesc      pointer to structure describing the file
+   @vtype      fileinfo_t *
+   @vio        out
+   @endvar
+
+   @returntype int
+   @returndesc
+               0 for success, -1 if the file could not be opened
+               or cannot be used for recovery
+   @endreturndesc
+@@*/
+static int IOStreamedHDF5_OpenFile (cGH *GH,
+                                    const char *basename,
+                                    int called_from,
+                                    fileinfo_t *fileinfo)
+{
+  DECLARE_CCTK_PARAMETERS
+  hid_t fapl;
+  hid_t group;
+  int nprocs, myproc;
+#ifdef CCTK_MPI
+  MPI_Comm comm;
+  CCTK_INT4 info[3];
+#endif
+
+
+  /* the calling context makes no difference for opening the file */
+  (void) called_from;
+
+#ifdef CCTK_MPI
+  /* Get the communicator for broadcasting the info structure */
+  /* NOTE: When recovering parameters thorn PUGH is not yet initialized
+           so that we have to use MPI_COMM_WORLD in this case */
+  comm = CCTK_GHExtensionHandle ("PUGH") < 0 ?
+         MPI_COMM_WORLD : PUGH_pGH (GH)->PUGH_COMM_WORLD;
+#endif
+
+  /* identify myself */
+  nprocs = CCTK_nProcs (GH);
+  myproc = CCTK_MyProc (GH);
+
+  /* The caller keeps the file info structure between calls,
+     so make sure that the result of an earlier successful open
+     is never mistaken for the result of this one. */
+  fileinfo->is_HDF5_file = 0;
+
+  /* a streamed file is always opened and read by processor 0 only */
+  fileinfo->ioproc = 0;
+
+  /* Use the basename (which should be of format 'host:port') as the filename.
+     A previous filename is not freed here because it is owned by the caller
+     who may have released it already. */
+  fileinfo->filename = basename ? strdup (basename) : NULL;
+
+  if (myproc == 0)
+  {
+    if (! fileinfo->filename)
+    {
+      CCTK_WARN (1, "Cannot allocate memory for the name of the streamed "
+                    "HDF5 file");
+    }
+    else
+    {
+      if (verbose)
+      {
+        CCTK_VInfo (CCTK_THORNSTRING, "Opening streamed HDF5 file from '%s'",
+                    fileinfo->filename);
+      }
+
+      /* set up file access property list and select Stream VFD */
+      IOHDF5_ERROR (fapl = H5Pcreate (H5P_FILE_ACCESS));
+      IOHDF5_ERROR (H5Pset_fapl_stream (fapl, NULL));
+
+      fileinfo->file = H5Fopen (fileinfo->filename, H5F_ACC_RDONLY, fapl);
+      IOHDF5_ERROR (H5Pclose (fapl));
+      if (fileinfo->file < 0)
+      {
+        CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING,
+                    "Cannot open streamed file HDF5 file from '%s'",
+                    fileinfo->filename);
+      }
+      else
+      {
+        group = H5Gopen (fileinfo->file, GLOBAL_ATTRIBUTES_GROUP);
+        if (group < 0)
+        {
+          CCTK_WARN (1, "Can't find global attributes group. "
+                        "Is this really a Cactus HDF5 datafile ?");
+        }
+        else
+        {
+          /* Determine how the data was written */
+          READ_ATTRIBUTE (group, "nprocs", H5T_NATIVE_INT, &fileinfo->nprocs);
+          READ_ATTRIBUTE (group, "unchunked", H5T_NATIVE_INT,
+                          &fileinfo->unchunked);
+          READ_ATTRIBUTE (group, "ioproc_every", H5T_NATIVE_INT,
+                          &fileinfo->ioproc_every);
+
+          IOHDF5_ERROR (H5Gclose (group));
+
+          /* If we recover from chunked file(s) the number of
+           * writing processors must match the number of reading
+           * processors, and the total number of processors must match.
+           */
+          if (fileinfo->unchunked ||
+              (fileinfo->ioproc_every == nprocs && nprocs > 1))
+          {
+            if (verbose)
+            {
+              CCTK_VInfo (CCTK_THORNSTRING,
+                          "Recovering from one %schunked file",
+                          fileinfo->unchunked ? "un":"");
+            }
+            fileinfo->ioproc_every = nprocs;
+            fileinfo->is_HDF5_file = 1;
+          }
+          else if (fileinfo->nprocs != nprocs)
+          {
+            CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING,
+                        "Must restart on %d processors with chunked files "
+                        "or recombine them", fileinfo->nprocs);
+          }
+          else if (fileinfo->ioproc_every <= 0)
+          {
+            /* a non-positive value cannot describe a valid processor layout
+               and would cause a division by zero below */
+            CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING,
+                        "Invalid value %d for attribute 'ioproc_every'",
+                        fileinfo->ioproc_every);
+          }
+          else
+          {
+            if (verbose)
+            {
+              CCTK_VInfo (CCTK_THORNSTRING, "Recovering from %d chunked files",
+                          nprocs / fileinfo->ioproc_every +
+                          (nprocs % fileinfo->ioproc_every ? 1 : 0));
+            }
+            fileinfo->is_HDF5_file = 1;
+          }
+        }
+
+        /* don't leak the file handle if the file turned out to be unusable:
+           the caller won't close a file for which we return an error */
+        if (! fileinfo->is_HDF5_file)
+        {
+          IOHDF5_ERROR (H5Fclose (fileinfo->file));
+        }
+      }
+    }
+
+    if (! fileinfo->is_HDF5_file)
+    {
+      CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
+                  "No valid HDF5 file '%s' found",
+                  basename ? basename : "(null)");
+    }
+  }
+
+#ifdef CCTK_MPI
+  /* Broadcast the file information to all processors
+     Need to convert everything into CCTK_INTs which can be communicated. */
+  info[0] = fileinfo->is_HDF5_file;
+  info[1] = fileinfo->unchunked;
+  info[2] = fileinfo->ioproc_every;
+  CACTUS_MPI_ERROR (MPI_Bcast (info, 3, PUGH_MPI_INT4, 0, comm));
+  fileinfo->is_HDF5_file = info[0];
+  fileinfo->unchunked = info[1];
+  fileinfo->ioproc_every = info[2];
+#endif
+
+  /* on failure the caller has no use for the filename anymore */
+  if (! fileinfo->is_HDF5_file)
+  {
+    free (fileinfo->filename);
+    fileinfo->filename = NULL;
+  }
+
+  /* Return 0 for success otherwise negative */
+  return (fileinfo->is_HDF5_file ? 0 : -1);
+}