diff options
Diffstat (limited to 'src/RecoverGH.c')
-rw-r--r-- | src/RecoverGH.c | 403 |
1 files changed, 403 insertions, 0 deletions
diff --git a/src/RecoverGH.c b/src/RecoverGH.c new file mode 100644 index 0000000..9bdcf18 --- /dev/null +++ b/src/RecoverGH.c @@ -0,0 +1,403 @@ + /*@@ + @file RecoverGH.c + @date Tue Oct 10 2000 + @author Thomas Radke + @desc + Contains the routines to recover from a streamed HDF5 checkpoint. + + Currently can recover from: + (1) One file containing recombined data + (2) Multiple unrecombined files, where the current + number of processors and IO processors + match those used to write the files. + @enddesc + @version $Id$ + @@*/ + + +#include <stdlib.h> + +#include "cctk.h" +#include "cctk_Parameters.h" +#include "CactusPUGH/PUGH/src/include/pugh.h" +#include "CactusBase/IOUtil/src/ioutil_CheckpointRecovery.h" +#include "ioStreamedHDF5GH.h" + + +/* the rcs ID and its dummy function to use it */ +static char *rcsid = "$Id$"; +CCTK_FILEVERSION(BetaThorns_IOStreamedHDF5_RecoverGH_c) + + +/* local function prototypes */ +static int IOStreamedHDF5_OpenFile (cGH *GH, + const char *basename, + int called_from, + fileinfo_t *fileinfo); + + + /*@@ + @routine IOStreamedHDF5_Recover + @date Tue Oct 10 2000 + @author Thomas Radke + @desc + Recovers a GH from a streamed HDF5 checkpoint file. + This routine is registered with IOUtil + as IOStreamedHDF5's recovery routine. + @enddesc + @var GH + @vdesc Pointer to CCTK grid hierarchy + @vtype cGH * + @vio in + @endvar + @var basename + @vdesc the basename containing the 'host:port' of the sending process + @vtype const char * + @vio in + @endvar + @var called_from + @vdesc flag indicating where this routine was called from + (either RECOVER or filereader) + @vtype int + @vio in + @endvar + + @calls IOStreamedHDF5_OpenFile + IOHDF5Util_RecoverParameters + IOHDF5Util_RecoverVariables + IOHDF5Util_RecoverGHextensions + CCTK_TimerStartI + CCTK_TimerStopI + IOUtil_PrintTimings + + @returntype int + @returndesc + -1 if there is no valid HDF5 file, + or the returncode of + @seeroutine IOHDF5Util_RecoverParameters or + @seeroutine IOHDF5Util_RecoverVariables or + @seeroutine IOHDF5Util_RecoverGHextensions + @endreturndesc +@@*/ +int IOStreamedHDF5_Recover (cGH *GH, + const char *basename, + int called_from) +{ + DECLARE_CCTK_PARAMETERS + int result; + ioStreamedHDF5GH *myGH; + static fileinfo_t fileinfo; /* this is static because info is passed from + CP_RECOVERY_PARAMETERS to CP_RECOVERY_DATA */ + + /* start the recovery timer if we were called at CCTK_RECOVER */ + myGH = (ioStreamedHDF5GH *) CCTK_GHExtension (GH, "IOStreamedHDF5"); + if (myGH && myGH->print_timing_info) + { + CCTK_TimerStartI (myGH->timers[RECOVERY_TIMER]); + } + + /* open the file if it wasn't already opened at CCTK_RECOVER_PARAMETERS */ + /* FIXME Gab ... asymmetric levfac */ + if (called_from == CP_RECOVER_PARAMETERS || + called_from == FILEREADER_DATA || + (GH && (GH->cctk_levfac[0] > 1 || GH->cctk_convlevel > 0))) + { + if (IOStreamedHDF5_OpenFile (GH, basename, called_from, &fileinfo) < 0) + { + return (-1); + } + } + else + { + /* This is the case for CP_RECOVER_DATA. + CCTK_RECOVER_PARAMETERS must have been called before + and set up the file info structure. */ + if (! fileinfo.is_HDF5_file) + { + return (-1); + } + } + + /* if called at CCTK_RECOVER_PARAMETERS + just do this and return (keeping the file open) */ + if (called_from == CP_RECOVER_PARAMETERS) + { + return (IOHDF5Util_RecoverParameters (&fileinfo)); + } + + /* Recover variables */ + if (verbose) + { + CCTK_VInfo (CCTK_THORNSTRING, "Recovering %schunked data with ioproc %d, " + "ioproc_every %d", fileinfo.unchunked ? "un" : "", + fileinfo.ioproc, fileinfo.ioproc_every); + } + result = IOHDF5Util_RecoverVariables (GH, &fileinfo); + + /* Recover GH extensions */ + if (result == 0 && called_from == CP_RECOVER_DATA) + { + if (verbose) + { + CCTK_INFO ("Recovering GH extensions"); + } + result = IOHDF5Util_RecoverGHextensions (GH, &fileinfo); + } + + /* Close the file */ + if (CCTK_MyProc (GH) == fileinfo.ioproc) + { + if (verbose) + { + if (called_from == CP_RECOVER_DATA) + { + CCTK_VInfo (CCTK_THORNSTRING, "Closing checkpoint file '%s' after " + "successful recovery", fileinfo.filename); + } + else + { + CCTK_VInfo (CCTK_THORNSTRING, "Closing data file '%s'", + fileinfo.filename); + } + } + IOHDF5_ERROR (H5Fclose (fileinfo.file)); + } + + /* free the allocated filename */ + if (fileinfo.filename) + { + free (fileinfo.filename); + } + + /* stop recovery timer and print timing info */ + if (called_from == CP_RECOVER_DATA && myGH->print_timing_info) + { + const char *timer_description = "Time to recover:"; + + + CCTK_TimerStopI (myGH->timers[RECOVERY_TIMER]); + IOUtil_PrintTimings ("Timing information for recovery in IOStreamedHDF5:", + 1, &myGH->timers[RECOVERY_TIMER], &timer_description); + } + + return (result); +} + + + /*@@ + @routine IOStreamedHDF5_RecoverParameters + @date Tue Oct 10 2000 + @author Thomas Radke + @desc + This routine is scheduled at CCTK_RECOVER_PARAMETERS. + It recovers the parameters from an HDF5 checkpoint file. + + Note that it cannot be registered with IOUtil to be scheduled + from there (as done with the IOStreamedHDF5_Recover routine) + because the registration mechanism isn't activated yet + at CCTK_RECOVER_PARAMETERS. + Instead we call the generic parameter recovery routine + from IOUtil here, and just pass the necessary callback function + and its arguments. + + Note also that this routine doesn't get passed any parameters, + not even a GH, because this doesn't exist yet at the time it is + being called. + @enddesc + + @calls IOUtil_RecoverParameters + + @returntype int + @returndesc + -1 if an invalid reocvery mode was set + or the returncode of + @seeroutine IOUtil_RecoverParameters + @endreturndesc +@@*/ +int IOStreamedHDF5_RecoverParameters (void) +{ + DECLARE_CCTK_PARAMETERS + int retval; + + + if (CCTK_Equals (recover, "auto")) + { + CCTK_WARN (2, "'IO::recover = \"auto\"' selected which doesn't make sense " + "for streamed HDF5 checkpoint files"); + retval = -1; + } + else + { + retval = IOUtil_RecoverParameters (IOStreamedHDF5_Recover, NULL, NULL); + } + + return (retval); +} + + +/**************************************************************************/ +/* local routines */ +/**************************************************************************/ + /*@@ + @routine IOStreamedHDF5_OpenFile + @date Tue Oct 10 2000 + @author Thomas Radke + @desc + Open a streamed HDF5 file given by its filename + (which has format 'host:port') and checks whether + we can recover from that file. + The file information is broadcasted by the IO processor(s) + to all other processors so that everyone knows how to proceed. + @enddesc + + @var GH + @vdesc Pointer to CCTK grid hierarchy + @vtype cGH * + @vio in + @endvar + @var basename + @vdesc basename of the HDF5 file to recover from + For streamed files this should be of format 'host:port'. + @vtype int + @vio in + @endvar + @var called_from + @vdesc flag indicating where this routine was called from + @vtype int + @vio in + @endvar + @var fileinfo + @vdesc pointer to structure describing the file + @vtype fileinfo_t * + @vio out + @endvar + + @returntype int + @returndesc + 0 for success, -1 if file could not be opened + @endreturndesc +@@*/ +static int IOStreamedHDF5_OpenFile (cGH *GH, + const char *basename, + int called_from, + fileinfo_t *fileinfo) +{ + DECLARE_CCTK_PARAMETERS + hid_t fapl; + hid_t group; + int nprocs, myproc; +#ifdef CCTK_MPI + MPI_Comm comm; + CCTK_INT4 info[3]; +#endif + + +#ifdef CCTK_MPI + /* Get the communicator for broadcasting the info structure */ + /* NOTE: When recovering parameters thorn PUGH is not yet initialized + so that we have to use MPI_COMM_WORLD in this case */ + comm = CCTK_GHExtensionHandle ("PUGH") < 0 ? + MPI_COMM_WORLD : PUGH_pGH (GH)->PUGH_COMM_WORLD; +#endif + + /* identify myself */ + nprocs = CCTK_nProcs (GH); + myproc = CCTK_MyProc (GH); + + /* use the basename (which should be of format 'host:port') as the filename */ + fileinfo->filename = strdup (basename); + + if (myproc == 0) + { + if (verbose) + { + CCTK_VInfo (CCTK_THORNSTRING, "Opening streamed HDF5 file from '%s'", + fileinfo->filename); + } + + /* set up file access property list and select Stream VFD */ + IOHDF5_ERROR (fapl = H5Pcreate (H5P_FILE_ACCESS)); + IOHDF5_ERROR (H5Pset_fapl_stream (fapl, NULL)); + + fileinfo->file = H5Fopen (fileinfo->filename, H5F_ACC_RDONLY, fapl); + IOHDF5_ERROR (H5Pclose (fapl)); + if (fileinfo->file < 0) + { + CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, + "Cannot open streamed file HDF5 file from '%s'", + fileinfo->filename); + } + else if ((group = H5Gopen (fileinfo->file, GLOBAL_ATTRIBUTES_GROUP)) < 0) + { + CCTK_WARN (1, "Can't find global attributes group. " + "Is this really a Cactus HDF5 datafile ?"); + } + else + { + /* Determine how the data was written */ + READ_ATTRIBUTE (group, "nprocs", H5T_NATIVE_INT, &fileinfo->nprocs); + READ_ATTRIBUTE (group, "unchunked", H5T_NATIVE_INT, &fileinfo->unchunked); + READ_ATTRIBUTE (group, "ioproc_every", H5T_NATIVE_INT, + &fileinfo->ioproc_every); + + IOHDF5_ERROR (H5Gclose (group)); + + /* If we recover from chunked file(s) the number of + * writing processors must match the number of reading + * processors, and the total number of processors must match. + */ + + if ((fileinfo->ioproc_every == nprocs && nprocs > 1) || + fileinfo->unchunked) + { + if (verbose) + { + CCTK_VInfo (CCTK_THORNSTRING, "Recovering from one %schunked file", + fileinfo->unchunked ? "un":""); + } + fileinfo->ioproc_every = nprocs; + fileinfo->is_HDF5_file = 1; + } + else + { + if (fileinfo->nprocs != nprocs) + { + CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, + "Must restart on %d processors with chunked files " + "or recombine them", fileinfo->nprocs); + } + else + { + if (verbose) + { + CCTK_VInfo (CCTK_THORNSTRING, "Recovering from %d chunked files", + nprocs / fileinfo->ioproc_every + + (nprocs % fileinfo->ioproc_every ? 1 : 0)); + } + fileinfo->is_HDF5_file = 1; + } + } + } + } + + if (myproc == 0 && ! fileinfo->is_HDF5_file) + { + CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, + "No valid HDF5 file '%s' found", fileinfo->filename); + } + +#ifdef CCTK_MPI + /* Broadcast the file information to all processors + Need to convert everything into CCTK_INTs which can be communicated. */ + info[0] = fileinfo->is_HDF5_file; + info[1] = fileinfo->unchunked; + info[2] = fileinfo->ioproc_every; + CACTUS_MPI_ERROR (MPI_Bcast (info, 3, PUGH_MPI_INT4, 0, comm)); + fileinfo->is_HDF5_file = info[0]; + fileinfo->unchunked = info[1]; + fileinfo->ioproc_every = info[2]; +#endif + + /* Return 0 for success otherwise negative */ + return (fileinfo->is_HDF5_file ? 0 : -1); +} |