aboutsummaryrefslogtreecommitdiff
path: root/src/RecoverGH.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/RecoverGH.c')
-rw-r--r--src/RecoverGH.c343
1 files changed, 343 insertions, 0 deletions
diff --git a/src/RecoverGH.c b/src/RecoverGH.c
new file mode 100644
index 0000000..0391b30
--- /dev/null
+++ b/src/RecoverGH.c
@@ -0,0 +1,343 @@
+ /*@@
+ @file RecoverGH.c
+ @date Fri Jun 19 09:14:22 1998
+ @author Tom Goodale
+ @desc
+ Contains the routines to do the internal checkpoint recovery.
+
+ Currently can recover from:
+ (1) One file containing recombined data
+ (2) Multiple unrecombined files, where the current
+ number of processors and outputting processors
+ match those used to write the data.
+ @enddesc
+ @history
+ @hauthor Gabrielle Allen @hdate 19 Oct 1998
+ @hdesc Changed names ready for thorn_IO
+ @endhistory
+ @version $Id$
+ @@*/
+
+/* Version-control identification string; uses the same $Id$ keyword as the
+   @version tag in the file header. It points at a string literal, so it is
+   declared const to prevent accidental writes through it. */
+static const char *rcsid = "$Id$";
+
+#include <stdio.h>
+#include <string.h>
+
+#include "cctk.h"
+#include "flesh.h"
+#include "declare_parameters.h"
+#include "GHExtensions.h"
+#include "WarnLevel.h"
+#include "Comm.h"
+#ifdef CACTUSBASE_PUGH
+#include "CactusBase/pugh/src/include/pugh.h"
+#endif
+#include "CactusBase/IOUtil/src/ioGH.h"
+#include "ioFlexGH.h"
+
+
+/* Forward declaration of the dataset restore routine, which is implemented
+   in RestoreFile.c. IOFlexIO_RecoverGH passes it the opened file together
+   with the I/O processor, the I/O processor stride and the unchunked flag. */
+extern int IOFlexIO_RestoreIEEEIOfile (cGH *GH,
+                                       IOFile ifp,
+                                       int IOrecover_ioproc,
+                                       int IOrecover_ioproc_every,
+                                       int IOrecover_unchunked);
+
+
+ /*@@
+   @routine    IOFlexIO_RecoverGH
+   @date       Fri Jun 19 09:22:52 1998
+   @author     Tom Goodale
+   @desc
+               Recovers a GH from IEEEIO file(s).
+
+               Processor 0 locates the base file (trying the current
+               chunking mode first, then the other one), reads from it how
+               the data was written (GH$ioproc_every, GH$nprocs, unchunked)
+               and, under MPI, broadcasts this to all processors.
+               Every I/O processor then opens its file and the datasets
+               are restored via IOFlexIO_RestoreIEEEIOfile.
+               When called with CP_RECOVER_DATA each processor additionally
+               re-opens its file, one processor per I/O group at a time,
+               to restore the iteration number.
+
+               Arguments:
+                 GH          - the GH to recover into
+                 basename    - base name handed to IOUtil_PrepareFilename
+                 called_from - recovery mode; CP_RECOVER_DATA selects
+                               recovery from a ".chkpt" checkpoint file
+
+               Returns 0 on success, or -1 if no valid IEEEIO file could be
+               found (or if built without CACTUSBASE_PUGH).
+               Unrecoverable conditions are reported via CCTK_WARN (0, ...).
+   @enddesc
+   @calls      IOUtil_PrepareFilename IOFlexIO_RestoreIEEEIOfile
+   @calledby
+   @history
+   @hauthor    Gabrielle Allen
+   @hdate      Thu Jul 2 18:17:59 1998
+   @hdesc      Restore the physical time and iteration count, pass myproc to
+               IEEEIOparamRestore
+               Derives the filename from IOUtil_PrepareFilename (in thorn IOUtil)
+   @hauthor    Gabrielle Allen @hdate Oct 17 1998
+   @hdesc      Added input of (some) GH structure variables
+   @endhistory
+
+@@*/
+
+#ifdef CACTUSBASE_PUGH
+
+/* Build the name of recovery file number 'file_index' into 'fname':
+   the name prepared by IOUtil_PrepareFilename, followed by ".chkpt" when
+   recovering from a checkpoint, followed by ".ieee". */
+static void IOFlexIO_RecoveryFilename (cGH *GH, const char *basename,
+                                       char *fname, int called_from,
+                                       int file_index, int file_unchunked)
+{
+  IOUtil_PrepareFilename (GH, basename, fname, called_from,
+                          file_index, file_unchunked);
+  if (called_from == CP_RECOVER_DATA)
+    strcat (fname, ".chkpt");
+  strcat (fname, ".ieee");
+}
+
+
+/* Read the attribute 'name' from 'ifp' if it is stored as exactly one
+   FLEXIO_INT4 element. Returns 1 and stores the value in '*value' on
+   success; returns 0 and leaves '*value' untouched otherwise. */
+static int IOFlexIO_ReadInt4Attribute (IOFile ifp, char *name, int *value)
+{
+  int index;
+  int nt_stored;
+  Long nels_stored;
+  CCTK_INT4 tmpInt;
+
+  index = IOreadAttributeInfo (ifp, name, &nt_stored, &nels_stored);
+  if (index < 0 || nt_stored != FLEXIO_INT4 || nels_stored != 1)
+    return (0);
+
+  /* read into a 4-byte integer to match the stored type */
+  IOreadAttribute (ifp, index, &tmpInt);
+  *value = (int) tmpInt;
+  return (1);
+}
+
+#endif /* CACTUSBASE_PUGH */
+
+
+int IOFlexIO_RecoverGH (cGH *GH, const char *basename, int called_from)
+{
+#ifdef CACTUSBASE_PUGH
+
+  DECLARE_PARAMETERS
+  IOFile ifp;
+  char fname [1024];
+  /* large enough for any message that embeds fname plus fixed text */
+  char msg [1024 + 256];
+  int proc, nprocs, myproc;
+  int group_size;
+  int is_IEEEIO_file;
+  int iteration_stored;
+  int file_ioproc, file_ioproc_every;
+  int file_nprocs, file_unchunked;
+  pGH *pughGH;
+  ioGH *ioUtilGH;
+  cTimer total_time, dataset_time, param_time;
+#ifdef MPI
+  CCTK_INT info [3];
+#endif
+
+
+  /* Get the handles for PUGH and IOUtil extensions */
+  pughGH   = (pGH *) GH->extensions [CCTK_GetGHExtensionHandle ("PUGH")];
+  ioUtilGH = (ioGH *) GH->extensions [CCTK_GetGHExtensionHandle ("IO")];
+
+  /* identify myself */
+  nprocs = CCTK_GetnProcs (GH);
+  myproc = CCTK_GetMyProc (GH);
+
+  /* Give everything that is broadcast below a defined value on every
+     processor, including the case where no valid file is found */
+  is_IEEEIO_file    = 0;
+  file_unchunked    = ioUtilGH->unchunked;
+  file_ioproc_every = nprocs;
+
+  /* initialize timers */
+  CactusResetTimer (&total_time);
+  CactusResetTimer (&dataset_time);
+  CactusResetTimer (&param_time);
+
+  CactusStartTimer (&total_time);
+
+  /* Examine base file to find whether recovering from
+   * one (recombined) file or from multiple files
+   */
+
+  if (myproc == 0) {
+
+    /* Determine name of base file
+       NOTE: As we don't know whether the file is chunked or not
+             we need to try both file names. */
+    /* at first try with current chunking mode */
+    IOFlexIO_RecoveryFilename (GH, basename, fname, called_from,
+                               0, file_unchunked);
+
+    if (IO_verbose)
+      printf ("Opening file %s\n", fname);
+
+    /* Open file, make sure the file is valid */
+    ifp = IEEEopen (fname, "r");
+    if (IOisValid (ifp))
+      is_IEEEIO_file = 1;
+    else {
+      if (IO_verbose)
+        printf ("Cannot open file '%s'\n", fname);
+
+      /* now try with the other chunking mode */
+      file_unchunked = ! ioUtilGH->unchunked;
+      IOFlexIO_RecoveryFilename (GH, basename, fname, called_from,
+                                 0, file_unchunked);
+      if (IO_verbose)
+        printf ("Trying now file '%s'...\n", fname);
+
+      /* Open file, make sure the file is valid */
+      ifp = IEEEopen (fname, "r");
+      is_IEEEIO_file = IOisValid (ifp) ? 1 : 0;
+    }
+  }
+
+  if (myproc == 0 && is_IEEEIO_file) {
+    /* Now determine how the data was written */
+
+    /* Read nioprocs used to write data.
+       A non-positive value is rejected as well because it is used as a
+       divisor below. */
+    if (! IOFlexIO_ReadInt4Attribute (ifp, "GH$ioproc_every",
+                                      &file_ioproc_every) ||
+        file_ioproc_every <= 0) {
+      CCTK_WARN (1, "Unable to restore GH$ioproc_every. "
+                    "Assuming it is nprocs and continuing");
+      file_ioproc_every = nprocs;
+    }
+
+    /* Read nprocs used to write data */
+    if (! IOFlexIO_ReadInt4Attribute (ifp, "GH$nprocs", &file_nprocs)) {
+      CCTK_WARN (1, "Unable to restore GH$nprocs. "
+                    "Assuming it is 1 and continuing");
+      file_nprocs = 1;
+    }
+
+    /* Determine whether data is chunked or unchunked
+       We could derive this from the filename itself but just to be sure ... */
+    if (! IOFlexIO_ReadInt4Attribute (ifp, "unchunked", &file_unchunked)) {
+      /* file_unchunked still holds the mode the file was opened with */
+      sprintf (msg, "Unable to restore 'unchunked' attribute. "
+                    "Assuming it is %s and continuing",
+               file_unchunked ? "true" : "false");
+      CCTK_WARN (1, msg);
+    }
+
+    /* If we restore from multiple files
+     * the number of processors must match.
+     */
+
+    if (file_ioproc_every == nprocs || file_unchunked) {
+      if (IO_verbose)
+        printf ("Recovering from one %s file\n",
+                file_unchunked ? "unchunked" : "chunked");
+    } else {
+      if (file_nprocs != nprocs) {
+        sprintf (msg, "Must restart on %d processors with multiple files "
+                      "or recombine them", file_nprocs);
+        CCTK_WARN (0, msg);
+      }
+      if (IO_verbose)
+        printf ("Recovering from %d chunked files\n",
+                nprocs / file_ioproc_every + (nprocs % file_ioproc_every?1:0));
+    }
+  }
+
+#ifdef MPI
+  /* Broadcast chunking mode to all processors from processor zero.
+     It is file_unchunked (the mode of the file actually found) that the
+     other processors need for the filename and the restore call below. */
+  info [0] = is_IEEEIO_file;
+  info [1] = file_unchunked;
+  info [2] = file_ioproc_every;
+  CACTUS_MPI_ERROR (MPI_Bcast (info, 3, PUGH_MPI_INT, 0,
+                               pughGH->PUGH_COMM_WORLD));
+  is_IEEEIO_file    = info [0];
+  file_unchunked    = info [1];
+  file_ioproc_every = info [2];
+#endif
+
+  /* return here to IOUtil if no valid file could be found */
+  if (! is_IEEEIO_file) {
+    if (myproc == 0) {
+      sprintf (msg, "No valid IEEEIO file '%s' found !", fname);
+      CCTK_WARN (2, msg);
+    }
+    return (-1);
+  }
+
+  /* Determine the IO processors for each node and the corresponding
+     checkpoint file */
+  file_ioproc = myproc - (myproc % file_ioproc_every);
+  IOFlexIO_RecoveryFilename (GH, basename, fname, called_from,
+                             file_ioproc/file_ioproc_every, file_unchunked);
+
+  /* Open chunked files on other IO processors */
+  if (myproc != 0 && myproc == file_ioproc) {
+
+    if (IO_verbose)
+      printf ("Opening chunked file '%s' on processor %d.\n",
+              fname, myproc);
+
+    /* Open file, make sure the file is valid */
+    ifp = IEEEopen (fname, "r");
+    if (! IOisValid (ifp)) {
+      sprintf (msg, "Cannot open file '%s' on processor %d",
+               fname, myproc);
+      CCTK_WARN (0, msg);
+    }
+  }
+
+  /* Restore the data */
+  if (IO_verbose && myproc == 0)
+    printf ("Recovering %schunked data with ioproc %d, ioproc_every %d.\n",
+            file_unchunked ? "un" : "", file_ioproc, file_ioproc_every);
+
+  /* NOTE(review): on processors that are not I/O processors ifp has not
+     been opened here; presumably the restore routine only uses it on the
+     I/O processor - confirm against RestoreFile.c */
+  CactusStartTimer (&dataset_time);
+  IOFlexIO_RestoreIEEEIOfile (GH, ifp, file_ioproc, file_ioproc_every,
+                              file_unchunked);
+  CactusStopTimer (&dataset_time);
+
+  /* Close the file. */
+  if (myproc == file_ioproc) {
+    if (IO_verbose)
+      printf ("Closing file '%s' after recovery.\n", fname);
+    IOclose (ifp);
+  }
+
+  if (called_from == CP_RECOVER_DATA) {
+    /* Must read in parameters and scalars on all processors. */
+    CactusStartTimer (&param_time);
+
+    /* Every processor must go through the same number of barriers, so
+       iterate over the position within an I/O group rather than over the
+       members of this processor's (possibly shorter, last) group. */
+    group_size = file_ioproc_every < nprocs ? file_ioproc_every : nprocs;
+    for (proc = 0; proc < group_size; proc++) {
+
+      /* Only have the file open by one proc at any time. */
+      if (file_ioproc + proc == myproc) {
+
+        /* Open file, make sure the file is valid */
+        ifp = IEEEopen (fname, "r");
+        if (! IOisValid (ifp)) {
+          sprintf (msg, "Cannot open checkpoint file '%s' on processor %d",
+                   fname, myproc);
+          CCTK_WARN (0, msg);
+        }
+
+        /* Restore the parameters. */
+        if (IO_verbose)
+          printf ("Recovering parameters on processor %d.\n", myproc);
+/*** FIXME ***/
+#if 0
+        IO_IEEEIOparamRestore (ifp, myproc);
+
+        /* Restore the structure variables. */
+        if (IO_verbose)
+          printf ("Recovering GH variables.\n");
+        IO_IEEEIOStructRestore (GH, ifp);
+#endif
+
+        /* Restore global variables */
+
+        /* Get the iteration number. */
+        if (IO_verbose)
+          printf ("Recovering iteration number.\n");
+
+        if (IOFlexIO_ReadInt4Attribute (ifp, "iteration", &iteration_stored)) {
+          if (IO_verbose)
+            printf ("Iteration number is %d\n", (int) iteration_stored);
+          GH->iteration = iteration_stored;
+        } else
+          printf ("*Warning* Unable to restore iteration number\n");
+
+        /* Close the file. */
+        if (IO_verbose)
+          printf ("Closing '%s' after recovery.\n", fname);
+        IOclose (ifp);
+      }
+
+      /* Synchronise all processors */
+      CCTK_Barrier (GH);
+    }
+    CactusStopTimer (&param_time);
+  }
+
+  /* print timing output */
+  if (IO_verbose && called_from == CP_RECOVER_DATA && myproc == 0) {
+    printf (
+      "----------------------------------------------------------------\n");
+/*** FIXME: choose right component of basic[] ***/
+    printf ("Time to restore data: %10.3lf sec\n",
+            dataset_time.total.basic [0]);
+    printf ("Time to restore parameters: %10.3lf sec\n",
+            param_time.total.basic [0]);
+    printf ("Time to recover from checkpoint: %10.3lf sec\n",
+            total_time.total.basic [0]);
+  }
+
+  return (0);
+
+#else /* ! CACTUSBASE_PUGH */
+
+  /* Recovery is only implemented on top of PUGH: report "nothing
+     recovered" instead of falling off the end of a non-void function. */
+  (void) GH;
+  (void) basename;
+  (void) called_from;
+  return (-1);
+
+#endif /* CACTUSBASE_PUGH */
+}