aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authortradke <tradke@4825ed28-b72c-4eae-9704-e50c059e567d>2002-02-28 10:52:27 +0000
committertradke <tradke@4825ed28-b72c-4eae-9704-e50c059e567d>2002-02-28 10:52:27 +0000
commit91f29c7f9ea41731286fa36f7f3696437228dfb2 (patch)
tree087e91531f64a857b965e693a495af2f1daf2a4e /src
parent354eecccc7b74478c8ebcc69e0bb7f686a934a0b (diff)
Check whether all IO processors could successfully create a checkpoint.
Only then are old checkpoint files allowed to be removed. git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGHIO/IOHDF5/trunk@105 4825ed28-b72c-4eae-9704-e50c059e567d
Diffstat (limited to 'src')
-rw-r--r--src/DumpGH.c62
1 files changed, 42 insertions, 20 deletions
diff --git a/src/DumpGH.c b/src/DumpGH.c
index 18ca6c0..5ede0f2 100644
--- a/src/DumpGH.c
+++ b/src/DumpGH.c
@@ -17,6 +17,9 @@
#include "CactusBase/IOUtil/src/ioGH.h"
#include "CactusBase/IOUtil/src/ioutil_CheckpointRecovery.h"
#include "BetaThorns/IOHDF5Util/src/ioHDF5UtilGH.h"
+#ifdef CCTK_MPI
+#include "CactusPUGH/PUGH/src/include/pugh.h"
+#endif
#include "ioHDF5GH.h"
#include <stdlib.h>
@@ -174,14 +177,14 @@ void IOHDF5_TerminationCheckpoint (cGH *GH)
@@*/
static int IOHDF5_Checkpoint (cGH *GH, int called_from)
{
- DECLARE_CCTK_PARAMETERS
hid_t file;
int myproc;
- int retval;
+ CCTK_INT retval;
ioGH *ioUtilGH;
ioHDF5GH *myGH;
/* FIXME: allocate filenames dynamically */
char cp_filename[1024], cp_tempname[1024];
+ DECLARE_CCTK_PARAMETERS
retval = 0;
@@ -241,6 +244,19 @@ static int IOHDF5_Checkpoint (cGH *GH, int called_from)
file);
}
+#ifdef CCTK_MPI
+ /* find out whether all IO processors succeeded in writing the checkpoint */
+ if (ioUtilGH->nioprocs > 1)
+ {
+ CCTK_INT tmp;
+
+
+ tmp = retval;
+ CACTUS_MPI_ERROR (MPI_Allreduce (&tmp, &retval, 1, PUGH_MPI_INT, MPI_SUM,
+ PUGH_pGH (GH)->PUGH_COMM_WORLD));
+ }
+#endif
+
/* close the temporary checkpoint file and rename it to the real file */
if (myproc == ioUtilGH->ioproc)
{
@@ -256,30 +272,36 @@ static int IOHDF5_Checkpoint (cGH *GH, int called_from)
IOHDF5_ERROR (H5Fclose (file));
}
-#ifdef _WIN32
- /* Windows' rename(2) routine isn't POSIX compatible
- in that it would unlink an existing file :-( */
- unlink (cp_filename);
-#endif
- if (rename (cp_tempname, cp_filename))
- {
- CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING,
- "Could not rename temporary checkpoint file '%s' to '%s'",
- cp_tempname, cp_filename);
- }
-
/* delete the oldest checkpoint file if checkpoint_keep_all isn't set
and put the new filename into the ring buffer */
- if (myGH->cp_filename_list[myGH->cp_filename_index])
+ if (retval == 0)
{
- if (! checkpoint_keep_all)
+#ifdef _WIN32
+ /* Windows' rename(2) routine isn't POSIX compatible
+ in that it would unlink an existing file :-( */
+ unlink (cp_filename);
+#endif
+ if (rename (cp_tempname, cp_filename))
+ {
+ CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING,
+ "Could not rename temporary checkpoint file '%s' to '%s'",
+ cp_tempname, cp_filename);
+ retval = -1;
+ }
+ else
{
- remove (myGH->cp_filename_list[myGH->cp_filename_index]);
+ if (myGH->cp_filename_list[myGH->cp_filename_index])
+ {
+ if (! checkpoint_keep_all)
+ {
+ remove (myGH->cp_filename_list[myGH->cp_filename_index]);
+ }
+ free (myGH->cp_filename_list[myGH->cp_filename_index]);
+ }
+ myGH->cp_filename_list[myGH->cp_filename_index] = strdup (cp_filename);
+ myGH->cp_filename_index = (myGH->cp_filename_index+1) % checkpoint_keep;
}
- free (myGH->cp_filename_list[myGH->cp_filename_index]);
}
- myGH->cp_filename_list[myGH->cp_filename_index] = strdup (cp_filename);
- myGH->cp_filename_index = (myGH->cp_filename_index + 1) % checkpoint_keep;
}
/* stop the CP_TOTAL_TIMER timer and print timing information */