diff options
author | tradke <tradke@4825ed28-b72c-4eae-9704-e50c059e567d> | 2002-02-28 10:52:27 +0000 |
---|---|---|
committer | tradke <tradke@4825ed28-b72c-4eae-9704-e50c059e567d> | 2002-02-28 10:52:27 +0000 |
commit | 91f29c7f9ea41731286fa36f7f3696437228dfb2 (patch) | |
tree | 087e91531f64a857b965e693a495af2f1daf2a4e /src | |
parent | 354eecccc7b74478c8ebcc69e0bb7f686a934a0b (diff) |
Check whether all IO processors could successfully create a checkpoint.
Only then may old checkpoint files be removed.
git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGHIO/IOHDF5/trunk@105 4825ed28-b72c-4eae-9704-e50c059e567d
Diffstat (limited to 'src')
-rw-r--r-- | src/DumpGH.c | 62 |
1 files changed, 42 insertions, 20 deletions
diff --git a/src/DumpGH.c b/src/DumpGH.c index 18ca6c0..5ede0f2 100644 --- a/src/DumpGH.c +++ b/src/DumpGH.c @@ -17,6 +17,9 @@ #include "CactusBase/IOUtil/src/ioGH.h" #include "CactusBase/IOUtil/src/ioutil_CheckpointRecovery.h" #include "BetaThorns/IOHDF5Util/src/ioHDF5UtilGH.h" +#ifdef CCTK_MPI +#include "CactusPUGH/PUGH/src/include/pugh.h" +#endif #include "ioHDF5GH.h" #include <stdlib.h> @@ -174,14 +177,14 @@ void IOHDF5_TerminationCheckpoint (cGH *GH) @@*/ static int IOHDF5_Checkpoint (cGH *GH, int called_from) { - DECLARE_CCTK_PARAMETERS hid_t file; int myproc; - int retval; + CCTK_INT retval; ioGH *ioUtilGH; ioHDF5GH *myGH; /* FIXME: allocate filenames dynamically */ char cp_filename[1024], cp_tempname[1024]; + DECLARE_CCTK_PARAMETERS retval = 0; @@ -241,6 +244,19 @@ static int IOHDF5_Checkpoint (cGH *GH, int called_from) file); } +#ifdef CCTK_MPI + /* find out whether all IO processors succeeded in writing the checkpoint */ + if (ioUtilGH->nioprocs > 1) + { + CCTK_INT tmp; + + + tmp = retval; + CACTUS_MPI_ERROR (MPI_Allreduce (&tmp, &retval, 1, PUGH_MPI_INT, MPI_SUM, + PUGH_pGH (GH)->PUGH_COMM_WORLD)); + } +#endif + /* close the temporary checkpoint file and rename it to the real file */ if (myproc == ioUtilGH->ioproc) { @@ -256,30 +272,36 @@ static int IOHDF5_Checkpoint (cGH *GH, int called_from) IOHDF5_ERROR (H5Fclose (file)); } -#ifdef _WIN32 - /* Windows' rename(2) routine isn't POSIX compatible - in that it would unlink an existing file :-( */ - unlink (cp_filename); -#endif - if (rename (cp_tempname, cp_filename)) - { - CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, - "Could not rename temporary checkpoint file '%s' to '%s'", - cp_tempname, cp_filename); - } - /* delete the oldest checkpoint file if checkpoint_keep_all isn't set and put the new filename into the ring buffer */ - if (myGH->cp_filename_list[myGH->cp_filename_index]) + if (retval == 0) { - if (! 
checkpoint_keep_all) +#ifdef _WIN32 + /* Windows' rename(2) routine isn't POSIX compatible + in that it would unlink an existing file :-( */ + unlink (cp_filename); +#endif + if (rename (cp_tempname, cp_filename)) + { + CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, + "Could not rename temporary checkpoint file '%s' to '%s'", + cp_tempname, cp_filename); + retval = -1; + } + else { - remove (myGH->cp_filename_list[myGH->cp_filename_index]); + if (myGH->cp_filename_list[myGH->cp_filename_index]) + { + if (! checkpoint_keep_all) + { + remove (myGH->cp_filename_list[myGH->cp_filename_index]); + } + free (myGH->cp_filename_list[myGH->cp_filename_index]); + } + myGH->cp_filename_list[myGH->cp_filename_index] = strdup (cp_filename); + myGH->cp_filename_index = (myGH->cp_filename_index+1) % checkpoint_keep; } - free (myGH->cp_filename_list[myGH->cp_filename_index]); } - myGH->cp_filename_list[myGH->cp_filename_index] = strdup (cp_filename); - myGH->cp_filename_index = (myGH->cp_filename_index + 1) % checkpoint_keep; } /* stop the CP_TOTAL_TIMER timer and print timing information */ |