diff options
author | Thomas Radke <tradke@aei.mpg.de> | 2006-02-06 18:38:00 +0000 |
---|---|---|
committer | Thomas Radke <tradke@aei.mpg.de> | 2006-02-06 18:38:00 +0000 |
commit | e8dd3974cc9ec67ec41ae4bdd788822b0b264765 (patch) | |
tree | 53cb8088aac77810e9fbe2d350b3e811ec88d47e | |
parent | 34ef15bf8538f2e0fc6a7b91158f700a63cbbba1 (diff) |
CarpetIOHDF5: bugfix for writing checkpoints
Accumulate any low-level errors returned by HDF5 library calls and check them
after writing a checkpoint. Do not remove an existing checkpoint if there were
any low-level errors in generating the previous one.
darcs-hash:20060206183846-776a0-549e715d7a3fceafe70678aaf1329052dce724bb.gz
-rw-r--r-- | Carpet/CarpetIOHDF5/src/CarpetIOHDF5.cc | 29 | ||||
-rw-r--r-- | Carpet/CarpetIOHDF5/src/CarpetIOHDF5.hh | 7 | ||||
-rw-r--r-- | Carpet/CarpetIOHDF5/src/Input.cc | 8 | ||||
-rw-r--r-- | Carpet/CarpetIOHDF5/src/Output.cc | 46 |
4 files changed, 55 insertions, 35 deletions
diff --git a/Carpet/CarpetIOHDF5/src/CarpetIOHDF5.cc b/Carpet/CarpetIOHDF5/src/CarpetIOHDF5.cc index 55b21aaf5..58f413ec0 100644 --- a/Carpet/CarpetIOHDF5/src/CarpetIOHDF5.cc +++ b/Carpet/CarpetIOHDF5/src/CarpetIOHDF5.cc @@ -48,6 +48,8 @@ static void GetVarIndex (int vindex, const char* optstring, void* arg); static void CheckSteerableParameters (const cGH *const cctkGH, CarpetIOHDF5GH *myGH); static void WarnAboutDeprecatedParameters (void); +static int WriteMetadata (const cGH *cctkGH, int nioprocs, + bool called_from_checkpoint, hid_t file); ////////////////////////////////////////////////////////////////////////////// // public routines @@ -246,6 +248,7 @@ static void* SetupGH (tFleshConfig* const fleshconfig, { DECLARE_CCTK_PARAMETERS; + int error_count = 0; // register CarpetIOHDF5's routines as a new I/O method const int IOMethod = CCTK_RegisterIOMethod ("IOHDF5"); @@ -536,6 +539,7 @@ static int OutputVarAs (const cGH* const cctkGH, const char* const fullname, DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; + int error_count = 0; int vindex = -1; if (CCTK_TraverseString (fullname, GetVarIndex, &vindex, CCTK_VAR) < 0) { @@ -646,7 +650,7 @@ static int OutputVarAs (const cGH* const cctkGH, const char* const fullname, HDF5_ERROR (file = H5Fcreate (c_filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)); // write metadata information - WriteMetadata (cctkGH, nioprocs, false, file); + error_count += WriteMetadata (cctkGH, nioprocs, false, file); } else { HDF5_ERROR (file = H5Fopen (c_filename, H5F_ACC_RDWR, H5P_DEFAULT)); } @@ -683,7 +687,7 @@ static int OutputVarAs (const cGH* const cctkGH, const char* const fullname, static int Checkpoint (const cGH* const cctkGH, int called_from) { - int retval = 0; + int error_count = 0; DECLARE_CCTK_PARAMETERS; @@ -713,7 +717,7 @@ static int Checkpoint (const cGH* const cctkGH, int called_from) H5P_DEFAULT)); // write metadata information - WriteMetadata (cctkGH, nioprocs, true, file); + error_count += WriteMetadata (cctkGH, nioprocs, true, file); } // now dump the grid variables on all mglevels, reflevels, maps and components @@ -793,7 +797,7 @@ static int Checkpoint (const cGH* const cctkGH, int called_from) } // write the var - retval += parallel_io ? + error_count += parallel_io ? WriteVarChunkedParallel (cctkGH, file, request, true) : WriteVarChunkedSequential (cctkGH, file, request, true); } @@ -816,16 +820,16 @@ static int Checkpoint (const cGH* const cctkGH, int called_from) } // get global error count - int temp = retval; - MPI_Allreduce (&temp, &retval, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + int temp = error_count; + MPI_Allreduce (&temp, &error_count, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - if (retval == 0) { + if (error_count == 0) { if (file >= 0) { if (rename (tempname, filename)) { CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Could not rename temporary checkpoint file '%s' to '%s'", tempname, filename); - retval = -1; + error_count = -1; } else if (called_from == CP_EVOLUTION_DATA and checkpoint_keep > 0) { CarpetIOHDF5GH *myGH = (CarpetIOHDF5GH *) CCTK_GHExtension (cctkGH, CCTK_THORNSTRING); @@ -864,14 +868,15 @@ static int Checkpoint (const cGH* const cctkGH, int called_from) free (tempname); free (filename); - return retval; + return error_count; } // Checkpoint -void WriteMetadata (const cGH *cctkGH, int nioprocs, - bool called_from_checkpoint, hid_t file) +static int WriteMetadata (const cGH *cctkGH, int nioprocs, + bool called_from_checkpoint, hid_t file) { + int error_count = 0; hid_t group, scalar_dataspace, array_dataspace, datatype, attr; DECLARE_CCTK_PARAMETERS; @@ -990,6 +995,8 @@ void WriteMetadata (const cGH *cctkGH, int nioprocs, HDF5_ERROR (H5Sclose (scalar_dataspace)); HDF5_ERROR (H5Sclose (array_dataspace)); HDF5_ERROR (H5Gclose (group)); + + return (error_count); } diff --git a/Carpet/CarpetIOHDF5/src/CarpetIOHDF5.hh b/Carpet/CarpetIOHDF5/src/CarpetIOHDF5.hh index b7c5e0be8..13b710b86 100644 --- a/Carpet/CarpetIOHDF5/src/CarpetIOHDF5.hh +++ b/Carpet/CarpetIOHDF5/src/CarpetIOHDF5.hh @@ -45,6 +45,7 @@ CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, \ "HDF5 call '%s' returned error code %d", \ #fn_call, _error_code); \ + error_count++; \ } \ } @@ -100,12 +101,6 @@ namespace CarpetIOHDF5 const ioRequest* const request, bool called_from_checkpoint); - // adds simulation metadata (inclusive parameters) to an HDF5 file - void WriteMetadata (const cGH* const cctkGH, - int nioprocs, - bool called_from_checkpoint, - hid_t file); - // returns an HDF5 datatype corresponding to the given CCTK datatype hid_t CCTKtoHDF5_Datatype (const cGH* const cctkGH, int cctk_type, diff --git a/Carpet/CarpetIOHDF5/src/Input.cc b/Carpet/CarpetIOHDF5/src/Input.cc index f3a4c889b..ec531559a 100644 --- a/Carpet/CarpetIOHDF5/src/Input.cc +++ b/Carpet/CarpetIOHDF5/src/Input.cc @@ -124,6 +124,7 @@ int CarpetIOHDF5_SetNumRefinementLevels (void) ////////////////////////////////////////////////////////////////////////////// int CarpetIOHDF5_CloseFiles (void) { + int error_count = 0; DECLARE_CCTK_PARAMETERS; @@ -145,7 +146,7 @@ int CarpetIOHDF5_CloseFiles (void) filesets.clear(); - return (0); + return (-error_count); } @@ -156,6 +157,7 @@ int CarpetIOHDF5_CloseFiles (void) ////////////////////////////////////////////////////////////////////////////// int Recover (cGH* cctkGH, const char *basefilename, int called_from) { + int error_count = 0; DECLARE_CCTK_PARAMETERS; @@ -423,6 +425,7 @@ static list<fileset_t>::iterator OpenFileSet (const cGH* const cctkGH, { file_t file; fileset_t fileset; + int error_count = 0; DECLARE_CCTK_PARAMETERS; // first try to open a chunked file written on this processor @@ -519,6 +522,7 @@ static list<fileset_t>::iterator OpenFileSet (const cGH* const cctkGH, ////////////////////////////////////////////////////////////////////////////// static void ReadMetadata (fileset_t& fileset, hid_t file) { + int error_count = 0; DECLARE_CCTK_PARAMETERS; fileset.nioprocs = 1; @@ -592,6 +596,7 @@ static void ReadMetadata (fileset_t& fileset, hid_t file) ////////////////////////////////////////////////////////////////////////////// static herr_t BrowseDatasets (hid_t group, const char *objectname, void *arg) { + int error_count = 0; file_t *file = (file_t *) arg; patch_t patch; hid_t dataset, dataspace, attr, attrtype; @@ -683,6 +688,7 @@ static int ReadVar (const cGH* const cctkGH, vector<ibset> &bboxes_read, bool in_recovery) { + int error_count = 0; DECLARE_CCTK_PARAMETERS; const int gindex = CCTK_GroupIndexFromVarI (patch->vindex); diff --git a/Carpet/CarpetIOHDF5/src/Output.cc b/Carpet/CarpetIOHDF5/src/Output.cc index 272150c40..0756eb757 100644 --- a/Carpet/CarpetIOHDF5/src/Output.cc +++ b/Carpet/CarpetIOHDF5/src/Output.cc @@ -20,10 +20,10 @@ using namespace Carpet; // add attributes to an HDF5 dataset -static void AddAttributes (const cGH *const cctkGH, const char *fullname, - int vdim, int refinementlevel, - const ioRequest* const request, - const ibbox& bbox, hid_t dataset); +static int AddAttributes (const cGH *const cctkGH, const char *fullname, + int vdim, int refinementlevel, + const ioRequest* const request, + const ibbox& bbox, hid_t dataset); int WriteVarUnchunked (const cGH* const cctkGH, @@ -33,6 +33,7 @@ int WriteVarUnchunked (const cGH* const cctkGH, { DECLARE_CCTK_PARAMETERS; + int error_count = 0; char *fullname = CCTK_FullName(request->vindex); const int gindex = CCTK_GroupIndexFromVarI (request->vindex); assert (gindex >= 0 and gindex < (int) Carpet::arrdata.size ()); @@ -216,8 +217,9 @@ int WriteVarUnchunked (const cGH* const cctkGH, // (have to do it inside of the COMPONENT_LOOP so that we have // access to the cGH elements) if (first_time) { - AddAttributes (cctkGH, fullname, group.dim, refinementlevel, - request, *bbox, dataset); + error_count += AddAttributes (cctkGH, fullname, group.dim, + refinementlevel, request, *bbox, + dataset); first_time = false; } } @@ -251,7 +253,8 @@ int WriteVarUnchunked (const cGH* const cctkGH, free (fullname); - return 0; + // return the number of errors that occured during this output + return error_count; } @@ -262,6 +265,7 @@ int WriteVarChunkedSequential (const cGH* const cctkGH, { DECLARE_CCTK_PARAMETERS; + int error_count = 0; char *fullname = CCTK_FullName(request->vindex); const int gindex = CCTK_GroupIndexFromVarI (request->vindex); assert (gindex >= 0 and gindex < (int) Carpet::arrdata.size ()); @@ -378,8 +382,8 @@ int WriteVarChunkedSequential (const cGH* const cctkGH, HDF5_ERROR (H5Sclose (dataspace)); HDF5_ERROR (H5Dwrite (dataset, memdatatype, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)); - AddAttributes (cctkGH, fullname, group.dim, refinementlevel, - request, bbox, dataset); + error_count += AddAttributes (cctkGH, fullname, group.dim, + refinementlevel, request, bbox,dataset); HDF5_ERROR (H5Dclose (dataset)); } @@ -393,7 +397,8 @@ int WriteVarChunkedSequential (const cGH* const cctkGH, free (fullname); - return 0; + // return the number of errors that occured during this output + return error_count; } @@ -404,6 +409,7 @@ int WriteVarChunkedParallel (const cGH* const cctkGH, { DECLARE_CCTK_PARAMETERS; + int error_count = 0; char *fullname = CCTK_FullName(request->vindex); const int gindex = CCTK_GroupIndexFromVarI (request->vindex); assert (gindex >= 0 and gindex < (int) Carpet::arrdata.size ()); @@ -514,8 +520,8 @@ int WriteVarChunkedParallel (const cGH* const cctkGH, HDF5_ERROR (H5Sclose (dataspace)); HDF5_ERROR (H5Dwrite (dataset, memdatatype, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)); - AddAttributes (cctkGH, fullname, group.dim, refinementlevel, - request, bbox, dataset); + error_count += AddAttributes (cctkGH, fullname, group.dim,refinementlevel, + request, bbox, dataset); HDF5_ERROR (H5Dclose (dataset)); if (data != mydata) free (data); @@ -525,16 +531,19 @@ int WriteVarChunkedParallel (const cGH* const cctkGH, free (fullname); - return 0; + // return the number of errors that occured during this output + return error_count; } // add attributes to an HDF5 dataset -static void AddAttributes (const cGH *const cctkGH, const char *fullname, - int vdim, int refinementlevel, - const ioRequest* request, - const ibbox& bbox, hid_t dataset) +static int AddAttributes (const cGH *const cctkGH, const char *fullname, + int vdim, int refinementlevel, + const ioRequest* request, + const ibbox& bbox, hid_t dataset) { + int error_count = 0; + // Legacy arguments hid_t attr, dataspace, datatype; HDF5_ERROR (dataspace = H5Screate (H5S_SCALAR)); @@ -637,6 +646,9 @@ static void AddAttributes (const cGH *const cctkGH, const char *fullname, HDF5_ERROR (H5Tclose (datatype)); HDF5_ERROR (H5Sclose (dataspace)); + + // return the number of errors that occured during this output + return error_count; } |