diff options
author | Thomas Radke <tradke@aei.mpg.de> | 2006-02-17 16:09:00 +0000 |
---|---|---|
committer | Thomas Radke <tradke@aei.mpg.de> | 2006-02-17 16:09:00 +0000 |
commit | 17b958352d6f13f80b2ba76e201c842a7902f80f (patch) | |
tree | 492e39e2f4167ca794b6ab8a371992ae19e76f00 | |
parent | 67daef6ba786b90af045d39fa3f1759f2203ff0e (diff) |
CarpetIOHDF5: better optimisation during recovery
When recovering from a checkpoint, each processor now continuously reads through
all chunked files until all grid variables on this processor have been fully
recovered. This should always minimise the number of individual checkpoint files
necessary to open on each processor.
darcs-hash:20060217160928-776a0-28c076749861c0b26d1c41a6f4ef3bdb00c23274.gz
-rw-r--r-- | Carpet/CarpetIOHDF5/src/Input.cc | 59 |
1 files changed, 35 insertions, 24 deletions
diff --git a/Carpet/CarpetIOHDF5/src/Input.cc b/Carpet/CarpetIOHDF5/src/Input.cc index 505e92e7b..098a872fa 100644 --- a/Carpet/CarpetIOHDF5/src/Input.cc +++ b/Carpet/CarpetIOHDF5/src/Input.cc @@ -225,6 +225,25 @@ int Recover (cGH* cctkGH, const char *basefilename, int called_from) } } + // query for groups which have the 'CHECKPOINT = "no"' option set + // Such groups are not checked against being read completely. + vector<bool> not_checkpointed(numvars); + if (in_recovery) { + for (unsigned int vindex = 0; vindex < not_checkpointed.size(); vindex++) { + int gindex = CCTK_GroupIndexFromVarI (vindex); + int tagstable = CCTK_GroupTagsTableI (gindex); + int const len = Util_TableGetString (tagstable, 0, NULL, "checkpoint"); + if (len > 0) { + char* value = new char[len + 1]; + Util_TableGetString (tagstable, len + 1, value, "checkpoint"); + if (len == sizeof ("no") - 1 and CCTK_Equals (value, "no")) { + not_checkpointed[vindex] = true; + } + delete[] value; + } + } + } + // create a bbox set for each group to list how much needs to be read const int numgroups = CCTK_NumGroups (); vector<vector<ibset> > group_bboxes (numgroups); @@ -248,13 +267,6 @@ int Recover (cGH* cctkGH, const char *basefilename, int called_from) // loop over all input files of this set for (unsigned int i = 0; i < fileset->files.size(); i++) { - // some optimisation for the case when recovering on the same number - // of processors as during the checkpoint: - // read only this processor's chunked file, skip all others - if (fileset->nioprocs == dist::size() and i > 0) { - break; - } - const int file_idx = (i + fileset->first_ioproc) % fileset->nioprocs; file_t& file = fileset->files[file_idx]; @@ -267,6 +279,11 @@ int Recover (cGH* cctkGH, const char *basefilename, int called_from) HDF5_ERROR (file.file = H5Fopen (file.filename, H5F_ACC_RDONLY, H5P_DEFAULT)); + if (CCTK_Equals (verbose, "full")) { + CCTK_VInfo (CCTK_THORNSTRING, "opening %s file '%s'", + in_recovery ? "checkpoint" : "input", file.filename); + } + // browse through all datasets contained in this file HDF5_ERROR (H5Giterate (file.file, "/", NULL, BrowseDatasets, &file)); } @@ -339,13 +356,20 @@ int Recover (cGH* cctkGH, const char *basefilename, int called_from) } // check if all variables have been read completely already - bool all_read_completely = true; + bool all_done = true; for (unsigned int vindex = 0; vindex < read_completely.size(); vindex++) { + + // skip all variables which aren't expected to be recovered + if (not_checkpointed[vindex] or + (CCTK_GroupTypeFromVarI (vindex) != CCTK_GF and reflevel > 0)) { + continue; + } + for (unsigned int tl = 0; tl < read_completely[vindex].size(); tl++) { - all_read_completely &= read_completely[vindex][tl]; + all_done &= read_completely[vindex][tl]; } } - if (all_read_completely) { + if (all_done) { break; } } @@ -358,19 +382,6 @@ int Recover (cGH* cctkGH, const char *basefilename, int called_from) continue; } - bool not_checkpointed = false; - int gindex = CCTK_GroupIndexFromVarI (vindex); - int tagstable = CCTK_GroupTagsTableI (gindex); - int const len = Util_TableGetString (tagstable, 0, NULL, "checkpoint"); - if (len > 0) { - char* value = new char[len + 1]; - Util_TableGetString (tagstable, len + 1, value, "checkpoint"); - if (len == sizeof ("no") - 1 and CCTK_Equals (value, "no")) { - not_checkpointed = true; - } - delete[] value; - } - for (unsigned int tl = 0; tl < read_completely[vindex].size(); tl++) { if (called_from == FILEREADER_DATA and not (ioUtilGH->do_inVars and ioUtilGH->do_inVars[vindex])) { @@ -385,7 +396,7 @@ int Recover (cGH* cctkGH, const char *basefilename, int called_from) } char* fullname = CCTK_FullName (vindex); if (size == 0) { - if (not_checkpointed) { + if (not_checkpointed[vindex]) { CCTK_VWarn (4, __LINE__, __FILE__, CCTK_THORNSTRING, "variable '%s' timelevel %d has not been read " "(variable has option tag \"CHECKPOINT = 'no'\")", |