#include #include #include #include #include #include #include #include #include #include #include #include #include #include "cctk.h" #include "cctk_Parameters.h" #include "cctk_Version.h" extern "C" { static const char* rcsid = "$Header:$"; CCTK_FILEVERSION(Carpet_CarpetIOHDF5_iohdf5chckpt_recover_cc); } #include "CactusBase/IOUtil/src/ioGH.h" #include "CactusBase/IOUtil/src/ioutil_CheckpointRecovery.h" #include "bbox.hh" #include "data.hh" #include "gdata.hh" #include "ggf.hh" #include "vect.hh" #include "carpet.hh" #include "iohdf5.hh" #include "iohdf5GH.h" /* some macros for HDF5 group names */ #define PARAMETERS_GLOBAL_ATTRIBUTES_GROUP "Parameters and Global Attributes" #define ALL_PARAMETERS "All Parameters" namespace CarpetIOHDF5 { using namespace std; using namespace Carpet; // linked list for reading in the checkpoint file list datasetnamelist; int Checkpoint (const cGH* const cctkGH, int called_from); int DumpParametersGHExtentions (const cGH *cctkGH, int all, hid_t writer); int RecoverParameters (hid_t reader); int RecoverGHextensions (cGH* cctkGH, hid_t reader); int RecoverVariables (cGH* cctkGH, hid_t reader); void CarpetIOHDF5InitialDataCheckpoint( const cGH* const cgh){ DECLARE_CCTK_PARAMETERS; if (checkpoint && checkpoint_ID) { CCTK_INFO ("---------------------------------------------------------"); CCTK_INFO ("Dumping initial data checkpoint"); CCTK_INFO ("---------------------------------------------------------"); Checkpoint (cgh, CP_INITIAL_DATA); } } // CarpetIOHDF5_InitialDataCheckpoint void CarpetIOHDF5EvolutionCheckpoint( const cGH* const cgh){ DECLARE_CCTK_PARAMETERS; // Test checkpoint_every, and adjust it. This is necessary until // recovery is more flexible. int every_full = pow(2.0,maxreflevels-1); if (checkpoint_every>0 && (checkpoint_every % every_full) != 0) { int every = (checkpoint_every / every_full + 1) * every_full; ostringstream outevery; outevery << every; CCTK_ParameterSet("checkpoint_every","IOUtil", outevery.str().c_str()); CCTK_VInfo (CCTK_THORNSTRING,"I have adjusted your checkpoint_every to %i.",every); } if (checkpoint && ((checkpoint_every > 0 && cgh->cctk_iteration % checkpoint_every == 0) || checkpoint_next)) { CCTK_INFO ("---------------------------------------------------------"); CCTK_VInfo (CCTK_THORNSTRING, "Dumping periodic checkpoint at " "iteration %d", cgh->cctk_iteration); CCTK_INFO ("---------------------------------------------------------"); Checkpoint (cgh, CP_EVOLUTION_DATA); if (checkpoint_next) { CCTK_ParameterSet ("checkpoint_next", CCTK_THORNSTRING, "no"); } } } // CarpetIOHDF5_EvolutionCheckpoint int CarpetIOHDF5RecoverParameters(void){ // Register with the Cactus Recovery Interface return (IOUtil_RecoverParameters (CarpetIOHDF5_Recover, ".h5", "HDF5")); } int CarpetIOHDF5_Recover (cGH* cctkGH, const char *basefilename, int called_from) { DECLARE_CCTK_PARAMETERS; int result=0; int myproc=-1; CarpetIOHDF5GH *myGH; static hid_t reader=0; //this thing absolutely needs to be static!!! myGH = NULL; myproc = CCTK_MyProc (cctkGH); if (called_from == CP_RECOVER_PARAMETERS) { // Okay, let's see what we can do about the parameters // Invent a file name ostringstream filenamebuf; // if(CCTK_nProcs(cctkGH) == 1) filenamebuf << recover_dir << "/" << basefilename << ".h5"; //else // filenamebuf << recover_dir << "/" << basefilename << ".file_0.h5"; string filenamestr = filenamebuf.str(); const char * const filename = filenamestr.c_str(); if (myproc == 0) { // First, open the file if (verbose) CCTK_VInfo(CCTK_THORNSTRING, "Opening Checkpoint file %s for recovery",filename); reader = H5Fopen (filename, H5F_ACC_RDONLY, H5P_DEFAULT); if (reader<0) { CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING, "Could not recover from \"%s\"", filename); } } // myproc == 0 } else { /* This is the case for CP_RECOVER_DATA. CCTK_RECOVER_PARAMETERS must have been called before and set up the file info structure. */ if (myproc == 0) { assert(reader>=0); } } if (called_from == CP_RECOVER_PARAMETERS) { return (RecoverParameters (reader)); } if (called_from == CP_RECOVER_DATA) { CCTK_INT4 numberofmgtimes=0; CCTK_VInfo(CCTK_THORNSTRING,"Starting to recover data on reflevel %d!!!",reflevel); if (myproc == 0) { // Use refinement levels parameter from checkpointing file? if (use_reflevels_from_checkpoint && reflevel==0) { herr_t herr; hid_t group = H5Gopen (reader, PARAMETERS_GLOBAL_ATTRIBUTES_GROUP); assert(group >= 0); hid_t dataset = H5Dopen (group, ALL_PARAMETERS); assert(dataset>= 0); int reflevels_chkpt; ReadAttribute (dataset, "carpet_reflevels", reflevels_chkpt); herr = H5Dclose(dataset); assert(!herr); herr = H5Gclose(group); assert(!herr); ostringstream reflevels_str; reflevels_str << reflevels_chkpt; CCTK_ParameterSet ("refinement_levels","CarpetRegrid",reflevels_str.str().c_str()); CCTK_VInfo (CCTK_THORNSTRING, "Using %i reflevels read from checkpoint file. Ignoring value in parameter file.", reflevels_chkpt); } /* we need all the times on the individual levels */ // these are a bit messy to extract // Actually, we do have to do this only once hid_t group = H5Gopen (reader, PARAMETERS_GLOBAL_ATTRIBUTES_GROUP); assert(group>=0); hid_t dataset = H5Dopen (group, ALL_PARAMETERS); assert(dataset >= 0); hid_t attr = H5Aopen_name (dataset, "numberofmgtimes"); assert(attr >= 0); hid_t atype = H5Aget_type (attr); if(H5Tequal(atype, H5T_NATIVE_INT)) { herr_t herr = H5Aread(attr, atype, &numberofmgtimes); assert(!herr); herr = H5Aclose(attr); assert(numberofmgtimes==mglevels); char buffer[100]; for(int lcv=0;lcv=0); atype = H5Aget_type (attr); assert (atype>=0); herr = H5Aread (attr, atype, &leveltimes.at(lcv).at(0)); assert(!herr); herr = H5Aclose(attr); assert(!herr); } herr = H5Dclose(dataset); assert(!herr); herr = H5Gclose(group); assert(!herr); } else { CCTK_WARN(0,"BAD BAD BAD! Can't read leveltimes!!"); } } // myproc == 0 // communicate the time on all the mglevels and reflevels int mpierr = MPI_Bcast (&numberofmgtimes, 1, CARPET_MPI_INT4, 0,MPI_COMM_WORLD); assert(!mpierr); for(int i=0;icctk_time = leveltimes.at(mglevel).at(reflevel); result += RecoverGHextensions(cctkGH,reader); if (verbose) { cout << "reflevel: " << reflevel << endl; } result += RecoverVariables (cctkGH,reader); CCTK_VInfo (CCTK_THORNSTRING, "Restarting simulation at iteration %d (physical time %g)", cctkGH->cctk_iteration, (double) cctkGH->cctk_time); } // called_from == CP_RECOVER_DATA if (reflevel==maxreflevels) { if(myproc == 0) { H5Fclose(reader); } } return (result); } // CarpetIOHDF5_Recover int RecoverVariables (cGH* cctkGH, hid_t reader) { DECLARE_CCTK_PARAMETERS; int retval = 0; int myproc = CCTK_MyProc (cctkGH); char * name; int ndatasets; int varindex; double datasettime; double leveltime; static double totaltime; hid_t dataset; herr_t herr; list refleveldatasetnamelist; if (reflevel==0) { totaltime = 0; } leveltime = MPI_Wtime(); if(myproc==0) { ndatasets=GetnDatasets(reader); assert (ndatasets>=0); } // Broadcast number of datasets MPI_Bcast (&ndatasets, 1, MPI_INT, 0, dist::comm); assert ((ndatasets)>=0); //if (verbose && reflevel==0) cout << "ndatasets: " << ndatasets << endl; if ( reflevel == 0) { cout << "ndatasets: " << ndatasets << endl; } if (reflevel==0) { for (int currdataset=0;currdataset::iterator currdataset; currdataset=datasetnamelist.begin(); while(currdataset!=datasetnamelist.end()) { char tempstr[10]; sprintf(tempstr,"rl=%d m=",reflevel); if ( (*currdataset).find(tempstr) < (*currdataset).size() ) { refleveldatasetnamelist.push_back((*currdataset).c_str()); currdataset = datasetnamelist.erase(currdataset); } else { ++currdataset; } // if ... } // while ... } // if(myproc==0) static long reflevelnamenum; // static keeps this thing local if(myproc==0) { reflevelnamenum=refleveldatasetnamelist.size(); } MPI_Bcast (&reflevelnamenum, 1, MPI_LONG, 0, dist::comm); assert ((reflevelnamenum)>=0); // fill bogus namelist for non-IO cpus if(myproc !=0) { for(long i=0;i < reflevelnamenum;i++) { refleveldatasetnamelist.push_back("blah"); } } comparetime = MPI_Wtime() - comparetime; cout << "Time for string comparison: " << comparetime << endl; cout << "I have for this reflevel " << refleveldatasetnamelist.size() << endl; list::iterator currdataset; currdataset=refleveldatasetnamelist.begin(); while(currdataset!=refleveldatasetnamelist.end()) { // cout << "name: " << (*currdataset).c_str() << endl; if (myproc==0) { dataset = H5Dopen (reader, (*currdataset).c_str()); assert(dataset); // Read data ReadAttribute (dataset, "name", name); varindex = CCTK_VarIndex(name); } MPI_Bcast (&varindex, 1, MPI_INT, 0, dist::comm); name = CCTK_FullName(varindex); if (verbose) { cout << name << " rl: " << reflevel << endl; } vector regions_read(Carpet::maps); assert (varindex>=0 && varindex=0); hid_t dataset = H5Dopen (group, ALL_PARAMETERS); assert(dataset >= 0); ReadAttribute(dataset,"GH$iteration",int4Buffer[0]); ReadAttribute(dataset,"main loop index",int4Buffer[1]); ReadAttribute(dataset,"carpet_global_time",realBuffer); // ReadAttribute(dataset,"carpet_reflevels",int4Buffer[2]); ReadAttribute(dataset,"carpet_delta_time",realBuffer2); herr_t herr = H5Dclose(dataset); assert(!herr); herr = H5Gclose(group); assert(!herr); } /* Broadcast the GH extensions to all processors */ /* NOTE: We have to use MPI_COMM_WORLD here because PUGH_COMM_WORLD is not yet set up at parameter recovery time. We also assume that PUGH_MPI_INT4 is a compile-time defined datatype. */ mpierr = MPI_Bcast (int4Buffer, 3, CARPET_MPI_INT4, 0,MPI_COMM_WORLD); assert(!mpierr); mpierr = MPI_Bcast (int4Buffer, 3, CARPET_MPI_INT4, 0,MPI_COMM_WORLD); assert(!mpierr); mpierr = MPI_Bcast (&realBuffer, 1, CARPET_MPI_REAL,0,MPI_COMM_WORLD); assert(!mpierr); mpierr = MPI_Bcast (&realBuffer2, 1, CARPET_MPI_REAL,0,MPI_COMM_WORLD); assert(!mpierr); global_time = (CCTK_REAL) realBuffer; delta_time = (CCTK_REAL) realBuffer2; // reflevels = (int) int4Buffer[2]; cctkGH->cctk_iteration = (int) int4Buffer[0]; CCTK_SetMainLoopIndex ((int) int4Buffer[1]); return (0); } // RecoverGHExtensions int RecoverParameters(hid_t reader){ DECLARE_CCTK_PARAMETERS; int myproc, retval; char *parameters; CCTK_INT4 parameterSize; hid_t group,dataset; herr_t herr; int mpierr; myproc = CCTK_MyProc (NULL); if (myproc == 0){ CCTK_VInfo (CCTK_THORNSTRING, "Recovering parameters from checkpoint "); group = H5Gopen (reader, PARAMETERS_GLOBAL_ATTRIBUTES_GROUP); assert(group >= 0); dataset = H5Dopen (group, ALL_PARAMETERS); assert(dataset>= 0); parameterSize = H5Dget_storage_size (dataset); parameters = (char *) malloc (parameterSize); herr = H5Dread (dataset, H5T_NATIVE_CHAR, H5S_ALL, H5S_ALL, H5P_DEFAULT, parameters); assert(!herr); herr = H5Dclose(dataset); assert(!herr); herr = H5Gclose(group); assert(!herr); if(verbose) { CCTK_VInfo (CCTK_THORNSTRING, "\n%s\n",parameters); } CCTK_VInfo(CCTK_THORNSTRING, "Successfully recovered parameters!"); } // myproc == 0 /* Broadcast the parameter buffer size to all processors */ /* NOTE: We have to use MPI_COMM_WORLD here because CARPET_COMM_WORLD is not yet set up at parameter recovery time. We also assume that CARPET_MPI_INT4 is a compile-time defined datatype. */ mpierr = MPI_Bcast (¶meterSize, 1, CARPET_MPI_INT4, 0, MPI_COMM_WORLD); assert(!mpierr); if (parameterSize > 0) { if (myproc) { parameters = (char*) malloc (parameterSize + 1); } mpierr = MPI_Bcast (parameters, parameterSize + 1, CARPET_MPI_CHAR, 0,MPI_COMM_WORLD); assert(!mpierr); IOUtil_SetAllParameters (parameters); free (parameters); } /* return positive value for success otherwise negative */ retval = (parameterSize > 0 ? 1 : -1); return (retval); } // RecoverParameters int Checkpoint (const cGH* const cctkGH, int called_from) { DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; herr_t herr; int retval = 0; const ioGH *ioUtilGH; ioRequest *request; CarpetIOHDF5GH *myGH; myGH = (CarpetIOHDF5GH *) CCTK_GHExtension (cctkGH, "CarpetIOHDF5"); // check if CarpetIOHDF5 was registered as I/O method if (myGH == NULL) { CCTK_WARN (0, "No CarpetIOHDF5 I/O methods registered"); return -1; } int const myproc = CCTK_MyProc (cctkGH); // Invent a file name ostringstream cp_tempname_buf; ostringstream cp_filename_buf; switch (called_from) { case CP_INITIAL_DATA: cp_tempname_buf << checkpoint_dir << "/tmp_" << checkpoint_ID_file << ".it_" << cctkGH->cctk_iteration << ".h5"; cp_filename_buf << checkpoint_dir << "/" << checkpoint_ID_file << ".it_" << cctkGH->cctk_iteration << ".h5"; break; case CP_EVOLUTION_DATA: cp_tempname_buf << checkpoint_dir << "/tmp_" << checkpoint_file << ".it_" << cctkGH->cctk_iteration << ".h5"; cp_filename_buf << checkpoint_dir << "/" << checkpoint_file << ".it_" << cctkGH->cctk_iteration << ".h5"; break; case CP_RECOVER_DATA: case CP_RECOVER_PARAMETERS: case FILEREADER_DATA: cp_tempname_buf << recover_dir << "/tmp_" << recover_file << ".h5"; cp_filename_buf << recover_dir << "/" << recover_file << ".h5"; break; default: CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING, "IOUtil_PrepareFilename: Unknown calling mode %d", called_from); break; } string const cp_tempname_str = cp_tempname_buf.str(); string const cp_filename_str = cp_filename_buf.str(); char const * const cp_tempname = cp_tempname_str.c_str(); char const * const cp_filename = cp_filename_str.c_str(); hid_t writer = -1; if (myproc == 0) { if (verbose) { CCTK_VInfo (CCTK_THORNSTRING, "Creating temporary checkpoint file '%s'", cp_tempname); } CCTK_VInfo (CCTK_THORNSTRING, "Creating temporary checkpoint file '%s'", cp_tempname); CCTK_VInfo (CCTK_THORNSTRING, "Verbose = %d", verbose); writer = H5Fcreate (cp_tempname, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); assert (writer>=0); herr = H5Fclose (writer); assert (!herr); writer = -1; // Now open the file writer = H5Fopen (cp_tempname, H5F_ACC_RDWR, H5P_DEFAULT); assert (writer>=0); // Dump all parameters and GHExtentions retval += DumpParametersGHExtentions (cctkGH, 1, writer); assert(!retval); } // myproc == 0 // now dump the grid variables on all mglevels, reflevels, maps and components BEGIN_MGLEVEL_LOOP(cctkGH) { BEGIN_REFLEVEL_LOOP(cctkGH) { if (verbose) { CCTK_INFO ("Dumping Grid Variables ..."); } for (int group = CCTK_NumGroups () - 1; group >= 0; group--) { /* only dump groups which have storage assigned */ if (CCTK_QueryGroupStorageI (cctkGH, group) <= 0) { continue; } const int grouptype = CCTK_GroupTypeI(group); /* scalars and grid arrays only have 1 reflevel: */ if ( (grouptype != CCTK_GF) && (reflevel != 0) ) { continue; } /* now check if there is any memory allocated for GFs and GAs. GSs should always have memory allocated and there is at this point no CCTK function to check this :/ */ if ( (grouptype == CCTK_GF) || (grouptype == CCTK_ARRAY)){ const int gpdim = CCTK_GroupDimI(group); int gtotalsize=1; vector tlsh(gpdim); assert(!CCTK_GrouplshGI(cctkGH,gpdim,&tlsh[0],group)); for(int i=0;icheck_exist = 0; request->hdatatype = gdata.vartype; for (request->hdim = 0; request->hdim < request->vdim; request->hdim++) { request->downsample[request->hdim] = 1; } /* loop over all variables in this group */ for (request->vindex = first_vindex; request->vindex < first_vindex + gdata.numvars; request->vindex++) { /* loop over all timelevels of this variable */ for (request->timelevel = 0; request->timelevel < gdata.numtimelevels; request->timelevel++) { if (verbose) { char *fullname = CCTK_FullName (request->vindex); if (fullname != NULL) { CCTK_VInfo (CCTK_THORNSTRING, " %s (timelevel %d)", fullname, request->timelevel); free (fullname); } else { CCTK_VWarn(1, __LINE__, __FILE__, CCTK_THORNSTRING, "Invalid variable with varindex %d", request->vindex); } } // write the var if (grouptype == CCTK_ARRAY || grouptype == CCTK_GF || grouptype == CCTK_SCALAR) { char* fullname = CCTK_FullName (request->vindex); if (verbose) CCTK_VInfo (CCTK_THORNSTRING,"%s: reflevel: %d map: %d component: %d grouptype: %d ", fullname,reflevel,Carpet::map,component,grouptype); free(fullname); retval += WriteVar(cctkGH,writer,request,1); } else { char *fullname = CCTK_FullName (request->vindex); CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Invalid group type %d for variable '%s'", grouptype, fullname); free(fullname); retval = -1; } } } /* end of loop over all variables */ } /* end of loop over all groups */ } END_REFLEVEL_LOOP; } END_MGLEVEL_LOOP; if (myproc==0) { // Close the file herr = H5Fclose(writer); assert(!herr); } if (retval == 0) { if (myproc==0) { if (rename (cp_tempname, cp_filename)) { CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Could not rename temporary checkpoint file '%s' to '%s'", cp_tempname, cp_filename); retval = -1; } else { if (myGH->cp_filename_list[myGH->cp_filename_index]) { if (checkpoint_keep > 0) { remove (myGH->cp_filename_list[myGH->cp_filename_index]); } free (myGH->cp_filename_list[myGH->cp_filename_index]); } myGH->cp_filename_list[myGH->cp_filename_index] = strdup (cp_filename); myGH->cp_filename_index = (myGH->cp_filename_index+1) % abs (checkpoint_keep); } // else } // myproc == 0 } // retval == 0 return retval; } // Checkpoint int DumpParametersGHExtentions (const cGH *cctkGH, int all, hid_t writer) { // large parts of this routine were taken from // Thomas Radke's IOHDF5Util. Thanks Thomas! DECLARE_CCTK_PARAMETERS; char *parameters; hid_t group, dataspace, dataset; hsize_t size; herr_t herr; CCTK_INT4 itmp; CCTK_REAL dtmp; const char *version; ioGH *ioUtilGH; if (verbose) { CCTK_INFO ("Dumping Parameters and GH Extentions..."); } /* get the parameter string buffer */ parameters = IOUtil_GetAllParameters (cctkGH, all); if (parameters) { size = strlen (parameters) + 1; group = H5Gcreate (writer, PARAMETERS_GLOBAL_ATTRIBUTES_GROUP, 0); assert(group>=0); dataspace = H5Screate_simple (1, &size, NULL); assert(dataspace>=0); dataset = H5Dcreate (group, ALL_PARAMETERS, H5T_NATIVE_UCHAR, dataspace, H5P_DEFAULT); assert(dataset>=0); herr = H5Dwrite (dataset, H5T_NATIVE_CHAR, H5S_ALL, H5S_ALL, H5P_DEFAULT, parameters); assert(!herr); // now dump the GH Extentions /* get the handle for IOUtil extensions */ ioUtilGH = (ioGH *) CCTK_GHExtension (cctkGH, "IO"); itmp = CCTK_MainLoopIndex (); WriteAttribute(dataset,"main loop index",itmp); itmp = cctkGH->cctk_iteration; WriteAttribute(dataset,"GH$iteration",itmp); itmp = ioUtilGH->ioproc_every; WriteAttribute(dataset,"GH$ioproc_every",itmp); itmp = CCTK_nProcs (cctkGH); WriteAttribute(dataset,"GH$nprocs",itmp); dtmp = cctkGH->cctk_time; WriteAttribute(dataset,"GH$time", dtmp); dtmp = global_time; WriteAttribute(dataset,"carpet_global_time", dtmp); itmp = reflevels; WriteAttribute(dataset,"carpet_reflevels", itmp); dtmp = delta_time; WriteAttribute(dataset,"carpet_delta_time", dtmp); version = CCTK_FullVersion(); WriteAttribute(dataset,"Cactus version", version); /* finally, we need all the times on the individual refinement levels */ const int numberofmgtimes=mglevels; WriteAttribute(dataset,"numberofmgtimes",numberofmgtimes); for(int i=0;i < numberofmgtimes;i++) { char buffer[100]; sprintf(buffer,"mgleveltimes %d",i); WriteAttribute(dataset,buffer,(double *) &leveltimes.at(i).at(0), reflevels); } herr = H5Dclose (dataset); assert(!herr); herr = H5Sclose (dataspace); assert(!herr); herr = H5Gclose (group); assert(!herr); free (parameters); } return 0; } // DumpParametersGHExtentions } // namespace CarpetIOHDF5