/*@@ @file RecoverVar.c @date Thu Jun 18 16:34:59 1998 @author Tom Goodale @desc Routines to recover variables from a given HDF5 data or checkpoint file. These routines are used by other HDF5 IO methods. @enddesc @version $Id$ @@*/ #include #include "cctk.h" #include "cctk_Parameters.h" #include "CactusBase/IOUtil/src/ioGH.h" #include "CactusBase/IOUtil/src/ioutil_CheckpointRecovery.h" #include "CactusPUGH/PUGH/src/include/pugh.h" #include "ioHDF5UtilGH.h" /* the rcs ID and its dummy function to use it */ static const char *rcsid = "$Id$"; CCTK_FILEVERSION(BetaThorns_IOHDF5Util_RecoverVar_c) /******************************************************************** ******************** Macro Definitions ************************ ********************************************************************/ /* tag base for MPI messages */ #define MPITAGBASE 1001 /******************************************************************** ******************** Internal Typedefs ************************ ********************************************************************/ typedef struct { cGH *GH; int ioproc; int ioproc_every; int unchunked; int has_version; } iterate_info_t; typedef struct { iterate_info_t *it_info; int element_size; int hdf5type; #ifdef CCTK_MPI MPI_Datatype mpi_type; #endif } recover_info_t; /******************************************************************** ******************** Internal Routines ************************ ********************************************************************/ static herr_t processDataset (hid_t group, const char *datasetname, void *arg); /*@@ @routine IOHDF5Util_RecoverVariables @date Fri Jun 19 09:19:48 1998 @author Tom Goodale @desc Reads in data from an open HDF5 file. @enddesc @var GH @vdesc Pointer to CCTK grid hierarchy @vtype cGH * @vio in @endvar @var fileinfo @vdesc pointer to info structure describing the HDF5 file @vtype const fileinfo_t * @vio in @endvar @returntype int @returndesc 0 for success @endreturndesc @@*/ int IOHDF5Util_RecoverVariables (cGH *GH, const fileinfo_t *fileinfo) { iterate_info_t info; #ifdef CCTK_MPI pGH *pughGH; CCTK_INT var_info[3]; MPI_Status ms; MPI_Datatype mpi_type; int vindex, timelevel, proc, npoints; #endif DECLARE_CCTK_PARAMETERS info.GH = GH; info.ioproc = fileinfo->ioproc; info.unchunked = fileinfo->unchunked; info.ioproc_every = fileinfo->ioproc_every; info.has_version = fileinfo->has_version; #ifdef CCTK_MPI pughGH = PUGH_pGH (GH); /* now process the datasets: All IO processors read the datasets from their checkpoint file, verify their contents and communicate them to the non-I/O processors. */ /* At first the code for the IO processors. This holds also for the single processor case. */ if (CCTK_MyProc (GH) == fileinfo->ioproc) { #endif /* CCTK_MPI */ /* iterate over all datasets starting from "/" in the HDF5 file */ HDF5_ERROR (H5Giterate (fileinfo->file, "/", NULL, processDataset, &info)); #ifdef CCTK_MPI /* To signal completion to the non-IO processors an invalid variable index is broadcasted. */ var_info[0] = -1; for (proc = 1; proc < fileinfo->ioproc_every; proc++) for (proc = fileinfo->ioproc + 1; proc < fileinfo->ioproc + fileinfo->ioproc_every && proc < CCTK_nProcs (GH); proc++) { CACTUS_MPI_ERROR (MPI_Send (var_info, 3, PUGH_MPI_INT, proc, MPITAGBASE, pughGH->PUGH_COMM_WORLD)); } } else { /* And here the code for non-I/O processors: */ /* They don't know how many datasets there are, because the I/O processors could skip some on the fly during their consistency checks. The I/O Processor sends the index of the variable to be processed next. So, all non-I/O processors execute a loop where the termination condition is when an invalid index was received. */ while (1) { /* receive the next variable index from my IO processor */ CACTUS_MPI_ERROR (MPI_Recv (var_info, 3, PUGH_MPI_INT, fileinfo->ioproc, MPITAGBASE, pughGH->PUGH_COMM_WORLD, &ms)); vindex = var_info[0]; timelevel = var_info[1]; npoints = var_info[2]; /* check for termination condition */ if (vindex < 0) { break; } mpi_type = PUGH_MPIDataType (pughGH, CCTK_VarTypeI (vindex)); if (! mpi_type) { CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Unsupported datatype %d", CCTK_VarTypeI (vindex)); continue; } /* receive following data from my IO processor */ CACTUS_MPI_ERROR (MPI_Recv (GH->data[vindex][timelevel], npoints, mpi_type, fileinfo->ioproc, MPITAGBASE, pughGH->PUGH_COMM_WORLD, &ms)); } } #endif /* CCTK_MPI */ return (0); } /* NOTE: Although we could read the GH extensions from multiple recovery files in parallel, this is done only on by processor 0 here. Broadcasting the GH extensions is found faster than sending it in a loop from each I/O processor to all the non I/Os (don't have subcommunicators yet) */ int IOHDF5Util_RecoverGHextensions (cGH *GH, const fileinfo_t *fileinfo) { hid_t group; CCTK_REAL4 real4Buffer; CCTK_INT4 int4Buffer[3]; if (CCTK_MyProc (GH) == 0) { /* all the important global attributes and GH extensions are stored in the GLOBAL_ATTRIBUTES_GROUP group */ group = H5Gopen (fileinfo->file, GLOBAL_ATTRIBUTES_GROUP); int4Buffer[0] = group >= 0; if (int4Buffer[0]) { READ_ATTRIBUTE (group, "cctk_iteration", HDF5_INT4, &int4Buffer[1]); READ_ATTRIBUTE (group, "main_loop_index", HDF5_INT4, &int4Buffer[2]); READ_ATTRIBUTE (group, "cctk_time", HDF5_REAL4, &real4Buffer); HDF5_ERROR (H5Gclose (group)); } else { CCTK_WARN (1, "Can't find global attributes group. " "Is this really a Cactus HDF5 datafile ?"); } } #ifdef CCTK_MPI /* Broadcast the GH extensions to all processors */ /* NOTE: We have to use MPI_COMM_WORLD here because PUGH_COMM_WORLD is not yet set up at parameter recovery time. We also assume that PUGH_MPI_INT4 is a compile-time defined datatype. */ CACTUS_MPI_ERROR (MPI_Bcast (int4Buffer, 3, PUGH_MPI_INT4, 0,MPI_COMM_WORLD)); if (int4Buffer[0]) { CACTUS_MPI_ERROR (MPI_Bcast (&real4Buffer, 1, PUGH_MPI_REAL4, 0, MPI_COMM_WORLD)); } #endif if (int4Buffer[0]) { GH->cctk_time = real4Buffer; GH->cctk_iteration = int4Buffer[1]; CCTK_SetMainLoopIndex ((int) int4Buffer[2]); } /* return 0 for success otherwise negative */ return (int4Buffer[0] ? 0 : -1); } /* NOTE: Although we could read the parameters from multiple recovery files in parallel, this is done only on by processor 0 here. Broadcasting the complete parameter string is found faster than sending it in a loop from each IO processor to all the non IOs (don't have subcommunicators yet) */ int IOHDF5Util_RecoverParameters (const fileinfo_t *fileinfo) { hid_t group, attr, atype; char *parameters; CCTK_INT4 parameterSize; DECLARE_CCTK_PARAMETERS /* To make the compiler happy */ group = attr = 0; parameters = NULL; parameterSize = 0; if (CCTK_MyProc (NULL) == 0) { if (verbose) { CCTK_VInfo (CCTK_THORNSTRING, "Recovering parameters from checkpoint " "file '%s'", fileinfo->filename); } /* the parameters are stored in the CACTUS_PARAMETERS_GROUP group in the attribute ALL_PARAMETERS */ group = H5Gopen (fileinfo->file, CACTUS_PARAMETERS_GROUP); if (group > 0) { attr = H5Aopen_name (group, ALL_PARAMETERS); } if (group > 0 && attr > 0) { HDF5_ERROR (atype = H5Aget_type (attr)); parameterSize = H5Tget_size (atype); parameters = (char *) malloc (parameterSize + 1); HDF5_ERROR (H5Aread (attr, atype, parameters)); parameters[parameterSize] = 0; HDF5_ERROR (H5Tclose (atype)); } else { CCTK_WARN (1, "Can't find global parameters. " "Is this really a Cactus HDF5 checkpoint file ?"); } if (attr > 0) { HDF5_ERROR (H5Aclose (attr)); } if (group > 0) { HDF5_ERROR (H5Gclose (group)); } } #ifdef CCTK_MPI /* Broadcast the parameter buffer size to all processors */ /* NOTE: We have to use MPI_COMM_WORLD here because PUGH_COMM_WORLD is not yet set up at parameter recovery time. We also assume that PUGH_MPI_INT4 is a compile-time defined datatype. */ CACTUS_MPI_ERROR (MPI_Bcast (¶meterSize, 1, PUGH_MPI_INT4, 0, MPI_COMM_WORLD)); #endif if (parameterSize > 0) { #ifdef CCTK_MPI if (CCTK_MyProc (NULL) != 0) { parameters = (char *) malloc (parameterSize + 1); } CACTUS_MPI_ERROR (MPI_Bcast (parameters, parameterSize + 1, PUGH_MPI_CHAR, 0, MPI_COMM_WORLD)); #endif IOUtil_SetAllParameters (parameters); free (parameters); } /* return positive value for success otherwise negative */ return (parameterSize > 0 ? 1 : -1); } /******************************************************************** ******************** Internal Routines ************************ ********************************************************************/ /* local routine GetCommonAttributes() reads in the next dataset's attributes and verifies them: * checks if there is a variable with the name given by the name attribute * verifies that this variable still belongs to the same group * checks the group data info: - group type - variable type - ntimelevels - sizes (rank, dimensions) according to chunking mode If there is a mismatch a warning (warning level 2) is printed and a negative value is returned to indicate that this dataset should be ignored. If successful, the global variable index, the group type and the timelevel to restore are stored in {*vindex, *gtype, *timelevel}, and 0 is returned. */ static int GetCommonAttributes (cGH *GH, hid_t dataset, const char *datasetname, int unchunked, int *vindex, int *grouptype, int *timelevel, int is_group, int has_version) { cGroup group_static_data; cGroupDynamicData group_dynamic_data; int i, flag; const int *dims; int groupindex; hid_t datatype, dataspace; hsize_t rank_stored, *dims_stored; int grouptype_stored, ndims_stored, numtimelevels_stored; char *groupname, groupname_stored[128], fullvarname[128]; /* decompose the datasetname, ignore the iteration number */ if (sscanf (datasetname, "%[^ ] timelevel %d at iteration %d", fullvarname, timelevel, &i) != 3) { CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, "Cannot parse datasetname '%s'", datasetname); return (-1); } /* check if there is a matching variable */ *vindex = CCTK_VarIndex (fullvarname); if (*vindex < 0) { CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, "No matching variable found for '%s'", fullvarname); return (-1); } /* read and verify the group name */ READ_ATTRIBUTE (dataset, "groupname", H5T_C_S1, groupname_stored); groupname = CCTK_GroupNameFromVarI (*vindex); if (! CCTK_Equals (groupname_stored, groupname)) { CCTK_WARN (2, "Groupnames don't match"); return (-1); } free (groupname); /* verify group type, variable type, dims, sizes and ntimelevels */ READ_ATTRIBUTE (dataset, "grouptype", H5T_NATIVE_INT, &grouptype_stored); READ_ATTRIBUTE (dataset, "ntimelevels", H5T_NATIVE_INT,&numtimelevels_stored); /* get the group data */ groupindex = CCTK_GroupIndex (groupname_stored); if (CCTK_GroupData (groupindex, &group_static_data) != 0) { CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, "Cannot get static group data for '%s'", fullvarname); return (-1); } /* now check the group data against the information in the checkpoint file */ if (group_static_data.grouptype != grouptype_stored) { CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, "Group types don't match for '%s'", fullvarname); return (-1); } if (group_static_data.numtimelevels != numtimelevels_stored) { CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, "Number of timelevels don't match for '%s'", fullvarname); return (-1); } /* increment the timelevel for data from old checkpoint files */ if (! has_version && group_static_data.numtimelevels > 1) { (*timelevel)++; } /* open the first chunk to determine datatype, dims and sizes if the dataset is a chunk group */ if (is_group) { HDF5_ERROR (dataset = H5Dopen (dataset, "chunk0")); } HDF5_ERROR (datatype = H5Dget_type (dataset)); /* The CCTK variable type defines do not correlate with the HDF5 defines so compare them explicitely here. */ flag = (H5Tget_class (datatype) == H5T_FLOAT && strncmp (CCTK_VarTypeName (group_static_data.vartype), "CCTK_VARIABLE_REAL", 18) == 0) || (H5Tget_class (datatype) == H5T_INTEGER && (strncmp (CCTK_VarTypeName (group_static_data.vartype), "CCTK_VARIABLE_INT", 17) == 0 || strcmp (CCTK_VarTypeName (group_static_data.vartype), "CCTK_VARIABLE_CHAR") == 0)) || (H5Tget_class (datatype) == H5T_COMPOUND && strncmp (CCTK_VarTypeName (group_static_data.vartype), "CCTK_VARIABLE_COMPLEX", 21) == 0); HDF5_ERROR (H5Tclose (datatype)); if (! flag) { CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, "Variable types don't match for '%s'", fullvarname); return (-1); } /* verify the dims and sizes */ HDF5_ERROR (dataspace = H5Dget_space (dataset)); HDF5_ERROR (ndims_stored = H5Sget_simple_extent_ndims (dataspace)); dims_stored = (hsize_t *) malloc (ndims_stored * sizeof (hsize_t)); HDF5_ERROR (rank_stored = H5Sget_simple_extent_dims (dataspace, dims_stored, NULL)); /* scalars are stored as rank 0 in HDF5 but have rank 1 in Cactus */ if (rank_stored == 0) { rank_stored = 1; } HDF5_ERROR (H5Sclose (dataspace)); flag = group_static_data.dim != (int) rank_stored; if (group_static_data.grouptype == CCTK_ARRAY || group_static_data.grouptype == CCTK_GF) { if (CCTK_GroupDynamicData (GH, groupindex, &group_dynamic_data) != 0) { CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, "Cannot get dynamic group data for '%s'", fullvarname); return (-1); } dims = unchunked ? group_dynamic_data.gsh : group_dynamic_data.lsh; for (i = 0; i < group_static_data.dim; i++) { if (dims[group_static_data.dim - i - 1] != (int) dims_stored[i]) { flag = 1; } } } free (dims_stored); if (flag) { CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, "Variable sizes don't match for '%s'", fullvarname); return (-1); } if (! CCTK_QueryGroupStorageI (GH, groupindex)) { CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, "Can't read into variable '%s': no storage", fullvarname); return (-1); } /* close the first chunk if the dataset is a chunk group */ if (is_group) { HDF5_ERROR (H5Dclose (dataset)); } *grouptype = group_static_data.grouptype; return (0); } static int IOHDF5Util_RestoreGS (hid_t dataset, int vindex, int timelevel, recover_info_t *rec_info) { void *data; #ifdef CCTK_MPI int proc; CCTK_INT var_info[3]; MPI_Comm comm; #endif data = CCTK_VarDataPtrI (rec_info->it_info->GH, timelevel, vindex); /* read the data into the local variable ... */ HDF5_ERROR (H5Dread (dataset, rec_info->hdf5type, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)); #ifdef CCTK_MPI /* ... and communicate it for the MPI case */ comm = PUGH_pGH (rec_info->it_info->GH)->PUGH_COMM_WORLD; /* set the variable's index and the timelevel */ var_info[0] = vindex; var_info[1] = timelevel; var_info[2] = 1; /* send info and data to the non-IO processors */ for (proc = rec_info->it_info->ioproc + 1; proc < rec_info->it_info->ioproc + rec_info->it_info->ioproc_every && proc < CCTK_nProcs (rec_info->it_info->GH); proc++) { CACTUS_MPI_ERROR (MPI_Send (var_info, 3, PUGH_MPI_INT, proc, MPITAGBASE, comm)); CACTUS_MPI_ERROR (MPI_Send (data, 1, rec_info->mpi_type, proc, MPITAGBASE, comm)); } #endif /* CCTK_MPI */ return (0); } static int IOHDF5Util_RestoreGA (hid_t dataset, int vindex, int timelevel, recover_info_t *rec_info) { #ifdef CCTK_MPI int i, dim, proc, npoints; CCTK_INT var_info[3]; pGH *pughGH; void *buffer, *data; hid_t filespace, memspace, chunk; pGExtras *extras; char chunkname[32]; hssize_t *chunk_origin; hsize_t *chunk_dims; #endif /* single processor case is easy: just read the whole dataset */ if (CCTK_nProcs (rec_info->it_info->GH) == 1) { HDF5_ERROR (H5Dread (dataset, rec_info->hdf5type, H5S_ALL, H5S_ALL, H5P_DEFAULT, rec_info->it_info->GH->data[vindex][timelevel])); return (0); } #ifdef CCTK_MPI /* Get the handle for PUGH extensions */ pughGH = PUGH_pGH (rec_info->it_info->GH); /* Get the pGExtras pointer as a shortcut */ extras = ((pGA ***) pughGH->variables)[vindex][timelevel]->extras; /* allocate memory for the biggest chunk */ npoints = extras->rnpoints[rec_info->it_info->ioproc + 1]; for (proc = 2; proc < rec_info->it_info->ioproc_every; proc++) { if (npoints < extras->rnpoints[rec_info->it_info->ioproc + proc]) { npoints = extras->rnpoints[rec_info->it_info->ioproc + proc]; } } buffer = malloc (npoints * rec_info->element_size); /* set the variable's index and timelevel to restore */ var_info[0] = vindex; var_info[1] = timelevel; /* now loop over the group of processors associated to each IO processor */ for (proc = rec_info->it_info->ioproc; proc < rec_info->it_info->ioproc + rec_info->it_info->ioproc_every && proc < CCTK_nProcs (rec_info->it_info->GH); proc++) { /* read own data directly into variable */ if (proc == rec_info->it_info->ioproc) { data = rec_info->it_info->GH->data[vindex][timelevel]; } else { data = buffer; } if (! rec_info->it_info->unchunked) { /* Chunked data is stored as separate chunk datasets within a group. So open, read and close the separate chunks here. */ sprintf (chunkname, "chunk%d", proc - rec_info->it_info->ioproc); HDF5_ERROR (chunk = H5Dopen (dataset, chunkname)); HDF5_ERROR (H5Dread (chunk, rec_info->hdf5type, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)); HDF5_ERROR (H5Dclose (chunk)); } else { /* get the dimension of the variable */ dim = CCTK_GroupDimI (CCTK_GroupIndexFromVarI (vindex)); chunk_origin = (hssize_t *) malloc (dim * sizeof (hssize_t)); chunk_dims = (hsize_t *) malloc (dim * sizeof (hsize_t)); /* Unchunked data is read as one hyperslab per processor. So prepare the memspace and the filespace and read the hyperslab. */ for (i = 0; i < dim; i++) { chunk_dims[dim - 1 - i] = extras->rnsize[proc][i]; chunk_origin[dim - 1 - i] = extras->lb[proc][i]; } HDF5_ERROR (filespace = H5Dget_space (dataset)); HDF5_ERROR (memspace = H5Screate_simple (dim, chunk_dims, NULL)); HDF5_ERROR (H5Sselect_hyperslab (filespace, H5S_SELECT_SET, chunk_origin, NULL, chunk_dims, NULL)); HDF5_ERROR (H5Dread (dataset, rec_info->hdf5type, memspace, filespace, H5P_DEFAULT, data)); HDF5_ERROR (H5Sclose (memspace)); HDF5_ERROR (H5Sclose (filespace)); free (chunk_dims); free (chunk_origin); } /* send the index and the data to the non-IO processors */ if (proc != rec_info->it_info->ioproc) { var_info[2] = extras->rnpoints[proc]; CACTUS_MPI_ERROR (MPI_Send (var_info, 3, PUGH_MPI_INT, proc, MPITAGBASE, pughGH->PUGH_COMM_WORLD)); CACTUS_MPI_ERROR (MPI_Send (data, extras->rnpoints[proc], rec_info->mpi_type, proc, MPITAGBASE, pughGH->PUGH_COMM_WORLD)); } } free (buffer); #endif /* CCTK_MPI */ return (0); } static herr_t processDataset (hid_t group, const char *datasetname, void *arg) { ioGH *ioUtilGH; ioHDF5UtilGH *myGH; int vindex, vtype, gtype, timelevel, is_group; iterate_info_t *it_info = (iterate_info_t *) arg; recover_info_t rec_info; hid_t dataset; H5G_stat_t object_info; char *fullname; DECLARE_CCTK_PARAMETERS /* skip the global attributes and GH extensions groups */ if (! strcmp (datasetname, CACTUS_PARAMETERS_GROUP) || ! strcmp (datasetname, GLOBAL_ATTRIBUTES_GROUP)) { return (0); } HDF5_ERROR (H5Gget_objinfo (group, datasetname, 0, &object_info)); is_group = object_info.type == H5G_GROUP; if (is_group) { HDF5_ERROR (dataset = H5Gopen (group, datasetname)); } else { HDF5_ERROR (dataset = H5Dopen (group, datasetname)); } /* read in the dataset's attributes and verify them */ if (GetCommonAttributes (it_info->GH, dataset, datasetname, it_info->unchunked, &vindex, >ype, &timelevel, is_group, it_info->has_version) < 0) { CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Ignoring dataset '%s'", datasetname); return (0); } ioUtilGH = (ioGH *) CCTK_GHExtension (it_info->GH, "IO"); myGH = (ioHDF5UtilGH *) CCTK_GHExtension (it_info->GH, "IOHDF5Util"); /* if we read in initial data via the file reader interface check whether the user wants to have this variable restored */ if (ioUtilGH->do_inVars && ! ioUtilGH->do_inVars[vindex]) { if (verbose) { fullname = CCTK_FullName (vindex); CCTK_VInfo (CCTK_THORNSTRING, "Ignoring variable '%s' for file reader " "recovery", fullname); free (fullname); } return (0); } if (verbose) { fullname = CCTK_FullName (vindex); CCTK_VInfo (CCTK_THORNSTRING, "Restoring variable '%s' (timelevel %d)", fullname, timelevel); free (fullname); } vtype = CCTK_VarTypeI (vindex); rec_info.it_info = it_info; rec_info.element_size = CCTK_VarTypeSize (vtype); #ifdef CCTK_MPI rec_info.mpi_type = PUGH_MPIDataType (PUGH_pGH (it_info->GH), vtype); #endif rec_info.hdf5type = IOHDF5Util_DataType (myGH, vtype); if (rec_info.element_size <= 0 || #ifdef CCTK_MPI rec_info.mpi_type == MPI_DATATYPE_NULL || #endif rec_info.hdf5type < 0) { CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Unsupported variable datatype %d", vtype); return (1); } /* Read in the data */ switch (gtype) { case CCTK_SCALAR: IOHDF5Util_RestoreGS (dataset, vindex, timelevel, &rec_info); break; case CCTK_GF: case CCTK_ARRAY: IOHDF5Util_RestoreGA (dataset, vindex, timelevel, &rec_info); break; default: CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Unsupported group type %d", gtype); return (1); } if (is_group) { HDF5_ERROR (H5Gclose (dataset)); } else { HDF5_ERROR (H5Dclose (dataset)); } return (0); }