/*@@ @file RestoreFile.c @date Thu Jun 18 16:34:59 1998 @author Tom Goodale @desc Routines to restore grid variables from a given file @enddesc @version $Id$ @@*/ #include #include #include "cctk.h" #include "cctk_Parameters.h" #include "util_String.h" #include "CactusPUGH/PUGH/src/include/pugh.h" #include "CactusBase/IOUtil/src/ioGH.h" #include "ioFlexGH.h" /* the rcs ID and its dummy function to use it */ static const char *rcsid = "$Id$"; CCTK_FILEVERSION(CactusPUGHIO_IOFlexIO_RestoreFile_c) /******************************************************************** ******************** Macro Definitions ************************ ********************************************************************/ /* tag base for MPI messages */ #define STARTUPBASE 1001 /* The maximum number of dimensions we can deal with. There's no routine provided by the IEEEIO lib to query the number of dims from a dataset. */ #define MAXDIM 10 /******************************************************************** ******************** Internal Routines ************************ ********************************************************************/ static int GetCommonAttributes (cGH *GH, IOFile fid, int unchunked, int *vindex, int *grouptype, int *timelevel, int *iteration, int has_version); #ifdef CCTK_MPI static int GetChunkAttributes (IOFile fid, int vindex); #endif /******************************************************************** ******************** External Routines ************************ ********************************************************************/ int IOFlexIOi_RecoverVariables (cGH *GH, fileinfo_t *file); /*@@ @routine IOFlexIO_RecoverVariables @date Fri Jun 19 09:19:48 1998 @author Tom Goodale @desc Reads in data from an IEEEIO file. The file has to be opened already, and the file layout determined (ioproc, ioproc_every, unchunked). This information is used to read the file and distribute the data among all processors. 
@enddesc @history @hauthor Gabrielle Allen @hdate Oct 17 1998 @hdesc Changed logic so that cactus stops if any of the dimensions of the input file and the current cactus run differ. @hauthor Thomas Radke @hdate May 05 1999 @hdesc Added parameter unchunked @endhistory @var GH @vdesc Pointer to CCTK grid hierarchy @vtype cGH * @vio in @endvar @var file @vdesc IEEEIO file info structure @vtype fileinfo_t * @vio in @endvar @@*/ int IOFlexIOi_RecoverVariables (cGH *GH, fileinfo_t *file) { int vindex, gtype, timelevel, iteration, myproc; int nDatasets, currentDataset; const ioGH *ioUtilGH; char *fullname; #ifdef CCTK_MPI int i, proc, nprocs; const pGH *pughGH; pGExtras *extras; CCTK_INT info[3]; int dim, npoints, vtype; void *buffer, *data; int *chunkdims, *chunkorigin; int element_size, flexio_type; MPI_Datatype mpi_type; MPI_Status ms; #endif DECLARE_CCTK_PARAMETERS /* suppress compiler warnings about potentially uninitialized variables */ nDatasets = 0; myproc = CCTK_MyProc (GH); ioUtilGH = CCTK_GHExtension (GH, "IO"); #ifdef CCTK_MPI pughGH = PUGH_pGH (GH); nprocs = CCTK_nProcs (GH); #endif /* all I/O procs determine the number of datasets in their checkpoint files */ if (myproc == file->ioproc) { /* get the number of sets */ nDatasets = IOnDatasets (file->fid); if (CCTK_Equals (verbose, "full")) { CCTK_VInfo (CCTK_THORNSTRING, " Input file has %d datasets", nDatasets); } } /* In Cactus 3.x we had only datasets containing grid function data. This distributed data was stored as one dataset per processor within the group belonging to one IO processor. So there should be nGF*ioproc_every datasets within each checkpoint file. This consistency condition is no longer true because now there might be datasets containing a SCALAR grouptype. These datasets are stored only from the IO processors, and they are just distributed to the non-IO processors again during recovery. 
if (nDatasets % file->ioproc_every != 0) CCTK_WARN (0, "Number of datasets isn't a multiple of nioprocs"); */ /* now process the datasets. All I/O processors read the datasets from their checkpoint file verify their contents and communicate them to the non-I/O processors. */ /* at first the code for the I/O processors ... */ if (myproc == file->ioproc) { /* seek here once to the beginning of the file, the file pointer is advanced then implicitely by subsequent calls to IOreadInfo() */ FLEXIO_ERROR (IOseek (file->fid, 0)); /* each IO processor loops over all available datasets, checks their consistency and broadcasts them to the non-IO processors */ for (currentDataset = 0; currentDataset < nDatasets; currentDataset++) { /* read in the next dataset's attributes and verify them */ if (GetCommonAttributes (GH, file->fid, file->unchunked, &vindex, >ype, &timelevel, &iteration, file->has_version) < 0) { CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Ignoring dataset %d", currentDataset); continue; } /* if we read in initial data via the file reader interface check whether the user wants to have this variable restored */ if (ioUtilGH->do_inVars && ioUtilGH->do_inVars[vindex] >= 0 && ioUtilGH->do_inVars[vindex] != iteration + 1) { if (CCTK_Equals (verbose, "full")) { CCTK_VInfo (CCTK_THORNSTRING, "Ignoring dataset %d for file reader " "recovery", currentDataset); } continue; } /* read in the data */ if (CCTK_Equals (verbose, "full") || (ioUtilGH->do_inVars && ! 
CCTK_Equals (verbose, "none"))) { fullname = CCTK_FullName (vindex); CCTK_VInfo (CCTK_THORNSTRING, " dataset %d: %s (timelevel %d, " "cctk_iteration %d)", currentDataset, fullname, timelevel, iteration); free (fullname); } if (file->ioproc_every == 1) { FLEXIO_ERROR (IOread (file->fid, CCTK_VarDataPtrI (GH, timelevel, vindex))); } #ifdef CCTK_MPI else { vtype = CCTK_VarTypeI (vindex); element_size = CCTK_VarTypeSize (vtype); mpi_type = PUGH_MPIDataType (pughGH, vtype); flexio_type = IOFlexIO_DataType (vtype); if (element_size <= 0 || mpi_type == MPI_DATATYPE_NULL || flexio_type<0) { CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Unsupported variable type %d", vtype); continue; } /* get the pGExtras pointer as a shortcut */ extras = ((pGA ***) pughGH->variables)[vindex][timelevel]->extras; data = CCTK_VarDataPtrI (GH, timelevel, vindex); /* get the dimension of the variable */ dim = CCTK_GroupDimFromVarI (vindex); chunkorigin = malloc (2*dim * sizeof (int)); chunkdims = chunkorigin + dim; /* read my own data directly into data, read others data into buffer and communicate it */ if (! file->unchunked || gtype == CCTK_SCALAR) { FLEXIO_ERROR (IOread (file->fid, data)); } else { if (extras->rnpoints[file->ioproc] > 0) { for (i = 0; i < dim; i++) { chunkdims[i] = extras->rnsize[file->ioproc][i]; chunkorigin[i] = extras->lb[file->ioproc][i]; } FLEXIO_ERROR (IOreadChunk (file->fid, chunkdims, chunkorigin,data)); } } /* read data for non-IO processors */ if (gtype == CCTK_SCALAR) { npoints = 1; buffer = data; } else { /* allocate memory for the biggest chunk */ npoints = extras->rnpoints[file->ioproc + 1]; for (proc = 2; proc < file->ioproc_every; proc++) { if (npoints < extras->rnpoints[file->ioproc + proc]) { npoints = extras->rnpoints[file->ioproc + proc]; } } buffer = npoints > 0 ? 
malloc (npoints * element_size) : NULL; } for (proc = file->ioproc + 1; proc < file->ioproc + file->ioproc_every && proc < nprocs; proc++) { if (gtype != CCTK_SCALAR) { npoints = extras->rnpoints[proc]; if (! file->unchunked) { /* Also increment dataset counter here !!! */ currentDataset++; if (GetChunkAttributes (file->fid, vindex) < 0) { CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Ignoring chunk in dataset %d", currentDataset+1); continue; } FLEXIO_ERROR (IOread (file->fid, buffer)); } else if (npoints > 0) { for (i = 0; i < dim; i++) { chunkdims[i] = extras->rnsize[proc][i]; chunkorigin[i] = extras->lb[proc][i]; } FLEXIO_ERROR (IOreadChunk (file->fid, chunkdims, chunkorigin, buffer)); } } /* and finally send the index and the data */ info[0] = vindex; info[1] = timelevel; info[2] = npoints; CACTUS_MPI_ERROR (MPI_Send (info, 3, PUGH_MPI_INT, proc, STARTUPBASE, pughGH->PUGH_COMM_WORLD)); if (npoints > 0) { CACTUS_MPI_ERROR (MPI_Send (buffer, npoints, mpi_type, proc, STARTUPBASE, pughGH->PUGH_COMM_WORLD)); } } /* free allocated chunk */ if (buffer != data) { free (buffer); } free (chunkorigin); } /* reading data for file->ioproc_every processors */ #endif } /* end of loop over all datasets */ #ifdef CCTK_MPI /* finally an invalid variable index is communicated to indicate completion to the non-I/O processors */ info[0] = -1; for (proc = file->ioproc + 1; proc < file->ioproc + file->ioproc_every && proc < nprocs; proc++) { CACTUS_MPI_ERROR (MPI_Send (info, 3, PUGH_MPI_INT, proc, STARTUPBASE, pughGH->PUGH_COMM_WORLD)); } #endif } else { /* and here the code for non-IO processors: */ #ifdef CCTK_MPI /* They don't know how many datasets there are, because the IO processors could skip some on the fly during their consistency checks. The IO Processor sends the index of the variable to be processed next. So, all non-IO processors execute a loop where the termination condition is when an invalid index was received. 
*/ while (1) { /* receive the next variable index from the IO processor */ CACTUS_MPI_ERROR (MPI_Recv (info, 3, PUGH_MPI_INT, file->ioproc, STARTUPBASE, pughGH->PUGH_COMM_WORLD, &ms)); vindex = info[0]; timelevel = info[1]; npoints = info[2]; /* check for termination condition */ if (vindex < 0) { break; } /* receive following data from my IO processor */ if (npoints > 0) { mpi_type = PUGH_MPIDataType (pughGH, CCTK_VarTypeI (vindex)); CACTUS_MPI_ERROR (MPI_Recv (CCTK_VarDataPtrI (GH, timelevel, vindex), npoints, mpi_type, file->ioproc, STARTUPBASE, pughGH->PUGH_COMM_WORLD, &ms)); } } #endif } return (0); } /******************************************************************** ******************** Internal Routines ************************ ********************************************************************/ /* local routine getDatasetAttributes() reads in the next dataset's attributes and verifies them: * checks if there is a variable with the name given by the name attribute * verifies that this variable still belongs to the same group * checks the group data info: - group type - variable type - ntimelevels - iteration number - sizes (rank, dimensions) according to chunking mode If there is a mismatch a warning (warning level 2) is printed and value of -1 is returned to indicate that this dataset should be ignored. If successful, the global variable index, the group type and the timelevel to restore are stored in {*index, *grouptype, *timelevel}, and 0 is returned. 
*/

/* Arguments of GetCommonAttributes():

     GH          - pointer to the CCTK grid hierarchy
     fid         - IEEEIO file to read the next dataset's info from
     unchunked   - if non-zero the stored sizes are compared against the
                   global sizes (gsh) of the group, otherwise against the
                   processor-local sizes (lsh)
     vindex      - (out) index of the variable named by the dataset
     grouptype   - (out) group type of that variable
     timelevel   - (out) timelevel to restore the dataset into
     iteration   - (out) iteration number stored with the dataset
     has_version - if zero the file is treated as an old checkpoint file
                   and the stored timelevel is incremented for variables
                   with more than one timelevel

   Returns 0 for success, or -1 if the dataset should be ignored. */
static int GetCommonAttributes (cGH *GH, IOFile fid, int unchunked, int *vindex,
                                int *grouptype, int *timelevel, int *iteration,
                                int has_version)
{
  int i, atype, vartype_stored, rank_stored, groupindex, result;
  int names_match, vartype_ok, sizes_differ;
  Long asize;
  cGroup group_static_data;
  cGroupDynamicData group_dynamic_data;
  CCTK_INT4 grouptype_stored, numtimelevels_stored, timelevel_stored,
            iteration_stored;
  char *groupname, *msg, *oldmsg;
  const int *dims;
  int dims_stored[MAXDIM];
  char fullname[512], groupname_stored[512];


  /* read the next dataset's info from the file */
  result = IOreadInfo (fid, &vartype_stored, &rank_stored, dims_stored, MAXDIM);
  FLEXIO_ERROR (result);
  if (result == 0)
  {
    CCTK_WARN (1, "Can't read dataset info");
    return (-1);
  }

  /* retrieve the name attribute
     (it must fit into the buffer including a terminating NUL) */
  i = IOreadAttributeInfo (fid, "name", &atype, &asize);
  if (i < 0 || atype != CHAR || asize < 0 ||
      (size_t) asize >= sizeof (fullname))
  {
    CCTK_WARN (2, "Can't read name attribute");
    return (-1);
  }
  FLEXIO_ERROR (IOreadAttribute (fid, i, fullname));
  /* the name comes from the file: don't rely on it being NUL-terminated */
  fullname[asize] = '\0';

  /* check if there is a matching variable */
  *vindex = CCTK_VarIndex (fullname);
  if (*vindex < 0)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "No matching variable found for '%s'", fullname);
    return (-1);
  }

  /* read and verify the group name */
  i = IOreadAttributeInfo (fid, "groupname", &atype, &asize);
  if (i < 0 || atype != CHAR || asize < 0 ||
      (size_t) asize >= sizeof (groupname_stored))
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Can't read groupname attribute of '%s'", fullname);
    return (-1);
  }
  FLEXIO_ERROR (IOreadAttribute (fid, i, groupname_stored));
  groupname_stored[asize] = '\0';

  /* the group name is an allocated string: release it on every path */
  groupname = CCTK_GroupNameFromVarI (*vindex);
  names_match = groupname != NULL &&
                CCTK_Equals (groupname_stored, groupname);
  free (groupname);
  if (! names_match)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Groupnames don't match for '%s'", fullname);
    return (-1);
  }

  /* read the group type */
  i = IOreadAttributeInfo (fid, "grouptype", &atype, &asize);
  if (i < 0 || atype != FLEXIO_INT4 || asize != 1)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Can't read grouptype attribute for '%s'", fullname);
    return (-1);
  }
  FLEXIO_ERROR (IOreadAttribute (fid, i, &grouptype_stored));

  /* be backwards compatible:
     map the stored values 1, 2, 3 onto the current group type constants */
  switch (grouptype_stored)
  {
    case 1:  grouptype_stored = CCTK_SCALAR; break;
    case 2:  grouptype_stored = CCTK_GF;     break;
    case 3:  grouptype_stored = CCTK_ARRAY;  break;
    default: break;
  }

  /* read the iteration number */
  i = IOreadAttributeInfo (fid, "iteration", &atype, &asize);
  if (i < 0 || atype != FLEXIO_INT4 || asize != 1)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Can't read iteration attribute for '%s'", fullname);
    return (-1);
  }
  FLEXIO_ERROR (IOreadAttribute (fid, i, &iteration_stored));
  *iteration = iteration_stored;

  /* read the number of timelevels */
  i = IOreadAttributeInfo (fid, "ntimelevels", &atype, &asize);
  if (i < 0 || atype != FLEXIO_INT4 || asize != 1)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Can't read ntimelevels attribute for '%s'", fullname);
    return (-1);
  }
  FLEXIO_ERROR (IOreadAttribute (fid, i, &numtimelevels_stored));

  /* read the timelevel to restore */
  i = IOreadAttributeInfo (fid, "timelevel", &atype, &asize);
  if (i < 0 || atype != FLEXIO_INT4 || asize != 1)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Can't read timelevel attribute for '%s'", fullname);
    return (-1);
  }
  FLEXIO_ERROR (IOreadAttribute (fid, i, &timelevel_stored));
  *timelevel = (int) timelevel_stored;

  /* verify group type, variable type, dims, sizes and ntimelevels */
  groupindex = CCTK_GroupIndex (groupname_stored);
  if (CCTK_GroupData (groupindex, &group_static_data) != 0)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Could not get group info for '%s'", fullname);
    return (-1);
  }
  if (group_static_data.grouptype != grouptype_stored)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Group types don't match for '%s'", fullname);
    return (-1);
  }
  if (group_static_data.numtimelevels != numtimelevels_stored)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Number of timelevels don't match for '%s'", fullname);
    return (-1);
  }

  /* increment the timelevel for data from old checkpoint files */
  if (! has_version && numtimelevels_stored > 1)
  {
    (*timelevel)++;
  }

  /* the timelevel comes from the file and is used by the caller to look up
     the variable's data: make sure it denotes an existing timelevel */
  if (*timelevel < 0 || *timelevel >= group_static_data.numtimelevels)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Timelevel %d is out of range for '%s'", *timelevel, fullname);
    return (-1);
  }

  /* The CCTK variable type defines do not correlate with the IEEEIO defines
     so compare them explicitly here.
     FIXME: Don't know how to support COMPLEX type too !! */
  vartype_ok = (vartype_stored == FLEXIO_REAL &&
                group_static_data.vartype == CCTK_VARIABLE_REAL) ||
               (vartype_stored == FLEXIO_INT &&
                group_static_data.vartype == CCTK_VARIABLE_INT) ||
               (vartype_stored == FLEXIO_BYTE &&
                group_static_data.vartype == CCTK_VARIABLE_BYTE);
  if (! vartype_ok)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Variable types don't match for '%s'", fullname);
    return (-1);
  }

  /* verify the rank: the stored rank of a scalar is not significant */
  if (group_static_data.grouptype == CCTK_SCALAR)
  {
    rank_stored = 0;
  }
  if (group_static_data.dim != rank_stored)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Variable dimensions don't match for '%s', got %d, expected "
                "%d", fullname, rank_stored, group_static_data.dim);
    return (-1);
  }
  /* IOreadInfo() was asked for at most MAXDIM sizes */
  if (group_static_data.dim > MAXDIM)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Variable '%s' has more than %d dimensions",
                fullname, MAXDIM);
    return (-1);
  }

  /* verify the sizes */
  if (group_static_data.grouptype == CCTK_SCALAR)
  {
    /* a scalar must be stored as a single element */
    if (dims_stored[0] != 1)
    {
      CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                  "Variable sizes don't match for '%s', got (%d), expected (1)",
                  fullname, dims_stored[0]);
      return (-1);
    }
  }
  else
  {
    if (CCTK_GroupDynamicData (GH, groupindex, &group_dynamic_data) != 0)
    {
      CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                  "Cannot get dynamic group data for '%s'", fullname);
      return (-1);
    }

    dims = unchunked ? group_dynamic_data.gsh : group_dynamic_data.lsh;
    sizes_differ = 0;
    for (i = 0; i < group_static_data.dim; i++)
    {
      sizes_differ |= dims[i] != dims_stored[i];
    }

    if (sizes_differ)
    {
      /* sizes_differ implies dim >= 1, so element 0 exists in both arrays;
         both lists are printed in the order in which they were compared */
      msg = NULL;
      Util_asprintf (&msg, "Variable sizes don't match for '%s', got (%d",
                     fullname, dims_stored[0]);
      for (i = 1; i < group_static_data.dim; i++)
      {
        oldmsg = msg;
        Util_asprintf (&msg, "%s, %d", oldmsg, dims_stored[i]);
        free (oldmsg);
      }
      oldmsg = msg;
      Util_asprintf (&msg, "%s), expected (%d", oldmsg, dims[0]);
      free (oldmsg);
      for (i = 1; i < group_static_data.dim; i++)
      {
        oldmsg = msg;
        Util_asprintf (&msg, "%s, %d", oldmsg, dims[i]);
        free (oldmsg);
      }
      oldmsg = msg;
      Util_asprintf (&msg, "%s)", oldmsg);
      free (oldmsg);

      /* the message contains a name read from the file:
         never use it as the format string itself */
      CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, "%s", msg);
      free (msg);
      return (-1);
    }
  }

  if (! CCTK_QueryGroupStorageI (GH, CCTK_GroupIndexFromVarI (*vindex)))
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Can't read into '%s': no storage assigned", fullname);
    return (-1);
  }

  *grouptype = group_static_data.grouptype;

  return (0);
}


/* local routine GetChunkAttributes() reads in the info of the next dataset
   that should be a chunk.
   It verifies via the name attribute that this chunk belongs to the
   current variable given by its index.

   Returns 0 if the chunk belongs to variable vindex, or -1 if the dataset
   info or its name attribute could not be read or names another variable. */
#ifdef CCTK_MPI
static int GetChunkAttributes (IOFile fid, int vindex)
{
  int i, atype, vtype_stored, rank_stored, result;
  Long asize;
  int dims_stored[MAXDIM];
  char fullname[512];


  /* read the next dataset's info from the file */
  result = IOreadInfo (fid, &vtype_stored, &rank_stored, dims_stored, MAXDIM);
  FLEXIO_ERROR (result);
  if (result == 0)
  {
    CCTK_WARN (1, "Can't read dataset info");
    return (-1);
  }

  /* retrieve the name attribute
     (it must fit into the buffer including a terminating NUL) */
  i = IOreadAttributeInfo (fid, "name", &atype, &asize);
  if (i < 0 || atype != CHAR || asize < 0 ||
      (size_t) asize >= sizeof (fullname))
  {
    CCTK_WARN (2, "Can't read name attribute");
    return (-1);
  }
  FLEXIO_ERROR (IOreadAttribute (fid, i, fullname));
  /* the name comes from the file: don't rely on it being NUL-terminated */
  fullname[asize] = '\0';

  /* check if there is a matching variable */
  if (vindex != CCTK_VarIndex (fullname))
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "No matching variable found for '%s'", fullname);
    return (-1);
  }

  return (0);
}
#endif