/*@@ @file RestoreFile.c @date Thu Jun 18 16:34:59 1998 @author Tom Goodale @desc Routines to restore GFs from a given file @enddesc @history @hauthor Gabrielle Allen @hdate 19 Oct 1998 @hdesc Changed names ready for thorn_IO @endhistory @version $Id$ @@*/ static char *rcsid = "$Id$"; #include #include #include "cctk.h" #include "cctk_Parameters.h" #include "CactusPUGH/PUGH/src/include/pugh.h" #include "CactusBase/IOUtil/src/ioGH.h" #include "ioFlexGH.h" /* MPI tag base */ #define STARTUPBASE 1001 /* The maximum number of dimensions we can deal with. There's no routine provided by the IEEEIO lib to query the number of dims from a dataset. */ #define MAXDIM 10 /* local function prototypes */ static int GetCommonAttributes (cGH *GH, IOFile ifp, int unchunked, int *index, int *grouptype, int *timelevel); #ifdef CCTK_MPI static int GetChunkAttributes (cGH *GH, IOFile ifp, int index); #endif /*@@ @routine IOFlexIO_RestoreIEEEIOfile @date Fri Jun 19 09:19:48 1998 @author Tom Goodale @desc Reads in data from an IEEEIO file. The file has to be opened already, and the file layout determined (ioproc, ioproc_every, unchunked). This information is used to read the file and distribute the data among all processors. @enddesc @calledby IOFlexIO_RecoverGH @history @hauthor Gabrielle Allen @hdate Oct 17 1998 @hdesc Changed logic so that cactus stops if any of the dimensions of the input file and the current cactus run differ. 
@hauthor Thomas Radke @hdate May 05 1999 @hdesc Added parameter unchunked @endhistory @var GH @vdesc Pointer to CCTK grid hierarchy @vtype cGH @vio in @endvar @var ifp @vdesc IEEEIO file pointer @vtype IOFile @vio in @endvar @var file_ioproc @vdesc for the current processor: the IO processor to receive my data from @vtype int @vio in @endvar @var file_ioproc_every @vdesc number of IO processors to use to read the file @vtype int @vio in @endvar @var file_unchunked @vdesc flag indicating whether we read from an unchunked file or not @vtype int @vio in @endvar @@*/ int IOFlexIO_RestoreIEEEIOfile (cGH *GH, IOFile ifp, int file_ioproc, int file_ioproc_every, int file_unchunked) { DECLARE_CCTK_PARAMETERS int index, gtype; int myproc, nprocs; int nDatasets, currentDataset; int timelevel; /* current timelevel to be restored */ pGH *pughGH; /* PUGH extension handle */ pGExtras *extras; #ifdef CCTK_MPI int i, proc; CCTK_INT info [3]; /* communication buffer for MPI */ #endif /* Get the handles for PUGH extensions */ pughGH = pugh_pGH (GH); myproc = CCTK_MyProc (GH); nprocs = CCTK_nProcs (GH); /* all IO procs determine the number of datasets in their checkpoint files */ if (myproc == file_ioproc) { /* Get the number of sets */ nDatasets = IOnDatasets (ifp); if (verbose) printf (" Input file has %d datasets\n", nDatasets); } /* In Cactus 3.x we had only datasets containing grid function data. This distributed data was stored as one dataset per processor within the group belonging to one IO processor. So there should be nGF*ioproc_every datasets within each checkpoint file. This consistency condition is no longer true because now there might be datasets containing a SCALAR grouptype. These datasets are stored only from the IO processors, and they are just distributed to the non-IO processors again during recovery. if (nDatasets % file_ioproc_every != 0) CCTK_WARN (0, "Number of datasets isn't a multiple of nioprocs"); */ /* Now process the datasets. 
All IO processors read the datasets from their checkpoint file verify their contents and communicate them to the non-IO processors. */ /* At first the code for the IO processors ... */ if (myproc == file_ioproc) { /* Seek here once to the beginning of the file, the file pointer is advanced then implicitely by subsequent calls to IOreadInfo() */ CACTUS_IEEEIO_ERROR (IOseek (ifp, 0)); /* Each IO processor loops over all available datasets, checks their consistency and broadcasts them to the non-IO processors. */ for (currentDataset = 0; currentDataset < nDatasets; currentDataset++) { /* read in the next dataset's attributes and verify them */ if (GetCommonAttributes (GH, ifp, file_unchunked, &index, >ype, &timelevel) < 0) { CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Ignoring dataset %d", currentDataset); continue; } /* Read in the data */ if (verbose) { char *fullname = CCTK_FullName (index); printf (" dataset %d: %s (timelevel %d)\n", currentDataset, fullname, timelevel); free (fullname); } if (file_ioproc_every == 1) CACTUS_IEEEIO_ERROR ( IOread (ifp, CCTK_VarDataPtrI (GH, timelevel, index))); #ifdef CCTK_MPI else { int dim, npoints; void *buffer; int *chunkdims, *chunkorigin; int element_size; MPI_Datatype mpi_type; switch (CCTK_VarTypeI (index)) { case CCTK_VARIABLE_CHAR: element_size = sizeof (CCTK_CHAR); mpi_type = PUGH_MPI_CHAR; break; case CCTK_VARIABLE_INT: element_size = sizeof (CCTK_INT); mpi_type = PUGH_MPI_INT; break; case CCTK_VARIABLE_REAL: element_size = sizeof (CCTK_REAL); mpi_type = PUGH_MPI_REAL; break; #if 0 /* FIXME: Don't know how to support COMPLEX types too !! 
*/ case CCTK_VARIABLE_COMPLEX: element_size = sizeof (CCTK_COMPLEX); mpi_type = pughGH->pugh_mpi_complex; break; #endif default: CCTK_WARN (1, "Unsupported datatype"); continue; } /* Get the pGExtras pointer as a shortcut */ extras = ((pGA ***) pughGH->variables) [index][timelevel]->extras; /* get the dimension of the variable */ dim = CCTK_GroupDimI (CCTK_GroupIndexFromVarI (index)); chunkorigin = (int *) malloc (2*dim * sizeof (int)); chunkdims = chunkorigin + dim; /* read my own data directly into data, read others data into buffer and communicate it */ if (! file_unchunked || gtype == GROUP_SCALAR) CACTUS_IEEEIO_ERROR ( IOread (ifp, CCTK_VarDataPtrI (GH, timelevel, index))); else { for (i = 0; i < dim; i++) { chunkdims [i] = extras->rnsize [file_ioproc][i]; chunkorigin [i] = extras->lb [file_ioproc][i]; } CACTUS_IEEEIO_ERROR (IOreadChunk (ifp, chunkdims, chunkorigin, CCTK_VarDataPtrI (GH, timelevel, index))); } /* read data for non-IO processors */ if (gtype == GROUP_SCALAR) { npoints = 1; buffer = CCTK_VarDataPtrI (GH, timelevel, index); } else { /* allocate memory for the biggest chunk */ npoints = extras->rnpoints [file_ioproc + 1]; for (proc = 2; proc < file_ioproc_every; proc++) if (npoints < extras->rnpoints [file_ioproc + proc]) npoints = extras->rnpoints [file_ioproc + proc]; buffer = malloc (npoints * element_size); } for (proc = file_ioproc + 1; proc < file_ioproc + file_ioproc_every && proc < nprocs; proc++) { if (gtype != GROUP_SCALAR) { if (! file_unchunked) { /* Also increment dataset counter here !!! 
*/ currentDataset++; if (GetChunkAttributes (GH, ifp, index) < 0) { CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, "Ignoring chunk in dataset %d", currentDataset+1); continue; } CACTUS_IEEEIO_ERROR (IOread (ifp, buffer)); } else { for (i = 0; i < dim; i++) { chunkdims [i] = extras->rnsize [proc][i]; chunkorigin [i] = extras->lb [proc][i]; } CACTUS_IEEEIO_ERROR ( IOreadChunk (ifp, chunkdims, chunkorigin, buffer)); } npoints = extras->rnpoints [proc]; } /* and finally send the index and the data */ info [0] = index; info [1] = timelevel; info [2] = npoints; CACTUS_MPI_ERROR (MPI_Send (info, 3, PUGH_MPI_INT, proc, STARTUPBASE, pughGH->PUGH_COMM_WORLD)); CACTUS_MPI_ERROR (MPI_Send (buffer, npoints, mpi_type, proc, STARTUPBASE, pughGH->PUGH_COMM_WORLD)); } /* free allocated chunk */ if (gtype != GROUP_SCALAR) free (buffer); free (chunkorigin); } /* reading data for file_ioproc_every processors */ #endif } /* end of loop over all datasets */ #ifdef CCTK_MPI /* Finally an invalid variable index is communicated to indicate completion to the non-IO processors. */ info [0] = -1; for (proc = 1; proc < file_ioproc_every; proc++) CACTUS_MPI_ERROR (MPI_Send (info, 3, PUGH_MPI_INT, proc + file_ioproc, STARTUPBASE, pughGH->PUGH_COMM_WORLD)); #endif } else { /* And here the code for non-IO processors: */ #ifdef CCTK_MPI int npoints; MPI_Datatype mpi_type; MPI_Status ms; /* They don't know how many datasets there are, because the IO processors could skip some on the fly during their consistency checks. The IO Processor sends the index of the variable to be processed next. So, all non-IO processors execute a loop where the termination condition is when an invalid index was received. 
*/ while (1) { /* receive the next variable index from the IO processor */ CACTUS_MPI_ERROR (MPI_Recv (info, 3, PUGH_MPI_INT, file_ioproc, STARTUPBASE, pughGH->PUGH_COMM_WORLD, &ms)); index = info [0]; timelevel = info [1]; npoints = info [2]; /* check for termination condition */ if (index < 0) break; switch (CCTK_VarTypeI (index)) { case CCTK_VARIABLE_CHAR: mpi_type = PUGH_MPI_CHAR; break; case CCTK_VARIABLE_INT: mpi_type = PUGH_MPI_INT; break; case CCTK_VARIABLE_REAL: mpi_type = PUGH_MPI_REAL; break; #if 0 /* FIXME: Don't know how to support COMPLEX types too !! */ case CCTK_VARIABLE_COMPLEX: mpi_type = pughGH->pugh_mpi_complex; break; #endif default: CCTK_WARN (1, "Unsupported datatype"); continue; } /* receive following data from my IO processor */ CACTUS_MPI_ERROR (MPI_Recv (CCTK_VarDataPtrI (GH, timelevel, index), npoints, mpi_type, file_ioproc, STARTUPBASE, pughGH->PUGH_COMM_WORLD, &ms)); } #endif } return (0); } /************************* local routines ********************************/ /* local routine getDatasetAttributes() reads in the next dataset's attributes and verifies them: * checks if there is a variable with the name given by the name attribute * verifies that this variable still belongs to the same group * checks the group data info: - group type - variable type - ntimelevels - sizes (rank, dimensions) according to chunking mode If there is a mismatch a warning (warning level 2) is printed and value of -1 is returned to indicate that this dataset should be ignored. If successful, the global variable index, the group type and the timelevel to restore are stored in {*index, *grouptype, *timelevel}, and 0 is returned. 
*/
/* Arguments of GetCommonAttributes():
     GH         - pointer to the CCTK grid hierarchy
     ifp        - IEEEIO file to read the next dataset info from
     unchunked  - if true, stored sizes are compared against the 'nsize'
                  array of the variable's extras, otherwise against 'lnsize'
     index      - out: CCTK variable index named by the dataset
     grouptype  - out: group type of that variable (set only on success)
     timelevel  - out: timelevel stored with the dataset
   Returns 0 if the dataset passed all checks, -1 if it should be skipped. */
static int GetCommonAttributes (cGH *GH, IOFile ifp, int unchunked, int *index,
                                int *grouptype, int *timelevel)
{
  int i, flag, names_match;
  int atype;
  Long asize;
  pGExtras *extras;
  cGroup groupdata;
  int vartype_stored, rank_stored, dims_stored [MAXDIM];
  CCTK_INT4 grouptype_stored, numtimelevels_stored, timelevel_stored;
  int result, *dims;
  char *groupname;
  char fullname [512], groupname_stored [512];


  /* read the next dataset's info from the file */
  result = IOreadInfo (ifp, &vartype_stored, &rank_stored, dims_stored,
                       MAXDIM);
  CACTUS_IEEEIO_ERROR (result);
  if (result == 0)
  {
    CCTK_WARN (1, "Can't read dataset info");
    return (-1);
  }

  /* retrieve the name attribute */
  i = IOreadAttributeInfo (ifp, "name", &atype, &asize);
  if (i < 0 || atype != FLEXIO_CHAR || asize >= sizeof (fullname))
  {
    CCTK_WARN (2, "Can't read name attribute");
    return (-1);
  }
  CACTUS_IEEEIO_ERROR (IOreadAttribute (ifp, i, fullname));
  /* asize < sizeof (fullname) was checked above, so this stays in bounds;
     it guards against a stored string without a terminating NUL */
  fullname [asize] = '\0';

  /* check if there is a matching variable */
  *index = CCTK_VarIndex (fullname);
  if (*index < 0)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "No matching variable found for '%s'", fullname);
    return (-1);
  }

  /* read and verify the group name */
  i = IOreadAttributeInfo (ifp, "groupname", &atype, &asize);
  if (i < 0 || atype != FLEXIO_CHAR || asize >= sizeof (groupname_stored))
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Can't read groupname attribute of '%s'", fullname);
    return (-1);
  }
  CACTUS_IEEEIO_ERROR (IOreadAttribute (ifp, i, groupname_stored));
  groupname_stored [asize] = '\0';

  /* compare first, then release the name before any return
     so that it is not leaked on the mismatch path */
  groupname = CCTK_GroupNameFromVarI (*index);
  names_match = CCTK_Equals (groupname_stored, groupname);
  free (groupname);
  if (! names_match)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Groupnames don't match for '%s'", fullname);
    return (-1);
  }

  /* read the group type */
  i = IOreadAttributeInfo (ifp, "grouptype", &atype, &asize);
  if (i < 0 || atype != FLEXIO_INT4 || asize != 1)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Can't read grouptype attribute for '%s'", fullname);
    return (-1);
  }
  CACTUS_IEEEIO_ERROR (IOreadAttribute (ifp, i, &grouptype_stored));

  /* read the number of timelevels */
  i = IOreadAttributeInfo (ifp, "ntimelevels", &atype, &asize);
  if (i < 0 || atype != FLEXIO_INT4 || asize != 1)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Can't read ntimelevels attribute for '%s'", fullname);
    return (-1);
  }
  CACTUS_IEEEIO_ERROR (IOreadAttribute (ifp, i, &numtimelevels_stored));

  /* read the timelevel to restore */
  i = IOreadAttributeInfo (ifp, "timelevel", &atype, &asize);
  if (i < 0 || atype != FLEXIO_INT4 || asize != 1)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Can't read timelevel attribute for '%s'", fullname);
    return (-1);
  }
  CACTUS_IEEEIO_ERROR (IOreadAttribute (ifp, i, &timelevel_stored));
  *timelevel = (int) timelevel_stored;

  /* verify group type, variable type, dims, sizes and ntimelevels */
  if (CCTK_GroupData (CCTK_GroupIndex (groupname_stored), &groupdata) != 0)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Could not get group info for '%s'", fullname);
    return (-1);
  }
  if (groupdata.grouptype != grouptype_stored)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Group types don't match for '%s'", fullname);
    return (-1);
  }
  if (groupdata.numtimelevels != numtimelevels_stored)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Number of timelevels don't match for '%s'", fullname);
    return (-1);
  }

  /* The stored timelevel is used below (and by the caller) as an index into
     the variable's per-timelevel data, so reject values outside
     [0, numtimelevels).
     NOTE(review): assumes valid timelevel indices start at 0 - confirm. */
  if (*timelevel < 0 || *timelevel >= groupdata.numtimelevels)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Timelevel %d out of range for '%s'", *timelevel, fullname);
    return (-1);
  }

  /* The CCTK variable type defines do not correlate with the IEEEIO defines
     so compare them explicitly here.
     FIXME: Don't know how to support COMPLEX type too !! */
  if (! ((vartype_stored == FLEXIO_REAL &&
          groupdata.vartype == CCTK_VARIABLE_REAL) ||
         (vartype_stored == FLEXIO_INT &&
          groupdata.vartype == CCTK_VARIABLE_INT) ||
         (vartype_stored == FLEXIO_CHAR &&
          groupdata.vartype == CCTK_VARIABLE_CHAR)))
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Variable types don't match for '%s'", fullname);
    return (-1);
  }

  /* verify the dims and sizes;
     dims_stored[] holds at most MAXDIM entries and only rank_stored of them
     were filled in, so it is inspected only if the ranks agree and fit */
  flag = (groupdata.dim != rank_stored || rank_stored > MAXDIM);
  if (! flag)
  {
    switch (groupdata.grouptype)
    {
      case GROUP_SCALAR:
        if (dims_stored [0] != 1)
          flag = 1;
        break;
      case GROUP_GF:
      case GROUP_ARRAY:
        extras = ((pGA ***) pugh_pGH (GH)->variables)[*index][*timelevel]->extras;
        dims = unchunked ? extras->nsize : extras->lnsize;
        /* the stored dims are compared in reverse order */
        for (i = 0; i < groupdata.dim; i++)
          if (dims [groupdata.dim - i - 1] != dims_stored [i])
            flag = 1;
        break;
      default:
        break;
    }
  }
  if (flag)
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Variable sizes don't match for '%s'", fullname);
    return (-1);
  }

  if (! CCTK_QueryGroupStorageI (GH, CCTK_GroupIndexFromVarI (*index)))
  {
    CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
                "Can't read into '%s': no storage assigned", fullname);
    return (-1);
  }

  *grouptype = groupdata.grouptype;

  return (0);
}


/* local routine GetChunkAttributes() reads in the info of the next dataset
   that should be a chunk.
   It verifies via the name attribute that this chunk belongs to the
   current variable given by its index.
*/
#ifdef CCTK_MPI
/* Arguments of GetChunkAttributes():
     GH    - pointer to the CCTK grid hierarchy (not used here)
     ifp   - IEEEIO file to read the next dataset info from
     index - CCTK variable index the chunk is expected to belong to
   Returns 0 if the dataset's name attribute resolves to 'index',
   -1 (after printing a warning) otherwise. */
static int GetChunkAttributes (cGH *GH, IOFile ifp, int index)
{
  char varname [512];
  int stored_dims [MAXDIM];
  int stored_type, stored_rank;
  int status;
  int attr, attr_type;
  Long attr_len;


  /* advance to the next dataset by reading its info */
  status = IOreadInfo (ifp, &stored_type, &stored_rank, stored_dims, MAXDIM);
  CACTUS_IEEEIO_ERROR (status);
  if (status == 0)
  {
    CCTK_WARN (1, "Can't read dataset info");
    return (-1);
  }

  /* the name attribute must exist, be a string and fit into the buffer */
  attr = IOreadAttributeInfo (ifp, "name", &attr_type, &attr_len);
  if (! (attr >= 0 && attr_type == FLEXIO_CHAR &&
         attr_len < sizeof (varname)))
  {
    CCTK_WARN (2, "Can't read name attribute");
    return (-1);
  }
  CACTUS_IEEEIO_ERROR (IOreadAttribute (ifp, attr, varname));

  /* the chunk is accepted only if it names the expected variable */
  if (CCTK_VarIndex (varname) == index)
  {
    return (0);
  }

  CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING,
              "No matching variable found for '%s'", varname);
  return (-1);
}
#endif