/*@@
   @file      RestoreFile.c
   @date      Thu Jun 18 16:34:59 1998
   @author    Tom Goodale
   @desc
              Routines to restore GFs from a given file
   @enddesc
   @history
   @hauthor Gabrielle Allen @hdate 19 Oct 1998
   @hdesc Changed names ready for thorn_IO
   @endhistory
   @version $Id$
 @@*/

static char *rcsid = "$Id$";

#include <stdio.h>
#include <stdlib.h>

#include "cctk.h"
#include "cctk_Flesh.h"
#include "cctk_Groups.h"
#include "cctk_Misc.h"
#include "cctk_Comm.h"
#include "cctk_GHExtensions.h"
#include "cctk_WarnLevel.h"
#include "cctk_parameters.h"
#ifdef CACTUSPUGH_PUGH
#include "CactusPUGH/PUGH/src/include/pugh.h"
#endif
#include "CactusBase/IOUtil/src/ioGH.h"
#include "ioFlexGH.h"

/* MPI tag base */
#define STARTUPBASE 1001

/* the maximum number of dimensions we can deal with (up to now :-) */
#define MAXDIM 3


/* local routine getDatasetAttributes() reads in the next dataset's attributes
   and verifies them:

    * checks if there is a variable with the name given by the name attribute
    * verifies that this variable still belongs to the same group
    * checks the group data info:
      - group type
      - variable type
      - ntimelevels
      - sizes (rank, dimensions) according to chunking mode

   If there is a mismatch a warning (warning level 2) is printed and value of 1
   is returned to indicate that this dataset should be ignored.
   If successful, the global variable index, the group type and the timelevel
   to restore are stored in {*index, *gtype, *timelevel}, and 0 is returned.

   Currently only restoring of CCTK_VARIABLE_REAL variables is supported.
*/

/*
 * GetCommonAttributes
 *
 * Reads the info and the attributes of the next dataset in ifp and checks
 * them against the variable of the same name known to the current run.
 *
 *   GH         grid hierarchy; GH->cctk_gsh / GH->cctk_lsh are compared with
 *              the stored dimensions and the group storage is queried on it
 *   ifp        file to read the dataset info and attributes from
 *   unchunked  nonzero: compare GF dimensions against GH->cctk_gsh,
 *              zero:    compare them against GH->cctk_lsh
 *   index      out: variable index returned by CCTK_VarIndex() for the
 *              dataset's "name" attribute
 *   gtype      out: group type reported by CCTK_GroupData()
 *   timelevel  out: value of the dataset's "timelevel" attribute
 *
 * Returns 0 if all checks passed, 1 if the dataset should be ignored (a
 * warning has been issued in that case).  The three output arguments are
 * only guaranteed to be all set when 0 is returned.
 *
 * Variable types accepted by the type check below are REAL, INT and CHAR.
 */
int GetCommonAttributes (cGH *GH, IOFile ifp, int unchunked, int *index, int *gtype, int *timelevel)
{
  int i, flag;
  int atype;
  Long asize;
  int vtype_stored, rank_stored, dims_stored [MAXDIM];
  CCTK_INT4 gtype_stored, ntimelevels_stored, timelevel_stored;
  int vtype, rank, ntimelevels;
  int result;
  int names_match;
  char *groupname;
  char fullname [512], groupname_stored [512];
  /* large enough for any of the messages below plus a full-length name */
  char msg [sizeof (fullname) + 64];


  /* read the next dataset's info from the file */
  result = IOreadInfo (ifp, &vtype_stored, &rank_stored, dims_stored, MAXDIM);
  CACTUS_IEEEIO_ERROR (result);
  if (result == 0) {
    CCTK_WARN (1, "Can't read dataset info");
    return (1);
  }

  /* retrieve the name attribute
     (a negative size is rejected explicitly because the comparison against
      sizeof() alone depends on the integer conversion rules for Long) */
  i = IOreadAttributeInfo (ifp, "name", &atype, &asize);
  if (i < 0 || (atype != BYTE && atype != CHAR) ||
      asize < 0 || asize >= sizeof (fullname)) {
    CCTK_WARN (2, "Can't read name attribute");
    return (1);
  }
  CACTUS_IEEEIO_ERROR (IOreadAttribute (ifp, i, fullname));
  /* do not rely on the file to provide the terminating NUL */
  fullname [asize] = '\0';

  /* check if there is a matching variable */
  *index = CCTK_VarIndex (fullname);
  if (*index < 0) {
    snprintf (msg, sizeof (msg), "No matching variable found for '%s'", fullname);
    CCTK_WARN (2, msg);
    return (1);
  }

  /* read and verify the group name */
  i = IOreadAttributeInfo (ifp, "groupname", &atype, &asize);
  if (i < 0 || (atype != BYTE && atype != CHAR) ||
      asize < 0 || asize >= sizeof (groupname_stored)) {
    snprintf (msg, sizeof (msg), "Can't read groupname attribute of '%s'", fullname);
    CCTK_WARN (2, msg);
    return (1);
  }
  CACTUS_IEEEIO_ERROR (IOreadAttribute (ifp, i, groupname_stored));
  groupname_stored [asize] = '\0';

  /* the group name string is released with free() on every path,
     so do the comparison first and free before any early return */
  groupname = CCTK_GroupNameFromVarI (*index);
  names_match = CCTK_Equals (groupname_stored, groupname);
  free (groupname);
  if (! names_match) {
    snprintf (msg, sizeof (msg), "Groupnames don't match for '%s'", fullname);
    CCTK_WARN (2, msg);
    return (1);
  }

  /* read the group type */
  i = IOreadAttributeInfo (ifp, "grouptype", &atype, &asize);
  if (i < 0 || atype != FLEXIO_INT4 || asize != 1) {
    snprintf (msg, sizeof (msg), "Can't read grouptype attribute for '%s'", fullname);
    CCTK_WARN (2, msg);
    return (1);
  }
  CACTUS_IEEEIO_ERROR (IOreadAttribute (ifp, i, &gtype_stored));

  /* read the number of timelevels */
  i = IOreadAttributeInfo (ifp, "ntimelevels", &atype, &asize);
  if (i < 0 || atype != FLEXIO_INT4 || asize != 1) {
    snprintf (msg, sizeof (msg), "Can't read ntimelevels attribute for '%s'", fullname);
    CCTK_WARN (2, msg);
    return (1);
  }
  CACTUS_IEEEIO_ERROR (IOreadAttribute (ifp, i, &ntimelevels_stored));

  /* read the timelevel to restore */
  i = IOreadAttributeInfo (ifp, "timelevel", &atype, &asize);
  if (i < 0 || atype != FLEXIO_INT4 || asize != 1) {
    snprintf (msg, sizeof (msg), "Can't read timelevel attribute for '%s'", fullname);
    CCTK_WARN (2, msg);
    return (1);
  }
  CACTUS_IEEEIO_ERROR (IOreadAttribute (ifp, i, &timelevel_stored));
  *timelevel = timelevel_stored;

  /* verify group type, variable type, dims, sizes and ntimelevels
     (the fifth output of CCTK_GroupData() is not used here; i only serves
      as a scratch variable for it) */
  CCTK_GroupData (CCTK_GroupIndex (groupname_stored), gtype, &vtype, &rank,
                  &i, &ntimelevels);
  if (*gtype != gtype_stored) {
    snprintf (msg, sizeof (msg), "Group types don't match for '%s'", fullname);
    CCTK_WARN (2, msg);
    return (1);
  }
  if (ntimelevels != ntimelevels_stored) {
    snprintf (msg, sizeof (msg), "Number of timelevels don't match for '%s'", fullname);
    CCTK_WARN (2, msg);
    return (1);
  }

  /* The CCTK variable type defines do not correlate with the IEEEIO defines
     so compare them explicitly here.
     COMPLEX is deliberately not accepted: the corresponding comparison
     (FLEXIO_COMPLEX against CCTK_VARIABLE_COMPLEX) was disabled in the
     original code. */
  if (! ((vtype_stored == FLEXIO_REAL && vtype == CCTK_VARIABLE_REAL) ||
         (vtype_stored == FLEXIO_INT  && vtype == CCTK_VARIABLE_INT)  ||
         (vtype_stored == FLEXIO_CHAR && vtype == CCTK_VARIABLE_CHAR))) {
    snprintf (msg, sizeof (msg), "Variable types don't match for '%s'", fullname);
    CCTK_WARN (2, msg);
    return (1);
  }

  /* verify the dims and sizes */
  flag = 0;
  if (rank != rank_stored)
    flag = 1;
  switch (*gtype) {
    case GROUP_SCALAR:
      if (dims_stored [0] != 1)
        flag = 1;
      break;

    case GROUP_ARRAY:
      /*** FIXME: what's the local size of the array ??
           Until that is known the dimensions of arrays are not compared. ***/
      break;

    case GROUP_GF:
      /* dims_stored[] holds at most MAXDIM entries */
      if (rank > MAXDIM) {
        flag = 1;
        break;
      }
      for (i = 0; i < rank; i++)
        if ((unchunked ? GH->cctk_gsh [i] : GH->cctk_lsh [i]) != dims_stored [i])
          flag = 1;
      break;

    default:
      break;
  }
  if (flag) {
    snprintf (msg, sizeof (msg), "Variable sizes don't match for '%s'", fullname);
    CCTK_WARN (2, msg);
    return (1);
  }

  if (! CCTK_QueryGroupStorageI (GH, CCTK_GroupIndexFromVarI (*index))) {
    CCTK_WARN (2, "Can't read into variable with no storage");
    return (1);
  }

  return (0);
}


/* local routine GetChunkAttributes() reads in the info of the next dataset
   that should be a chunk.
   It verifies via the name attribute that this chunk belongs to the
   current variable given by its index.
*/

/*
 * GetChunkAttributes
 *
 * Reads the info and the "name" attribute of the next dataset in ifp and
 * checks that the name resolves to the variable index passed in.
 *
 *   GH     grid hierarchy (not used by this routine; kept so that callers
 *          need not change)
 *   ifp    file to read the dataset info and the attribute from
 *   index  variable index the dataset is expected to belong to
 *
 * Returns 0 if the dataset belongs to that variable, 1 otherwise (a warning
 * has been issued in that case).
 */
int GetChunkAttributes (cGH *GH, IOFile ifp, int index)
{
  int i;
  int atype;
  Long asize;
  int result;
  int vtype_stored, rank_stored, dims_stored [MAXDIM];
  char fullname [512];
  /* large enough for the message below plus a full-length name */
  char msg [sizeof (fullname) + 64];


  /* no PUGH handle is needed here, which also lets this routine compile
     when the PUGH header is not included */
  (void) GH;

  /* read the next dataset's info from the file */
  result = IOreadInfo (ifp, &vtype_stored, &rank_stored, dims_stored, MAXDIM);
  CACTUS_IEEEIO_ERROR (result);
  if (result == 0) {
    CCTK_WARN (1, "Can't read dataset info");
    return (1);
  }

  /* retrieve the name attribute
     (a negative size is rejected explicitly because the comparison against
      sizeof() alone depends on the integer conversion rules for Long) */
  i = IOreadAttributeInfo (ifp, "name", &atype, &asize);
  if (i < 0 || (atype != BYTE && atype != CHAR) ||
      asize < 0 || asize >= sizeof (fullname)) {
    CCTK_WARN (2, "Can't read name attribute");
    return (1);
  }
  CACTUS_IEEEIO_ERROR (IOreadAttribute (ifp, i, fullname));
  /* do not rely on the file to provide the terminating NUL */
  fullname [asize] = '\0';

  /* check if there is a matching variable */
  if (index != CCTK_VarIndex (fullname)) {
    snprintf (msg, sizeof (msg), "No matching variable found for '%s'", fullname);
    CCTK_WARN (2, msg);
    return (1);
  }

  return (0);
}


/*@@
   @routine    IOFlexIO_RestoreIEEEIOfile
   @date       Fri Jun 19 09:19:48 1998
   @author     Tom Goodale
   @desc
               Reads in data from an open IEEEIO file.
               Code was originally in StartupReader.
   @enddesc
   @calls
   @calledby
   @history
   @hauthor Gabrielle Allen @hdate Oct 17 1998
   @hdesc Changed logic so that cactus stops if any of the dimensions of the
          input file and the current cactus run differ.
@hauthor Thomas Radke @hdate May 05 1999
   @hdesc Added parameter unchunked
   @endhistory
 @@*/

/*
 * IOFlexIO_RestoreIEEEIOfile
 *
 *   GH                 grid hierarchy whose GH->data[index][timelevel]
 *                      arrays are filled
 *   ifp                file to read from (only used on the IO processor)
 *   file_ioproc        rank of the processor that reads the file for this
 *                      processor's group
 *   file_ioproc_every  number of consecutive processors (starting at
 *                      file_ioproc) served by one IO processor
 *   file_unchunked     nonzero if the file holds one dataset per variable
 *                      from which each processor's part is read with
 *                      IOreadChunk(); zero if each processor's part is a
 *                      dataset of its own
 *
 * The processor whose rank equals file_ioproc reads every dataset, keeps
 * its own part and (MPI builds only) sends the other parts to the
 * processors file_ioproc+1 ... of its group.  All other processors loop
 * receiving {index, timelevel} followed by the data until a negative index
 * arrives.
 *
 * Always returns 0.  Without CACTUSPUGH_PUGH the routine does nothing.
 */
int IOFlexIO_RestoreIEEEIOfile (cGH *GH, IOFile ifp, int file_ioproc, int file_ioproc_every, int file_unchunked)
{
#ifdef CACTUSPUGH_PUGH
  DECLARE_CCTK_PARAMETERS
  int index, gtype;
  int myproc, nprocs;
  int nDatasets = 0, currentDataset;
  int timelevel;                /* current timelevel to be restored */
  pGH *pughGH;                  /* PUGH extension handle */
  char msg [512];               /* just a message buffer */
#ifdef MPI
  int proc;
  CCTK_INT info [2];            /* communication buffer for MPI */
#endif


  /* Get the handle for PUGH extensions */
  pughGH = (pGH *) GH->extensions [CCTK_GHExtensionHandle ("PUGH")];

  myproc = CCTK_MyProc (GH);
  nprocs = CCTK_nProcs (GH);

  /* all IO procs determine the number of datasets in their checkpoint files */
  if (myproc == file_ioproc) {

    /* Get the number of sets */
    nDatasets = IOnDatasets (ifp);
    if (output_verbose)
      printf (" Input file has %d datasets\n", nDatasets);
  }

  /* In Cactus 3.x we had only datasets containing grid function data.
     This distributed data was stored as one dataset per processor within the
     group belonging to one IO processor. So there should be nGF*ioproc_every
     datasets within each checkpoint file.

     This consistency condition is no longer true
     because now there might be datasets containing a SCALAR grouptype.
     These datasets are stored only from the IO processors,
     and they are just distributed to the non-IO processors again
     during recovery.  The former check

       if (nDatasets % file_ioproc_every != 0)
         CCTK_WARN (0, "Number of datasets isn't a multiple of nioprocs");

     is therefore no longer done.
   */

  /* Now process the datasets.
     All IO processors read the datasets from their checkpoint file,
     verify their contents and communicate them to the non-IO processors. */

  /* At first the code for the IO processors ... */
  if (myproc == file_ioproc) {

    /* Seek here once to the beginning of the file, the file pointer
       is advanced then implicitly by subsequent calls to IOreadInfo() */
    CACTUS_IEEEIO_ERROR (IOseek (ifp, 0));

    /* Each IO processor loops over all available datasets, checks their
       consistency and broadcasts them to the non-IO processors. */
    for (currentDataset = 0; currentDataset < nDatasets; currentDataset++) {

      /* read in the next dataset's attributes and verify them */
      if (GetCommonAttributes (GH, ifp, file_unchunked, &index, &gtype, &timelevel)) {
        snprintf (msg, sizeof (msg), "Ignoring dataset %d", currentDataset);
        CCTK_WARN (1, msg);
      }

      /*** FIXME: process all group types ! ***/
      else if (gtype != GROUP_GF && gtype != GROUP_SCALAR) {
        CCTK_WARN (1, "Currently only restoring of GF and SCALAR datasets is supported");
      }

      /* Read in the data */
      else {

        if (output_verbose) {
          char *varname = CCTK_FullName (index);

          printf (" dataset %d: %s (timelevel %d)\n", currentDataset, varname, timelevel);
          free (varname);
        }

        /* braces are required here: the MPI-only else below must bind to
           this if regardless of what the error macro expands to */
        if (file_ioproc_every == 1) {
          CACTUS_IEEEIO_ERROR (IOread (ifp, GH->data [index][timelevel]));
        }
#ifdef MPI
        else {
          int npoints;
          int group_end;        /* one past the last processor of my group */
          void *buffer;
          int chunkdims [3], *chunkorigin;
          int element_size, mpi_type;

          switch (CCTK_VarTypeI (index)) {
            case CCTK_VARIABLE_CHAR:
              element_size = sizeof (CCTK_CHAR);
              mpi_type = PUGH_MPI_CHAR;
              break;
            case CCTK_VARIABLE_INT:
              element_size = sizeof (CCTK_INT);
              mpi_type = PUGH_MPI_INT;
              break;
            case CCTK_VARIABLE_REAL:
              element_size = sizeof (CCTK_REAL);
              mpi_type = PUGH_MPI_REAL;
              break;
            case CCTK_VARIABLE_COMPLEX:
              CCTK_WARN (1, "Restoring of complex datatypes not yet supported");
              continue;
            default:
              CCTK_WARN (1, "Unknown datatype in IOFlexIO_RestoreIEEEIOfile");
              continue;
          }

          /* the group served by this IO processor must not extend beyond
             the processors that actually exist */
          group_end = file_ioproc + file_ioproc_every;
          if (group_end > nprocs)
            group_end = nprocs;

          /* read my own data directly into data,
             read others data into buffer and communicate it */
          if (! file_unchunked || gtype == GROUP_SCALAR)
            CACTUS_IEEEIO_ERROR (IOread (ifp, GH->data [index][timelevel]));
          else {
            chunkdims [0] = pughGH->rnx [file_ioproc];
            chunkdims [1] = pughGH->rny [file_ioproc];
            chunkdims [2] = pughGH->rnz [file_ioproc];
            chunkorigin = pughGH->lb [file_ioproc];

            CACTUS_IEEEIO_ERROR (IOreadChunk (ifp, chunkdims, chunkorigin,
                                              GH->data [index][timelevel]));
          }

          /* read data for non-IO processors */
          if (gtype == GROUP_SCALAR) {
            /* scalars are sent straight from the variable's own storage */
            npoints = 1;
            buffer = GH->data [index][timelevel];
          } else {
            /* allocate memory for the biggest chunk of the other
               processors in my group (at least one element so that the
               allocation size is never zero) */
            npoints = 1;
            for (proc = file_ioproc + 1; proc < group_end; proc++)
              if (npoints < pughGH->rnpoints [proc])
                npoints = pughGH->rnpoints [proc];
            buffer = malloc ((size_t) npoints * (size_t) element_size);
            if (buffer == NULL) {
              /* NOTE(review): warning level 0 is presumably treated as
                 fatal by CCTK_WARN - confirm.  Should it return, leaving
                 the dataset loop still lets the termination message below
                 reach the other processors so that they do not wait
                 forever. */
              CCTK_WARN (0, "Out of memory in IOFlexIO_RestoreIEEEIOfile");
              break;
            }
          }

          for (proc = file_ioproc + 1; proc < group_end; proc++) {

            if (gtype != GROUP_SCALAR) {

              /* the layout of the file decides how a processor's part is
                 read, hence file_unchunked is tested here just as for the
                 IO processor's own data above */
              if (! file_unchunked) {
                /* Also increment dataset counter here !!! */
                currentDataset++;
                if (GetChunkAttributes (GH, ifp, index)) {
                  snprintf (msg, sizeof (msg), "Ignoring chunk in dataset %d", currentDataset+1);
                  CCTK_WARN (1, msg);
                  continue;
                }

                CACTUS_IEEEIO_ERROR (IOread (ifp, buffer));
              } else {
                chunkdims [0] = pughGH->rnx [proc];
                chunkdims [1] = pughGH->rny [proc];
                chunkdims [2] = pughGH->rnz [proc];
                chunkorigin = pughGH->lb [proc];

                CACTUS_IEEEIO_ERROR (IOreadChunk (ifp, chunkdims, chunkorigin, buffer));
              }
              npoints = pughGH->rnpoints [proc];
            }

            /* and finally send the index and the data */
            info [0] = index;
            info [1] = timelevel;
            CACTUS_MPI_ERROR (MPI_Send (info, 2, PUGH_MPI_INT, proc,
                                        STARTUPBASE, pughGH->PUGH_COMM_WORLD));
            CACTUS_MPI_ERROR (MPI_Send (buffer, npoints, mpi_type, proc,
                                        STARTUPBASE, pughGH->PUGH_COMM_WORLD));
          }

          /* only the non-scalar buffer was allocated above; for scalars
             buffer points into GH->data and must not be freed */
          if (gtype != GROUP_SCALAR)
            free (buffer);
        }
#endif

      } /* reading data for file_ioproc_every processors */

    } /* end of loop over all datasets */

#ifdef MPI
    /* Finally an invalid variable index is communicated to indicate
       completion to the non-IO processors of my group. */
    info [0] = -1;
    info [1] = 0;       /* not evaluated by the receiver, but defined */
    for (proc = file_ioproc + 1;
         proc < file_ioproc + file_ioproc_every && proc < nprocs;
         proc++)
      CACTUS_MPI_ERROR (MPI_Send (info, 2, PUGH_MPI_INT, proc,
                                  STARTUPBASE, pughGH->PUGH_COMM_WORLD));
#endif

  } else {

    /* And here the code for non-IO processors: */
#ifdef MPI
    int mpi_type;
    MPI_Status ms;

    /* They don't know how many datasets there are, because the IO processors
       could skip some on the fly during their consistency checks.
       The IO Processor sends the index of the variable to be processed next.
       So, all non-IO processors execute a loop where the termination condition
       is when an invalid index was received.
    */
    while (1) {
      /* receive the next variable index from the IO processor */
      CACTUS_MPI_ERROR (MPI_Recv (info, 2, PUGH_MPI_INT, file_ioproc,
                                  STARTUPBASE, pughGH->PUGH_COMM_WORLD, &ms));
      index = info [0];
      timelevel = info [1];

      /* check for termination condition */
      if (index < 0)
        break;

      switch (CCTK_VarTypeI (index)) {
        case CCTK_VARIABLE_CHAR:
          mpi_type = PUGH_MPI_CHAR;
          break;
        case CCTK_VARIABLE_INT:
          mpi_type = PUGH_MPI_INT;
          break;
        case CCTK_VARIABLE_REAL:
          mpi_type = PUGH_MPI_REAL;
          break;
        case CCTK_VARIABLE_COMPLEX:
          CCTK_WARN (1, "Restoring of complex datatypes not yet supported");
          continue;
        default:
          CCTK_WARN (1, "Unknown datatype in IOFlexIO_RestoreIEEEIOfile");
          continue;
      }

      /* receive following data from my IO processor
         (pughGH->npoints is passed as the receive count) */
      CACTUS_MPI_ERROR (MPI_Recv (GH->data [index][timelevel], pughGH->npoints,
                                  mpi_type, file_ioproc, STARTUPBASE,
                                  pughGH->PUGH_COMM_WORLD, &ms));
    }
#endif
  }
#endif /* CACTUSPUGH_PUGH */

  return (0);
}