/*@@ @file ieee_recombiner.c @date Fri 14 Dec 2001 @author Thomas Radke @desc Utility program to recombine chunked IEEEIO datafiles into a single unchunked IEEEIO datafile. @enddesc @version $Id$ @@*/ #include #include #include #include /* sysconf(3) */ /* CCTK includes */ #include "cctk.h" /* FlexIO includes */ #include "IEEEIO.h" #include "IOProtos.h" /* the rcs ID and its dummy function to use it */ static const char *rcsid = "$Header$"; CCTK_FILEVERSION(CactusPUGHIO_IOFlexIO_util_ieee_recombiner_c) /* maximum number of dataset dimensions we can deal with (because we are using fixed-sized arrays */ #define MAXDIM 10 /* maximum length of attribute name */ #define MAXNAMELEN 512 /*#define DEBUG 1*/ /* structure describing a dataset */ typedef struct { char *name; /* the variable name */ Int grouptype; /* the variable's grouptype (scalar or array) */ Int timelvl; /* the variable's timelevel */ Int iteration; /* the current iteration number */ int global_rank; /* dimensionality of the variable */ Int global_dims [MAXDIM]; /* the variable's sizes in each dimension */ } common_attrs_t; /* local function prototypes */ static int readCommonAttributes (IOFile file, common_attrs_t *common_attrs); static const char *verifyCommonAttributes (IOFile file, common_attrs_t *common_attrs); static int verifyChunkAttributes (IOFile file, common_attrs_t *common_attrs, int chunk_rank, Int *chunk_origin); static void copyAttributes (IOFile infile, IOFile outfile, int nprocs, int attr_set, int current_set); int main (int argc, char **argv) { int datatype; /* datatype of dataset or attribute */ Long len; /* length of attribute data */ int attno; /* index of current attribute */ Int nprocs; /* total number of processors */ Int ioproc_every; /* IO was performed on every n'th processor */ int nioprocs; /* number of IO procs (= number of input files) */ Int unchunked; /* flag indicating whether file data is unchunked */ int i; /* general looper */ int ioproc; /* looper for the current input file */ int attr_set; /* set to start copying attributes from */ int *current_set; /* array of current set counters for each file */ int chunk_rank; /* dimensions of current chunk */ int chunk_dims [MAXDIM]; /* sizes of current chunk in each dimension */ int *num_chunks; /* number of chunks per dataset for each file */ char **infilenames; /* array of input filenames */ IOFile *infiles, outfile; /* array of input filehandles and output fh */ int max_filehandles; /* maximum number of open files */ common_attrs_t common_attrs; /* to make the compiler happy */ unchunked = 0; ioproc_every = 0; nprocs = 0; datatype = 0; chunk_rank = 0; /* Say hello */ printf ("\n"); printf ("-------------------------------\n"); printf ("Cactus 4 IEEEIO File Recombiner\n"); printf ("-------------------------------\n"); printf ("\n"); /* Give some help if called with incorrect parameters */ if (argc != 3) { printf ("Usage: recombiner \n"); printf (" eg, recombiner alp.file_0.ieee alp.ieee\n\n"); return (0); } /* determine filehandle limit */ max_filehandles = sysconf (_SC_OPEN_MAX); if (max_filehandles < 0) { printf ("Cannot determine filehandle limit\n"); return (-1); } /* subtract stdin, stdout, stderr, and output filehandle */ max_filehandles -= 4; /* open input and output file */ infiles = (IOFile *) malloc (1 * sizeof (IOFile)); infiles [0] = IEEEopen (argv [1], "r"); if (! IOisValid (infiles [0])) { printf ("Cannot open input file '%s' !\n\n", argv [1]); return (-1); } outfile = IEEEopen (argv [2], "w"); if (! IOisValid (outfile)) { printf ("Cannot open output file '%s' !\n\n", argv [2]); return (-1); } /* check if the input file(s) is already unchunked */ attno = IOreadAttributeInfo (infiles [0], "unchunked", &datatype, &len); if (attno < 0) { printf ("Cannot find 'unchunked' attribute !\n"); return (-1); } if (IOreadAttribute (infiles [0], attno, &unchunked) < 0) { printf ("Cannot read 'unchunked' attribute !\n"); return (-1); } if (unchunked) { printf ("IEEEIO file '%s' already contains unchunked data !\n\n", argv [1]); return (0); } /* OK so now read the 'GH$xxx' attributes which describe how the data was written. Note that these attributes are attached to the last chunked dataset of the very first Cactus variable output. Since we don't know yet into how many chunks a dataset is splitted we have to seek here until we find the attributes. */ attno = -1; for (i = 0; i < IOnDatasets (infiles [0]); i++) { IOseek (infiles [0], i); /* find the 'GH$nprocs' attribute, the other attributes must be in that dataset too */ attno = IOreadAttributeInfo (infiles [0], "GH$nprocs", &datatype, &len); if (attno >= 0) break; } if (attno < 0) { printf ("Cannot find 'GH$nprocs' attribute !\n"); return (-1); } if (IOreadAttribute (infiles [0], attno, &nprocs) < 0) { printf ("Cannot read 'GH$nprocs' attribute !\n"); return (-1); } attno = IOreadAttributeInfo (infiles [0], "GH$ioproc_every", &datatype, &len); if (attno < 0) { printf ("Cannot find 'GH$ioproc_every' attribute !\n"); return (-1); } if (IOreadAttribute (infiles [0], attno, &ioproc_every) < 0) { printf ("Cannot read 'GH$ioproc_every' attribute !\n"); return (-1); } nioprocs = nprocs / ioproc_every; if (nprocs % ioproc_every) nioprocs++; printf ("Recombining IEEEIO data from %d processors, " "output was performed by %d processors\n\n", nprocs, nioprocs); /* allocate memory for arrays */ infiles = (IOFile *) realloc (infiles, nioprocs * sizeof (IOFile)); infilenames = (char **) malloc (nioprocs * sizeof (char *)); /* Now check that all chunked input files can be opened */ /* Also collect the number of datasets here */ if (nioprocs == 1) { /* not much to be done here */ infilenames [0] = argv [1]; IEEEbufferOn (infiles [0], -1); } else { char *tmp, fname_template [100]; /* close the file (it might not be the one from processor 0) */ IOclose (infiles [0]); /* Get the basename of input file(s) */ if ((tmp = strstr (argv [1], ".file_")) == NULL) { printf ("Cannot parse file name !\n"); printf ("Is '%s' really a chunked Cactus IEEEIO file ?\n", argv [1]); return (-1); } /* build the filename template */ strncpy (fname_template, argv [1], tmp - argv [1] + 6); fname_template [tmp - argv [1] + 6] = 0; strcat (fname_template, "%d.ieee"); /* now loop through all the files */ for (ioproc = 0; ioproc < nioprocs; ioproc++) { /* build the input filename */ infilenames [ioproc] = (char *) malloc (strlen (fname_template) + 10); sprintf (infilenames [ioproc], fname_template, ioproc); infiles [ioproc] = IEEEopen (infilenames [ioproc], "r"); if (! IOisValid (infiles [ioproc])) { printf ("Cannot open chunked IEEEIO input file '%s' !\n", infilenames [ioproc]); return (-1); } /* close file if filehandle limit would be exceeded */ if (ioproc > max_filehandles) IOclose (infiles [ioproc]); else IEEEbufferOn (infiles [ioproc], -1); } } /* OK great and start going through the files. Generically these will be multiple chunked files. So do an outer loop over nioprocs and an inner loop over the set. This means we need to keep the current set in an array for each file. */ current_set = (int *) calloc (nioprocs, sizeof (int)); num_chunks = (int *) malloc (nioprocs * sizeof (int)); /* compute the number of chunks per array dataset for each file */ for (ioproc = 0; ioproc < nioprocs; ioproc++) num_chunks [ioproc] = ioproc_every; if (nprocs % ioproc_every) num_chunks [nioprocs - 1] = nprocs % ioproc_every; /* seek back to first dataset */ IOseek (infiles [0], 0); /* For each data set in file 0; Note this makes the (implicit but important) assumption that you are working on a set of files which make sense together. */ while (current_set [0] < IOnDatasets (infiles [0])) { /*** now identify the next dataset in file 0 by reading its name, grouptype, timelevel, and iteration attribute ***/ /* advance to the next set */ IOreadInfo (infiles [0], &datatype, &chunk_rank, chunk_dims, MAXDIM); if (readCommonAttributes (infiles [0], &common_attrs) < 0) { printf ("Is file '%s' really a chunked Cactus IEEEIO file ?\n\n", infilenames [0]); return (-1); } /* Find out what kind of data we are dealing with */ if (common_attrs.global_rank != chunk_rank) { printf ("Dataset dimension does not match global size attribute !\n"); return (-1); } printf ("Found variable named '%s'\n", common_attrs.name); printf (" timelevel : %d\n", (int) common_attrs.timelvl); printf (" iteration : %d\n", (int) common_attrs.iteration); printf (" global size: %d", (int) common_attrs.global_dims [0]); for (i = 1; i < common_attrs.global_rank; i++) printf (", %d", (int) common_attrs.global_dims [i]); printf ("\n Copying %s\n", common_attrs.grouptype == CCTK_SCALAR ? "scalar data" : "data chunks"); /* reserve a chunk for array variables */ if (common_attrs.grouptype != CCTK_SCALAR) { int global_dims [MAXDIM]; /* copy from Int to int */ for (i = 0; i < common_attrs.global_rank; i++) global_dims [i] = (int) common_attrs.global_dims [i]; IOreserveChunk (outfile, datatype, common_attrs.global_rank, global_dims); } /* keep the set where we will copy the attributes from later on */ attr_set = current_set [0]; /* loop through all input files and copy the chunks */ for (ioproc = 0; ioproc < nioprocs; ioproc++) { int chunk; Int chunk_origin [MAXDIM]; int chunk_origin_int [MAXDIM]; void *data; /* Re-open the file if it was closed before */ if (ioproc > max_filehandles) { #ifdef DEBUG printf ("reopening input file '%s'\n", infilenames [ioproc]); #endif infiles [ioproc] = IEEEopen (infilenames [ioproc], "r"); if (! IOisValid (infiles [ioproc])) { printf ("Cannot reopen input file '%s'\n", infilenames [ioproc]); return (-1); } IEEEbufferOn (infiles [ioproc], -1); IOseek (infiles [ioproc], current_set [ioproc]); } /* advance to the next set (if not already there) */ if (ioproc != 0) { const char *error; IOreadInfo (infiles [ioproc], &datatype, &chunk_rank, chunk_dims, MAXDIM); /* verify dataset consistency with file 0 */ if ((error = verifyCommonAttributes (infiles [ioproc], &common_attrs))) { printf ("Dataset inconsistency between input files: %s !\n\n", error); return (-1); } } /* read the data */ data = malloc (IOnBytes (datatype, chunk_rank, chunk_dims)); IOread (infiles [ioproc], data); /* write the data */ /* SCALAR variables are only written once from ioproc 0 */ if (common_attrs.grouptype != CCTK_SCALAR) { /* check consistency between chunks and dataset */ if (verifyChunkAttributes (infiles [ioproc], &common_attrs, chunk_rank, chunk_origin) < 0) { printf ("Dataset inconsistency between input files !\n\n"); return (-1); } /* copy from Int to int */ for (i = 0; i < chunk_rank; i++) chunk_origin_int [i] = (int) chunk_origin [i]; IOwriteChunk (outfile, chunk_dims, chunk_origin_int, data); printf (" - file %d set %d\n", ioproc, current_set [ioproc]); printf (" chunk dimensions: %d", chunk_dims [0]); for (i = 1; i < chunk_rank; i++) printf (", %d", chunk_dims [i]); printf (" chunk origin: %d", chunk_origin_int [0]); for (i = 1; i < chunk_rank; i++) printf (", %d", chunk_origin_int [i]); printf ("\n"); } else if (ioproc == 0) { IOwrite (outfile, datatype, chunk_rank, chunk_dims, data); } free (data); /* increment set counter */ current_set [ioproc]++; /* for array variables: read the remaining chunks */ if (common_attrs.grouptype != CCTK_SCALAR) { for (chunk = 1; chunk < num_chunks [ioproc]; chunk++) { /* advance file pointer to the next set */ IOreadInfo (infiles [ioproc], &datatype, &chunk_rank, chunk_dims, MAXDIM); /* check consistency between chunks and dataset */ if (verifyChunkAttributes (infiles [ioproc], &common_attrs, chunk_rank, chunk_origin) < 0) { printf ("Dataset inconsistency between input files !\n\n"); return (-1); } /* copy from Int to int */ for (i = 0; i < chunk_rank; i++) chunk_origin_int [i] = (int) chunk_origin [i]; printf (" - file %d set %d\n", ioproc, current_set [ioproc]); printf (" chunk dimensions: %d", chunk_dims [0]); for (i = 1; i < chunk_rank; i++) printf (", %d", chunk_dims [i]); printf (" chunk origin: %d", chunk_origin_int [0]); for (i = 1; i < chunk_rank; i++) printf (", %d", chunk_origin_int [i]); printf ("\n"); /* read the data */ data = malloc (IOnBytes (datatype, chunk_rank, chunk_dims)); IOread (infiles [ioproc], data); /* write the data */ IOwriteChunk (outfile, chunk_dims, chunk_origin_int, data); free (data); /* increment set counter */ current_set [ioproc]++; } } /* close input file if filehandle limit would be exceeded */ if (ioproc > max_filehandles) IOclose (infiles [ioproc]); } /* end looping through all ioprocs */ /* now copy all attributes from input file 0 sets to */ printf (" Copying attributes from sets %d to %d of file 0\n", attr_set, current_set [0] - 1); copyAttributes (infiles [0], outfile, nprocs, attr_set, current_set [0]); printf ("\n"); } /* end of looping through all sets of file 0 */ /* close all open files */ for (ioproc = 0; ioproc < nioprocs; ioproc++) { if (ioproc <= max_filehandles) IOclose (infiles [ioproc]); free (infiles [ioproc]); } IOclose (outfile); /* clean up */ if (common_attrs.name) free (common_attrs.name); free (infiles); return (0); } /*************************** local routines **********************************/ int readCommonAttributes (IOFile file, common_attrs_t *common_attrs) { int attr_index; /* attribute index */ int attr_type; /* attribute datatype */ Long attr_len; /* length of attribute data */ /* to make the compiler happy */ attr_len = 0; attr_type = 0; memset (common_attrs->global_dims, 0, sizeof (common_attrs->global_dims)); /* read the name of the dataset */ attr_index = IOreadAttributeInfo (file, "name", &attr_type, &attr_len); if (attr_index < 0) { printf ("Cannot find name of dataset !\n"); return (-1); } common_attrs->name = (char *) malloc ((attr_len+1) * sizeof (char)); IOreadAttribute (file, attr_index, common_attrs->name); /* read the group type of the dataset */ /* assume CCTK_GF for Cactus 3 data files */ attr_index = IOreadAttributeInfo (file, "grouptype", &attr_type, &attr_len); if (attr_index < 0 || attr_type != INT32 || attr_len != 1) { common_attrs->grouptype = CCTK_GF; } else { IOreadAttribute (file, attr_index, &common_attrs->grouptype); } /* read the timelevel */ /* assume timelevel 0 for Cactus 3 data files */ attr_index = IOreadAttributeInfo (file, "timelevel", &attr_type, &attr_len); if (attr_index < 0 || attr_type != INT32 || attr_len != 1) { common_attrs->timelvl = 0; } else { IOreadAttribute (file, attr_index, &common_attrs->timelvl); } /* read the iteration number */ attr_index = IOreadAttributeInfo (file, "iteration", &attr_type, &attr_len); if (attr_index < 0 || attr_type != INT32 || attr_len != 1) { printf ("Cannot find iteration attribute !\n"); return (-1); } IOreadAttribute (file, attr_index, &common_attrs->iteration); /* read the global size of the dataset */ attr_index = IOreadAttributeInfo (file, "global_size", &attr_type, &attr_len); if (attr_index < 0 || attr_type != INT32) { printf ("Cannot find global size attribute !\n"); return (-1); } if (attr_len >= MAXDIM) { printf ("Buffer overflow reading global size attribute !\n"); return (-1); } IOreadAttribute (file, attr_index, common_attrs->global_dims); common_attrs->global_rank = (int) attr_len; return (0); } const char *verifyCommonAttributes (IOFile file, common_attrs_t *common_attrs) { const char *retval = NULL; common_attrs_t file_attrs; if (readCommonAttributes (file, &file_attrs) < 0) { retval = "Could not read common attributes"; } else { if (strcmp (file_attrs.name, common_attrs->name)) retval = "dataset names differ"; else if (file_attrs.grouptype != common_attrs->grouptype) retval = "group types differ"; else if (file_attrs.timelvl != common_attrs->timelvl) retval = "timelevel differs"; else if (file_attrs.iteration != common_attrs->iteration) retval = "iteration number differs"; else if (file_attrs.global_rank != common_attrs->global_rank) retval = "global size differs"; else if (memcmp (file_attrs.global_dims, common_attrs->global_dims, file_attrs.global_rank * sizeof (Int))) retval = "global size differs"; } if (file_attrs.name) free (file_attrs.name); return (retval); } int verifyChunkAttributes (IOFile file, common_attrs_t *common_attrs, int chunk_rank, Int *chunk_origin) { int i; /* looper */ int attr_index; /* attribute index */ int attr_type; /* attribute datatype */ Long attr_len; /* length of attribute data */ char *chunk_name; /* chunk's name */ Int chunk_iteration; /* chunk's iteration number */ Int global_dims [MAXDIM]; /* chunk's global size information */ /* to make the compiler happy */ chunk_iteration = 0; attr_len = 0; attr_type = 0; if (chunk_rank != common_attrs->global_rank) { printf ("Chunk rank doesn't match dataset rank !\n"); return (-1); } /* read the chunk's name */ attr_index = IOreadAttributeInfo (file, "name", &attr_type, &attr_len); if (attr_index < 0) { printf ("Cannot find name of chunk !\n"); return (-1); } chunk_name = (char *) malloc ((attr_len+1) * sizeof (char)); IOreadAttribute (file, attr_index, chunk_name); if (strcmp (chunk_name, common_attrs->name)) { printf ("Chunk name doesn't match dataset name !\n"); free (chunk_name); return (-1); } free (chunk_name); /* read the chunk's iteration number */ attr_index = IOreadAttributeInfo (file, "chunk_dataset",&attr_type,&attr_len); if (attr_index < 0 || attr_type != INT32 || attr_len != 1) { printf ("Cannot find chunk_dataset attribute !\n"); return (-1); } IOreadAttribute (file, attr_index, &chunk_iteration); if (chunk_iteration != common_attrs->iteration) { printf("Chunk iteration number doesn't match dataset iteration number !\n"); return (-1); } /* read the chunk's global size */ attr_index = IOreadAttributeInfo (file, "global_size", &attr_type, &attr_len); if (attr_index < 0 || attr_type != INT32) { printf ("Cannot find chunk's global size attribute !\n"); return (-1); } if (attr_len >= MAXDIM) { printf ("Buffer overflow reading chunk's global size attribute !\n"); return (-1); } if ((int) attr_len != common_attrs->global_rank) { printf ("Chunk's global sizes' dimensions don't match dataset !\n"); return (-1); } IOreadAttribute (file, attr_index, global_dims); for (i = 0; i < chunk_rank; i++) if (global_dims [i] != common_attrs->global_dims [i]) { printf ("Chunk's global size doesn't match dataset's global size !\n"); return (-1); } /* read the chunk's origin */ attr_index = IOreadAttributeInfo (file, "chunk_origin", &attr_type,&attr_len); if (attr_index < 0 || attr_type != INT32) { printf ("Cannot find chunk's origin attribute !\n"); return (-1); } if (attr_len >= MAXDIM) { printf ("Buffer overflow reading chunk's origin attribute !\n"); return (-1); } if ((int) attr_len != common_attrs->global_rank) { printf ("Chunk's origin dimensions don't match dataset !\n"); return (-1); } IOreadAttribute (file, attr_index, chunk_origin); return (0); } /*** copyAttributes() copies all attributes from sets up to but excluding of the given input file to the current set of the output file. The special attributes 'GH$nprocs', 'GH$ioproc_every', and 'unchunked' are overridden by appropriate values for unchunked output. This routines leaves the input file pointer set to . ***/ void copyAttributes (IOFile infile, IOFile outfile, int nprocs, int attr_set, int current_set) { int attr, num_attrs; char name [MAXNAMELEN]; void *buffer; Long num_elems; int tmp_int; int datatype; /* to make the compiler happy */ num_elems = 0; datatype = 0; for (; attr_set < current_set; attr_set++) { IOseek (infile, attr_set); num_attrs = IOnAttributes (infile); for (attr = 0; attr < num_attrs; attr++) { IOreadIndexedAttributeInfo (infile, attr, name, &datatype, &num_elems, MAXNAMELEN); #ifdef DEBUG printf ("Reading attribute : %s\n", name); #endif /* copy from Long to int */ tmp_int = num_elems; buffer = malloc (IOnBytes (datatype, 1, &tmp_int)); IOreadAttribute (infile, attr, buffer); /* reset 'GH$nprocs', 'GH$ioproc_every', and 'unchunked' attributes in the unchunked file */ if (! strcmp (name, "GH$nprocs")) *(Int *) buffer = 1; if (! strcmp (name, "GH$ioproc_every")) *(Int *) buffer = nprocs; if (! strcmp (name, "unchunked")) *(Int *) buffer = 1; IOwriteAttribute (outfile, name, datatype, num_elems, buffer); free (buffer); } } /* seek to which is the next one to be processed */ IOseek (infile, current_set); }