From b4eff60e774efebe7d312998d2408978b71d90e5 Mon Sep 17 00:00:00 2001 From: tradke Date: Sun, 2 Mar 2003 16:33:14 +0000 Subject: Recombination is now done in z-slices, which greatly reduces the number of individual I/O writes. This shortens recombination time by a factor of proc_nx * proc_ny at best. git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGHIO/IOHDF5/trunk@162 4825ed28-b72c-4eae-9704-e50c059e567d --- src/util/hdf5_recombiner.c | 137 ++++++++++++++++++++++++++++++++------------- 1 file changed, 98 insertions(+), 39 deletions(-) diff --git a/src/util/hdf5_recombiner.c b/src/util/hdf5_recombiner.c index 9933942..1b11ff3 100644 --- a/src/util/hdf5_recombiner.c +++ b/src/util/hdf5_recombiner.c @@ -557,6 +557,10 @@ static herr_t CopyAttribute (hid_t from, Checks whether the passed group indicates a group with chunked datasets. If so it will recombine these from all chunked input files. + In order to minimize the number of write I/O calls, the + recombination is done in z-slices (slices along the least changing + dimension of a 3D dataset). Each z-slice is recombined in memory from the + individual chunks, and then written out by a single H5Dwrite(). 
@enddesc @calls CopyAttribute @@ -576,13 +580,14 @@ static herr_t CopyAttribute (hid_t from, static int RecombineGroupData (const char *groupname) { int infile, chunk, nchunks; + size_t size; hid_t group, attr, datatype, dataspace; hid_t chunked_datatype, chunked_dataspace, chunked_dataset; hid_t unchunked_datatype, unchunked_dataspace, unchunked_dataset; - hsize_t tmp1, ndims, chunk_ndims, *global_size, *chunk_dims; - hssize_t tmp2, *chunk_origin; - size_t chunksize; - void *chunkdata; + hid_t slice_dataspace; + hsize_t tmp1, ndims, chunk_ndims, *dims, *chunk_dims, *slice_dims; + hssize_t tmp2, *chunk_origin, *slice_origin; + void *data; char *chunkname; H5T_class_t class; @@ -611,11 +616,11 @@ static int RecombineGroupData (const char *groupname) "of type integer !\n", groupname); ndims = 0; } - global_size = NULL; + dims = NULL; if (ndims > 0) { - global_size = calloc (ndims, sizeof (hsize_t)); - CHECK_ERROR (H5Aread (attr, H5T_NATIVE_HSIZE, global_size)); + dims = calloc (ndims, sizeof (hsize_t)); + CHECK_ERROR (H5Aread (attr, H5T_NATIVE_HSIZE, dims)); } CHECK_ERROR (H5Sclose (dataspace)); CHECK_ERROR (H5Tclose (datatype)); @@ -635,20 +640,21 @@ static int RecombineGroupData (const char *groupname) attribute, with the least changing dimension being the first element */ for (chunk_ndims = 0; chunk_ndims < ndims/2; chunk_ndims++) { - tmp1 = global_size[chunk_ndims]; - global_size[chunk_ndims] = global_size[ndims - chunk_ndims - 1]; - global_size[ndims - chunk_ndims - 1] = tmp1; + tmp1 = dims[chunk_ndims]; + dims[chunk_ndims] = dims[ndims - chunk_ndims - 1]; + dims[ndims - chunk_ndims - 1] = tmp1; } - CHECK_ERROR (unchunked_dataspace = H5Screate_simple (ndims, global_size, + CHECK_ERROR (unchunked_dataspace = H5Screate_simple (ndims, dims, NULL)); - /* don't need this anymore */ - free (global_size); /* allocate buffers to read a 'chunk_origin' attribute and the chunk dims */ - chunk_origin = calloc (ndims, sizeof (hssize_t)); - chunk_dims = calloc (ndims, sizeof 
(hsize_t)); + chunk_origin = calloc (2 * ndims, sizeof (hssize_t)); + chunk_dims = calloc (2 * ndims, sizeof (hsize_t)); + slice_origin = chunk_origin + ndims; + slice_dims = chunk_dims + ndims; - unchunked_dataset = unchunked_datatype = -1; + unchunked_dataset = unchunked_datatype = slice_dataspace = -1; + data = NULL; /* now read all the chunks from all input files and write them into the unchunked output dataset as a hyperslab */ @@ -747,7 +753,7 @@ static int RecombineGroupData (const char *groupname) continue; } - /* read the chunk data */ + /* get the chunk dims */ CHECK_ERROR (chunked_dataspace = H5Dget_space (chunked_dataset)); chunk_ndims = H5Sget_simple_extent_ndims (chunked_dataspace); if (chunk_ndims != ndims) @@ -763,6 +769,18 @@ static int RecombineGroupData (const char *groupname) CHECK_ERROR (H5Sget_simple_extent_dims (chunked_dataspace, chunk_dims, NULL)); + /* check the chunk's dataspace to be a simple one */ + if (H5Sis_simple (chunked_dataspace) <= 0) + { + fprintf (stderr, "WARNING: dataset '%s' is not a simple " + "multidimensional dataset (dataset will be ignored) !\n", + chunkname); + CHECK_ERROR (H5Sclose (chunked_dataspace)); + CHECK_ERROR (H5Dclose (chunked_dataset)); + nerrors++; + continue; + } + /* HDF5 needs the least changing dimension first */ for (chunk_ndims = 0; chunk_ndims < ndims/2; chunk_ndims++) { @@ -770,37 +788,77 @@ static int RecombineGroupData (const char *groupname) chunk_origin[chunk_ndims] = chunk_origin[ndims - chunk_ndims - 1]; chunk_origin[ndims - chunk_ndims - 1] = tmp2; } - CHECK_ERROR (H5Sselect_hyperslab (unchunked_dataspace, H5S_SELECT_SET, - chunk_origin, NULL, chunk_dims, NULL)); - chunksize = H5Tget_size (unchunked_datatype); - if (H5Sis_simple (chunked_dataspace) > 0) + + /* give some info output */ + printf (" - file %d chunk %d\n", infile, chunk + ioproc_every*infile); + printf (" chunk dimensions: [%d", (int) chunk_dims[ndims - 1]); + for (chunk_ndims = 1; chunk_ndims < ndims; chunk_ndims++) { - 
chunksize *= H5Sget_simple_extent_npoints (chunked_dataspace); + printf (", %d", (int) chunk_dims[ndims - chunk_ndims - 1]); } - if (chunksize > 0) + printf ("] chunk origin: [%d", (int) chunk_origin[ndims - 1]); + for (chunk_ndims = 1; chunk_ndims < ndims; chunk_ndims++) { - /* give some info output */ - printf (" - file %d chunk %d\n", infile,chunk + ioproc_every*infile); - printf (" chunk dimensions: [%d", (int) chunk_dims[ndims - 1]); - for (chunk_ndims = 1; chunk_ndims < ndims; chunk_ndims++) + printf (", %d", (int) chunk_origin[ndims - chunk_ndims - 1]); + } + printf ("]\n"); + + if (chunk_origin[ndims - 1] == 0 && + (ndims == 1 || chunk_origin[ndims - 2] == 0)) + { + memset (slice_origin, 0, ndims * sizeof (hssize_t)); + memcpy (slice_dims, chunk_dims, ndims * sizeof (hsize_t)); + slice_dims[ndims - 1] = dims[ndims - 1]; + if (ndims > 1) { - printf (", %d", (int) chunk_dims[ndims - chunk_ndims - 1]); + slice_dims[ndims - 2] = dims[ndims - 2]; } - printf ("] chunk origin: [%d", (int) chunk_origin[ndims - 1]); - for (chunk_ndims = 1; chunk_ndims < ndims; chunk_ndims++) + CHECK_ERROR (slice_dataspace = H5Screate_simple (ndims, slice_dims, + NULL)); + size = H5Tget_size (unchunked_datatype) * + (size_t) H5Sget_simple_extent_npoints (slice_dataspace); + data = malloc (size); + if (! 
data) { - printf (", %d", (int) chunk_origin[ndims - chunk_ndims - 1]); + fprintf (stderr, "WARNING: couldn't allocate %d bytes to recombine " + "slice of dataset '%s' !\n", size, chunkname); + CHECK_ERROR (H5Sclose (slice_dataspace)); + CHECK_ERROR (H5Sclose (chunked_dataspace)); + CHECK_ERROR (H5Dclose (chunked_dataset)); + nerrors++; + continue; } - printf ("]\n"); + } + slice_origin[ndims - 1] = chunk_origin[ndims - 1]; + if (ndims > 1) + { + slice_origin[ndims - 2] = chunk_origin[ndims - 2]; + } - /* read the chunk and write it to the unchunked dataset */ - chunkdata = malloc (chunksize); - CHECK_ERROR (H5Dread (chunked_dataset, unchunked_datatype, H5S_ALL, - H5S_ALL, H5P_DEFAULT, chunkdata)); + CHECK_ERROR (H5Sselect_hyperslab (slice_dataspace, H5S_SELECT_SET, + slice_origin, NULL, chunk_dims, NULL)); + CHECK_ERROR (H5Dread (chunked_dataset, unchunked_datatype, slice_dataspace, + H5S_ALL, H5P_DEFAULT, data)); + + /* if this was the last chunk for the current z-slice + then write it back now */ + if (chunk_dims[ndims - 1] + chunk_origin[ndims - 1] == dims[ndims - 1] && + (ndims == 1 || + chunk_dims[ndims - 2] + chunk_origin[ndims - 2] == dims[ndims - 2])) + { + /* reset the x/y-offsets to zero */ + chunk_origin[ndims - 1] = 0; + if (ndims > 1) + { + chunk_origin[ndims - 2] = 0; + } + CHECK_ERROR (H5Sselect_hyperslab (unchunked_dataspace, H5S_SELECT_SET, + chunk_origin, NULL, slice_dims,NULL)); + CHECK_ERROR (H5Sselect_all (slice_dataspace)); CHECK_ERROR (H5Dwrite (unchunked_dataset, unchunked_datatype, - chunked_dataspace, unchunked_dataspace, - H5P_DEFAULT, chunkdata)); - free (chunkdata); + slice_dataspace, unchunked_dataspace, + H5P_DEFAULT, data)); + free (data); } CHECK_ERROR (H5Dclose (chunked_dataset)); CHECK_ERROR (H5Sclose (chunked_dataspace)); @@ -830,6 +888,7 @@ static int RecombineGroupData (const char *groupname) free (chunk_dims); free (chunk_origin); free (chunkname); + free (dims); /* indicate no further processing of this group in H5Giterate() 
*/ return (1); -- cgit v1.2.3