"""
This module provides a wrapper around a single HDF5 file written by Cactus
(DataFile) and some helper classes for accessing the data.
"""

import h5py
import numpy as np


class _SliceLayout(object):
    """Grid geometry of a single data slice: per-axis coordinate arrays."""

    ### public ###

    # number of spatial dimensions
    nb_dims = None
    # list of 1D numpy arrays of coordinate points, one per dimension
    coords = None

    ### private ###
    _origin = None
    _delta = None
    _shape = None

    def __init__(self, origin, delta, shape):
        """
        origin -- coordinate of the first grid point along each axis
        delta  -- grid spacing along each axis
        shape  -- number of grid points along each axis

        Raises ValueError when the sequences are empty or their lengths
        disagree.
        """
        if (len(origin) <= 0 or len(origin) != len(delta) or
            len(origin) != len(shape)):
            raise ValueError("Invalid slice description")

        self.nb_dims = len(origin)
        self._origin = origin
        self._delta = delta
        self._shape = shape

        self.coords = []
        for i in range(self.nb_dims):
            # endpoint = False: the last point is origin + delta * (shape - 1)
            self.coords.append(np.linspace(origin[i], origin[i] + delta[i] * shape[i],
                                           shape[i], endpoint = False))

class _DataSlice(object):
    """One slice of data: a single iteration on a single refinement level."""

    # _SliceLayout describing the grid geometry of this slice
    layout = None

    # the actual data: an h5py dataset, or a plain numpy array for
    # merged multi-component slices
    data = None

    # iteration number of this slice
    it = None
    # simulation time of this slice
    time = None

    def __init__(self, data, layout):
        self.layout = layout
        self.data = data

        # a plain numpy array (merged slice) carries no HDF5 attributes;
        # in that case the caller fills in it/time afterwards.  The original
        # code read data.attrs unconditionally, which raised AttributeError
        # for merged slices.
        attrs = getattr(data, 'attrs', {})
        if 'timestep' in attrs:
            self.it = attrs['timestep']
        if 'time' in attrs:
            self.time = attrs['time']

    def __getitem__(self, key):
        return self.data[key]

class _DataRefinementLevel(object):
    """A single mesh-refinement level of a dataset."""

    # refinement level number (0 = coarsest)
    n = None

    ### private ###
    # the _DataSet this refinement level belongs to
    _ds = None

    # number of iterations per time step on this level
    _stepsize_iter = None

    def __init__(self, n, ds):
        self._ds = ds
        self.n = n

        if ds._df._stepsize_iter is not None:
            # every finer level halves the time step, so the iteration
            # stride halves as well; integer division keeps it an int
            self._stepsize_iter = ds._df._stepsize_iter // (1 << n)

    def __repr__(self):
        return 'rl%d@<%s>' % (self.n, self._ds.name)

    def __iter__(self):
        # yield one slice per (iteration, time) pair stored on this level;
        # simply falling off the end terminates the generator (raising
        # StopIteration inside a generator is an error per PEP 479)
        for it, t in self._ds._df.itertimes[self.n]:
            yield self._ds.slice(it, rl = self.n)


class _DataSet(object):
    """A single gridfunction stored in a DataFile."""

    # name of the gridfunction as stored in the file
    name = None
    # number of components (grid patches) per slice
    nb_components = None
    # list of _DataRefinementLevel objects present for this gridfunction
    rl = None

    ### private ###
    # the parent DataFile
    _df = None

    def __init__(self, df, name):
        self._df = df
        self.name = name

        # detect the number of components: a single-component dataset has
        # no ' c=N' suffix in its HDF5 dataset name, so probe without one
        # first, then count the components that do resolve
        try:
            self._slice_single_component(self._df._first_iter, 0)
            self.nb_components = 1
        except KeyError:
            c = 0
            while True:
                try:
                    self._slice_single_component(self._df._first_iter, 0, c)
                    c += 1
                except KeyError:
                    break
            self.nb_components = c

        # detect the number of refinement levels by probing increasing rl
        # indices until the lookup fails
        self.rl = []
        while True:
            try:
                self._slice_single_component(self._df._first_iter, len(self.rl))
                self.rl.append(_DataRefinementLevel(len(self.rl), self))
            except KeyError:
                break

    def __repr__(self):
        return '<%s>@<%s>' % (self.name, self._df.path)

    def _slice_single_component(self, it, rl, component = None):
        """Look up the HDF5 dataset for one component and wrap it in a
        _DataSlice.  Raises KeyError when no such dataset exists."""
        querystr = '%s it=%d tl=0 rl=%d' % (self.name, it, rl)
        if component is not None:
            querystr += ' c=%d' % component
        data = self._df._f[querystr]
        # HDF5 stores the axes in the reverse (C) order, hence the [::-1]
        layout = _SliceLayout(data.attrs['origin'], data.attrs['delta'], data.shape[::-1])
        return _DataSlice(data, layout)

    def _slice_merge_components(self, it, rl):
        """Assemble all the components of one slice into a single numpy
        array covering their joint extent, and wrap it in a _DataSlice."""
        data = []
        for i in range(self.nb_components):
            data.append(self._slice_single_component(it, rl, i))

        nb_dims = data[0].layout.nb_dims

        # get the spatial step sizes
        step = [None] * nb_dims
        for i in range(nb_dims):
            step[i] = abs(data[0].layout.coords[i][1] - data[0].layout.coords[i][0])

        # calculate the extents of the result
        coord_max = [None] * nb_dims
        coord_min = [None] * nb_dims

        for d in data:
            for i in range(nb_dims):
                minval = d.layout.coords[i][0]
                maxval = d.layout.coords[i][-1]
                if coord_max[i] is None or coord_max[i] < maxval:
                    coord_max[i] = maxval
                if coord_min[i] is None or coord_min[i] > minval:
                    coord_min[i] = minval

        shape = [None] * nb_dims
        for i in range(nb_dims):
            shape[i] = int((coord_max[i] - coord_min[i]) / step[i]) + 1

        # copy each component into its position inside the joint extent;
        # NOTE(review): overlapping ghost zones are simply overwritten by
        # whichever component comes last
        res = np.empty(shape)
        for d in data:
            sl = [None] * nb_dims
            d_shape = d.data.shape[::-1]

            for i in range(nb_dims):
                start_idx = int((d.layout.coords[i][0] - coord_min[i]) / step[i])
                sl[i] = slice(start_idx, start_idx + d_shape[i])

            # slices are constructed in coordinate order, the array is
            # indexed in the reverse (C) order
            sl = tuple(sl[::-1])
            res[sl] = d.data[:]

        layout = _SliceLayout(coord_min, step, res.shape[::-1])
        merged = _DataSlice(res, layout)
        # a bare numpy array has no HDF5 attributes, so propagate the
        # iteration/time tags from the first component
        merged.it = data[0].it
        merged.time = data[0].time
        return merged

    def slice(self, it = None, time = None, rl = None, component = None):
        """Return a _DataSlice at the given iteration (or time) and
        refinement level; all components are merged unless one is
        requested explicitly."""
        if it is None and time is not None:
            it = self._df._iter_from_time(time)

        if component is not None or self.nb_components == 1:
            return self._slice_single_component(it, rl, component)
        return self._slice_merge_components(it, rl)

class DataFile(object):
    """A single HDF5 data file written by Cactus/Carpet."""

    ### public ###

    # path to the file
    path = None
    # list of _DataSet objects present in the file
    datasets = None

    ### private ###

    # the h5py file object
    _f = None
    _closed = False

    # time duration of one iteration
    _dt_iter = None

    # first iteration number present in the file
    _first_iter = None

    # number of iterations per coarsest-level time step
    _stepsize_iter = None

    # cache for the itertimes property
    _itertimes = None

    def __init__(self, path, mode = 'r'):
        self._f = h5py.File(path, mode)
        self.path = path
        try:
            # find the first dataset carrying an iteration number, and use
            # the first one with positive time to derive the time per
            # iteration
            for it in self._f.values():
                if self._first_iter is None and 'timestep' in it.attrs:
                    self._first_iter = it.attrs['timestep']
                if 'time' in it.attrs and it.attrs['time'] > 0.0:
                    self._dt_iter = it.attrs['time'] / it.attrs['timestep']
                    break

            dt = self._f['Parameters and Global Attributes'].attrs['carpet_delta_time']
            try:
                self._stepsize_iter = self._iter_from_time(dt)
            except ValueError:
                pass

            # retrieve the list of the datasets present in the file;
            # scalar string datasets are read with [()] (.value is removed
            # in modern h5py) and may come back as raw bytes
            datasets_str = self._f['Parameters and Global Attributes']['Datasets'][()]
            if isinstance(datasets_str, bytes):
                datasets_str = datasets_str.decode('ascii')
            self.datasets = []
            for ds in datasets_str.strip().split():
                self.datasets.append(_DataSet(self, ds))
        except BaseException:
            # do not leak the file handle on a failed constructor
            self.close()
            raise

    def __del__(self):
        self.close()

    def _iter_from_time(self, time):
        """Convert a simulation time to an iteration number.

        Raises ValueError when the file defines no per-iteration time
        step and a non-zero time is requested."""
        if self._dt_iter is None:
            if time == 0.0:
                return 0
            raise ValueError("No time step defined in the file: %s" % self.path)
        return int(time / self._dt_iter)

    def close(self):
        """Close the underlying HDF5 file; safe to call more than once."""
        if self._f and not self._closed:
            self._f.close()
            self._closed = True

    def _get_itertimes(self):
        """Collect, for every refinement level in turn, the list of
        (iteration, time) pairs present in the file, sorted by iteration."""
        datasets = [(ds.attrs['level'], ds.attrs['timestep'], ds.attrs['time'])
                    for ds in self._f.values() if 'time' in ds.attrs]

        rls = []
        level = 0
        while True:
            ds_rl = [(it, t) for (rl, it, t) in datasets if rl == level]
            if not ds_rl:
                break

            # sort on the iteration number only (cmp= is gone in Python 3)
            rls.append(sorted(ds_rl, key = lambda x: x[0]))
            level += 1

        return rls

    @property
    def itertimes(self):
        # computed lazily and cached on first access
        if not self._itertimes:
            self._itertimes = self._get_itertimes()
        return self._itertimes

    @property
    def rl(self):
        # convenience accessor: refinement levels of the first dataset
        return self.datasets[0].rl

    def slice(self, *args, **kwargs):
        # convenience accessor: slice of the first dataset
        return self.datasets[0].slice(*args, **kwargs)
"""
This module provides a wrapper around a Cactus simulation output, i.e. a
directory of HDF5 files.
"""

import os

from . import datafile


class _RefinedSlice(object):
    """A view of the simulation state on one refinement level at a single
    iteration or time."""

    # simulation time on this slice
    time = -1
    # simulation iteration on this slice
    it = -1

    # a list of arrays of coordinate points along each direction [x, y, z]
    c = None

    # arrays of coordinate points along each direction
    x = None
    y = None
    z = None

    # the _RefinementLevel this slice lives on
    rl = None

    # private
    _sd = None
    _rl = None

    def __init__(self, sd, rl, time = -1, iteration = -1):
        """
        sd        -- the parent SimulationData
        rl        -- the _RefinementLevel to slice on
        time      -- simulation time to slice at; takes precedence over
                     iteration when both are non-negative
        iteration -- iteration number to slice at

        Raises TypeError when neither time nor iteration is given and
        IndexError when the requested slice does not exist.
        """
        self._sd = sd
        self._rl = rl
        self.rl = rl

        if time < 0 and iteration < 0:
            raise TypeError('Neither time nor iteration provided')

        if time >= 0:
            self.it = sd._df._iter_from_time(time)
        else:
            self.it = iteration

        try:
            s = sd._df.slice(it = self.it, rl = rl.n)
            self.time = s.time
        except KeyError:
            # translate a missing HDF5 dataset into a friendlier error
            if time < 0:
                raise IndexError('No such iteration: ' + str(iteration))
            else:
                raise IndexError('No such time: ' + str(time))

        # pick a representative datafile and get the grid properties from
        # it: prefer 'H', otherwise any file whose basename contains no '.'
        if 'H' in self._sd.df:
            df = self._sd.df['H']
        else:
            df = None
            for key in self._sd.df:
                if '.' in key:
                    continue
                df = self._sd.df[key]
                break
        if df is None:
            raise IndexError

        ds = df.slice(self.it, rl = rl.n)
        self.c = ds.layout.coords
        # NOTE(review): assumes the data is three-dimensional -- confirm
        self.x, self.y, self.z = self.c

    def __getitem__(self, key):
        # gridfunctions are looked up by data file basename
        if key in self._sd.df:
            return self._sd.df[key].slice(self.it, rl = self._rl.n)
        raise IndexError


class _RefinementLevel(object):
    """A single refinement level of the simulation grid."""

    # refinement level number
    n = -1

    # private
    _sd = None

    def __init__(self, sd, n):
        self._sd = sd
        self.n = n

    def slice(self, iteration = -1, time = -1):
        """Return a _RefinedSlice on this level at the given iteration or
        time."""
        return _RefinedSlice(self._sd, self, time, iteration)


class SimulationData(object):
    """A complete set of Cactus simulation output: all the HDF5 files found
    in one directory."""

    # The full path to the simulation directory.
    dirname = None

    # A dictionary of { basename : DataFile } items representing the data
    # files; the basename is the file name without the .h5 extension.
    # (These were bare strings between attributes before, which are inert
    # statements, not docstrings.)
    df = None

    # A dictionary of all the { gridfunction : [datafiles it is located in] }
    # pairs for this set of data.
    gf = None

    # per-refinement level parameters (list of _RefinementLevel)
    rl = None

    # representative DataFile used for grid queries
    _df = None

    def __init__(self, dirname):
        self.dirname = os.path.abspath(dirname)
        self.df = {}
        self.gf = {}

        # open all the hdf5 files in the dir, skipping checkpoints and
        # per-process (.d.h5) files
        for f in os.listdir(dirname):
            if (not f.endswith('.h5') or f.startswith('checkpoint') or
                f.endswith('.d.h5')):
                continue

            self.df[f[:-3]] = datafile.DataFile(os.path.join(dirname, f))

        if len(self.df) == 0:
            raise ValueError('No HDF5 data files in the directory.')

        # map every gridfunction to the list of files containing it;
        # the original unconditionally reassigned self.gf[ds.name] = [df],
        # clobbering the list it had just appended to -- hence the else
        for df in self.df.values():
            for ds in df.datasets:
                if ds.name in self.gf:
                    self.gf[ds.name].append(df)
                else:
                    self.gf[ds.name] = [df]

        # pick a representative datafile and get the grid properties from it
        if 'H' in self.df:
            df = self.df['H']
        else:
            # dict views are not subscriptable in Python 3
            df = next(iter(self.df.values()))

        self._df = df

        # get the refinement levels, iterations and times
        self.rl = [_RefinementLevel(self, n) for n in range(len(df.rl))]