"""
This module provides a wrapper around a single HDF5 file written by Cactus (DataFile) and some helper classes for
accessing the data.
"""

import h5py
import numpy as np


class _SliceLayout(object):
    """Geometry of a uniform grid slice: origin, spacing and extent per dimension."""

    ### public ###

    # number of spatial dimensions
    nb_dims = None
    # per-dimension 1D arrays of the grid-point coordinates
    coords = None

    ### private ###
    _origin = None
    _delta = None
    _shape = None

    def __init__(self, origin, delta, shape):
        if (len(origin) <= 0 or len(origin) != len(delta) or
                len(origin) != len(shape)):
            raise ValueError("Invalid slice description")

        self.nb_dims = len(origin)
        self._origin = origin
        self._delta = delta
        self._shape = shape

        # endpoint=False: shape[i] points starting at origin[i], spaced by
        # delta[i], so origin + delta * shape itself is excluded
        self.coords = []
        for i in range(self.nb_dims):
            self.coords.append(np.linspace(origin[i], origin[i] + delta[i] * shape[i],
                                           shape[i], endpoint = False))


class _DataSlice(object):
    """A single slice of data together with its grid layout and time information."""

    # _SliceLayout describing the grid this slice lives on
    layout = None
    # the underlying data (an h5py dataset or a numpy array); indexable
    data = None
    # iteration number of this slice
    it = None
    # physical time of this slice
    time = None

    def __init__(self, data, layout, it = None, time = None):
        self.layout = layout
        self.data = data

        # plain numpy arrays (e.g. merged multi-component slices) carry no
        # HDF5 attributes, so fall back to the explicitly supplied values;
        # previously this unconditionally read data.attrs and crashed with
        # AttributeError for merged slices
        attrs = getattr(data, 'attrs', None)
        if attrs is not None:
            self.it = attrs['timestep']
            self.time = attrs['time']
        else:
            self.it = it
            self.time = time

    def __getitem__(self, key):
        return self.data[key]


class _DataRefinementLevel(object):
    """A single mesh-refinement level of a dataset; iterating over it yields
    a _DataSlice for every iteration present in the file."""

    # number of this refinement level (0 is the coarsest)
    n = None

    ### private ###

    # the _DataSet this refinement level belongs to
    _ds = None

    # number of iterations per time step on this level
    _stepsize_iter = None

    def __init__(self, n, ds):
        self._ds = ds
        self.n = n

        if ds._df._stepsize_iter is not None:
            # each finer level halves the time step; use floor division
            # (the original Python 2 '/' semantics) — on Python 3 '/' would
            # turn the step size into a float
            self._stepsize_iter = ds._df._stepsize_iter // (1 << n)

    def __repr__(self):
        return 'rl%d@<%s>' % (self.n, self._ds.name)

    def __iter__(self):
        # NB: no 'raise StopIteration' after the loop — inside a generator
        # that is a RuntimeError since PEP 479; the generator simply returns
        for it, t in self._ds._df.itertimes[self.n]:
            yield self._ds.slice(it, rl = self.n)


class _DataSet(object):
    """A single gridfunction stored in the file, possibly split into several
    components (one per process) and several refinement levels."""

    # name of the dataset, as stored in the file
    name = None
    # number of components each slice is split into
    nb_components = None
    # list of the _DataRefinementLevel objects present for this dataset
    rl = None

    ### private ###
    # the DataFile this dataset belongs to
    _df = None

    def __init__(self, df, name):
        self._df = df
        self.name = name

        # detect the number of components: a single-component dataset has no
        # ' c=<N>' suffix in its HDF5 key, a multi-component one has only
        # suffixed keys
        try:
            self._slice_single_component(self._df._first_iter, 0)
            self.nb_components = 1
        except KeyError:
            c = 0
            while True:
                try:
                    self._slice_single_component(self._df._first_iter, 0, c)
                    c += 1
                except KeyError:
                    break
            self.nb_components = c

        # detect the number of refinement levels by probing until the lookup
        # fails; for multi-component data the component-less keys do not
        # exist, so probe component 0 in that case (previously the probe
        # never passed a component and found no levels at all)
        probe_comp = 0 if self.nb_components > 1 else None
        self.rl = []
        while True:
            try:
                self._slice_single_component(self._df._first_iter,
                                             len(self.rl), probe_comp)
                self.rl.append(_DataRefinementLevel(len(self.rl), self))
            except KeyError:
                break

    def __repr__(self):
        return '<%s>@<%s>' % (self.name, self._df.path)

    def _slice_single_component(self, it, rl, component = None):
        """Look up the data for one component and wrap it in a _DataSlice.

        Raises KeyError if no matching dataset exists in the file."""
        querystr = '%s it=%d tl=0 rl=%d' % (self.name, it, rl)
        if component is not None:
            querystr += ' c=%d' % component
        data = self._df._f[querystr]
        # the HDF5 dataset shape is in C order while origin/delta are in the
        # Cactus (Fortran) order, hence the reversal
        layout = _SliceLayout(data.attrs['origin'], data.attrs['delta'], data.shape[::-1])
        return _DataSlice(data, layout)

    def _slice_merge_components(self, it, rl):
        """Assemble the per-component slices into one contiguous array."""
        data = []
        for i in range(self.nb_components):
            data.append(self._slice_single_component(it, rl, i))

        nb_dims = data[0].layout.nb_dims

        # spatial step sizes, assumed identical for all the components
        step = [None] * nb_dims
        for i in range(nb_dims):
            step[i] = abs(data[0].layout.coords[i][1] - data[0].layout.coords[i][0])

        # calculate the extents of the result
        coord_max = [None] * nb_dims
        coord_min = [None] * nb_dims

        for d in data:
            for i in range(nb_dims):
                minval = d.layout.coords[i][0]
                maxval = d.layout.coords[i][-1]
                if coord_max[i] is None or coord_max[i] < maxval:
                    coord_max[i] = maxval
                if coord_min[i] is None or coord_min[i] > minval:
                    coord_min[i] = minval

        shape = [None] * nb_dims
        for i in range(nb_dims):
            shape[i] = int((coord_max[i] - coord_min[i]) / step[i]) + 1

        # NB: points not covered by any component are left uninitialized
        res = np.empty(shape)
        for d in data:
            sl = [None] * nb_dims
            d_shape = d.data.shape[::-1]

            for i in range(nb_dims):
                start_idx = int((d.layout.coords[i][0] - coord_min[i]) / step[i])
                sl[i] = slice(start_idx, start_idx + d_shape[i])

            # the slices were computed in the Cactus order, the array is
            # indexed in the C order
            sl = tuple(sl[::-1])
            res[sl] = d.data[:]

        layout = _SliceLayout(coord_min, step, res.shape[::-1])
        # a plain numpy array has no HDF5 attributes, so pass the iteration
        # and time explicitly (taken from the first component)
        return _DataSlice(res, layout, it = data[0].it, time = data[0].time)

    def slice(self, it = None, time = None, rl = None, component = None):
        """Return a _DataSlice for the given iteration (or physical time) and
        refinement level, merging the components if necessary."""
        if it is None and time is not None:
            it = self._df._iter_from_time(time)

        if component is not None or self.nb_components == 1:
            return self._slice_single_component(it, rl, component)
        return self._slice_merge_components(it, rl)


class DataFile(object):
    """A wrapper around a single HDF5 file written by Cactus/Carpet."""

    ### public ###

    # path to the file
    path = None
    # list of the _DataSet objects present in the file
    datasets = None

    ### private ###

    # the h5py file object
    _f = None
    _closed = False

    # time duration of one iteration
    _dt_iter = None

    # the first iteration number seen in the file
    _first_iter = None

    # number of iterations per coarsest-level time step
    _stepsize_iter = None

    # cached result of _get_itertimes()
    _itertimes = None

    def __init__(self, path, mode = 'r'):
        self._f = h5py.File(path, mode)
        self.path = path
        try:
            # values() works on both Python 2 and 3; itervalues() is Py2-only
            for it in self._f.values():
                if self._first_iter is None and 'timestep' in it.attrs:
                    self._first_iter = it.attrs['timestep']
                # require a positive timestep to avoid dividing by zero
                if ('time' in it.attrs and it.attrs['time'] > 0.0 and
                        it.attrs['timestep'] > 0):
                    self._dt_iter = it.attrs['time'] / it.attrs['timestep']
                    break

            dt = self._f['Parameters and Global Attributes'].attrs['carpet_delta_time']
            try:
                self._stepsize_iter = self._iter_from_time(dt)
            except ValueError:
                pass

            # retrieve the list of the datasets present in the file;
            # Dataset.value was removed from h5py, [()] is the replacement
            datasets_str = self._f['Parameters and Global Attributes']['Datasets'][()]
            if isinstance(datasets_str, bytes):
                # h5py returns bytes on Python 3
                datasets_str = datasets_str.decode('ascii')
            self.datasets = []
            for ds in datasets_str.strip().split():
                self.datasets.append(_DataSet(self, ds))
        except BaseException:
            # do not leak the file handle when initialization fails
            self.close()
            raise

    def __del__(self):
        self.close()

    def _iter_from_time(self, time):
        """Convert a physical time into an iteration number.

        Raises ValueError when the file defines no time step and time != 0."""
        if self._dt_iter is None:
            if time == 0.0:
                return 0
            raise ValueError("No time step defined in the file: %s" % self.path)
        return int(time / self._dt_iter)

    def close(self):
        """Close the underlying HDF5 file; safe to call more than once."""
        if self._f and not self._closed:
            self._f.close()
            self._closed = True

    def _get_itertimes(self):
        """For every refinement level, gather the (iteration, time) pairs
        present in the file, sorted by iteration."""
        # list comprehensions instead of map/filter so the result supports
        # len() on Python 3, where map/filter return iterators
        datasets = [(ds.attrs['level'], ds.attrs['timestep'], ds.attrs['time'])
                    for ds in self._f.values() if 'time' in ds.attrs]

        rls = []
        while True:
            ds_rl = [(it, t) for (lvl, it, t) in datasets if lvl == len(rls)]
            if not ds_rl:
                break

            # sorted(cmp = ...) is Python-2-only; sorting by the iteration
            # number as the key is equivalent
            rls.append(sorted(ds_rl, key = lambda x: x[0]))

        return rls

    @property
    def itertimes(self):
        # computed lazily and cached, since it requires a full file scan
        if not self._itertimes:
            self._itertimes = self._get_itertimes()
        return self._itertimes

    @property
    def rl(self):
        """Refinement levels of the first dataset (convenience accessor)."""
        return self.datasets[0].rl

    def slice(self, *args, **kwargs):
        """Convenience wrapper around slice() of the first dataset."""
        return self.datasets[0].slice(*args, **kwargs)