summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2018-03-22 12:32:53 +0100
committerAnton Khirnov <anton@khirnov.net>2018-03-22 12:32:53 +0100
commit32fa495b17d5b84e398a70c2d3338486b40dbfe7 (patch)
tree1baf8d77826941377f1135f233acec537b8d2308
parent7daae202d9a1ec7928603a65f5c06b4518c3596e (diff)
Add a new rewritten API.
-rw-r--r--__init__.py0
-rw-r--r--datafile.py270
-rw-r--r--simdata.py136
3 files changed, 406 insertions, 0 deletions
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/__init__.py
diff --git a/datafile.py b/datafile.py
new file mode 100644
index 0000000..2aa752e
--- /dev/null
+++ b/datafile.py
@@ -0,0 +1,270 @@
+"""
+This module provides a wrapper around a single HDF5 file written by Cactus (DataFile) and some helper classes for
+accessing the data.
+"""
+
+import h5py
+import numpy as np
+
+
+class _SliceLayout(object):
+    # Describes the uniform-grid geometry of one data slice: per-dimension
+    # origin, spacing and shape, plus precomputed coordinate arrays.
+
+    ### public ###
+
+    # number of spatial dimensions (== len(origin))
+    nb_dims = None
+    # list of 1D numpy arrays of coordinate points, one per dimension
+    coords = None
+
+    ### private ###
+    _origin = None
+    _delta = None
+    _shape = None
+
+    def __init__(self, origin, delta, shape):
+        # origin/delta/shape must be non-empty sequences of equal length;
+        # raises ValueError otherwise
+        if (len(origin) <= 0 or len(origin) != len(delta) or
+            len(origin) != len(shape)):
+            raise ValueError("Invalid slice description")
+
+        self.nb_dims = len(origin)
+        self._origin = origin
+        self._delta = delta
+        self._shape = shape
+
+        # coordinate points: shape[i] samples starting at origin[i], spaced
+        # delta[i] apart; endpoint = False keeps the spacing exactly delta[i]
+        self.coords = []
+        for i in range(self.nb_dims):
+            self.coords.append(np.linspace(origin[i], origin[i] + delta[i] * shape[i],
+                                           shape[i], endpoint = False))
+
+class _DataSlice(object):
+    # One slice of data together with its grid layout and time information.
+
+    # the _SliceLayout describing the grid of this slice
+    layout = None
+
+    # the underlying data; an h5py dataset when read straight from the file
+    data = None
+
+    # iteration number and simulation time, read from the HDF5 attributes
+    it = None
+    time = None
+
+    def __init__(self, data, layout):
+        self.layout = layout
+        self.data = data
+
+        # NOTE(review): this requires 'data' to have an .attrs mapping (an
+        # h5py dataset); _DataSet._slice_merge_components passes a plain
+        # numpy array here, which has no .attrs -- that path looks broken.
+        self.it = data.attrs['timestep']
+        self.time = data.attrs['time']
+
+    def __getitem__(self, key):
+        # delegate indexing straight to the underlying data
+        return self.data[key]
+
+class _DataRefinementLevel(object):
+    # One mesh-refinement level of a _DataSet; iterating it yields the
+    # dataset's slices on this level in time order.
+
+    # refinement level number (0 = coarsest)
+    n = None
+
+    ### private ###
+    _ds = None
+
+    """number of iterations per time step"""
+    _stepsize_iter = None
+
+    def __init__(self, n, ds):
+        self._ds = ds
+        self.n = n
+
+        # level n steps 2^n times as often as the coarsest level
+        if ds._df._stepsize_iter is not None:
+            self._stepsize_iter = ds._df._stepsize_iter / (1 << n)
+
+    def __repr__(self):
+        return 'rl%d@<%s>' % (self.n, self._ds.name)
+
+    def __iter__(self):
+        # yield one slice per (iteration, time) pair recorded for this level;
+        # the time value t is not needed here
+        for it, t in self._ds._df.itertimes[self.n]:
+            yield self._ds.slice(it, rl = self.n)
+        # NOTE(review): explicit 'raise StopIteration' in a generator is a
+        # Python 2 idiom; under PEP 479 (Python 3.7+) it turns into a
+        # RuntimeError -- simply falling off the end would be correct.
+        raise StopIteration
+
+ #def _next(self):
+
+
+class _DataSet(object):
+    # One gridfunction stored in a DataFile. On construction it probes the
+    # file to discover how many components (patches) and refinement levels
+    # are present for this gridfunction.
+
+    # gridfunction name as used in the HDF5 dataset query strings
+    name = None
+    # number of Carpet components; 1 when stored as a single dataset
+    nb_components = None
+    # list of _DataRefinementLevel objects, one per level found
+    rl = None
+
+    ### private ###
+    _df = None
+
+    def __init__(self, df, name):
+        self._df = df
+        self.name = name
+
+        # detect the number of components
+        # EAFP: a lookup without an explicit component succeeds only for
+        # single-component storage; otherwise probe c = 0, 1, ... until a
+        # KeyError marks the end
+        try:
+            self._slice_single_component(self._df._first_iter, 0)
+            self.nb_components = 1
+        except KeyError:
+            c = 0
+            while True:
+                try:
+                    self._slice_single_component(self._df._first_iter, 0, c)
+                    c += 1
+                except KeyError:
+                    break
+            self.nb_components = c
+
+        # detect the number of refinement levels
+        # NOTE(review): this probe never passes a component, so for
+        # multi-component data (where the component-less lookup raised
+        # KeyError above) it fails immediately and rl stays empty -- it
+        # should probably probe with component 0 when nb_components > 1.
+        self.rl = []
+        while True:
+            try:
+                self._slice_single_component(self._df._first_iter, len(self.rl))
+                self.rl.append(_DataRefinementLevel(len(self.rl), self))
+            except KeyError:
+                break
+
+    def __repr__(self):
+        return '<%s>@<%s>' % (self.name, self._df.path)
+
+    def _slice_single_component(self, it, rl, component = None):
+        # Return a _DataSlice for one component at iteration 'it' on
+        # refinement level 'rl'; raises KeyError when the dataset is absent.
+        # Carpet-style HDF5 dataset name, e.g. 'phi it=0 tl=0 rl=0 c=1'
+        querystr = '%s it=%d tl=0 rl=%d' % (self.name, it, rl)
+        if component is not None:
+            querystr += ' c=%d' % component
+        data = self._df._f[querystr]
+        # shape is reversed: the on-disk axis order is apparently the reverse
+        # of the origin/delta order (matches the [::-1] used when merging
+        # components below) -- TODO confirm against the file layout
+        layout = _SliceLayout(data.attrs['origin'], data.attrs['delta'], data.shape[::-1])
+        return _DataSlice(data, layout)
+
+    def _slice_merge_components(self, it, rl):
+        # Assemble all components into one array covering their joint
+        # bounding box, and return it wrapped in a _DataSlice.
+        data = []
+        for i in range(self.nb_components):
+            data.append(self._slice_single_component(it, rl, i))
+
+        nb_dims = data[0].layout.nb_dims
+
+        # get the spatial step sizes
+        # assumes every component uses the same spacing as component 0
+        step = [None] * nb_dims
+        for i in range(nb_dims):
+            step[i] = abs(data[0].layout.coords[i][1] - data[0].layout.coords[i][0])
+
+        # calculate the extents of the result
+        coord_max = [None] * nb_dims
+        coord_min = [None] * nb_dims
+
+        for d in data:
+            for i in range(nb_dims):
+                minval = d.layout.coords[i][0]
+                maxval = d.layout.coords[i][-1]
+                if coord_max[i] is None or coord_max[i] < maxval:
+                    coord_max[i] = maxval
+                if coord_min[i] is None or coord_min[i] > minval:
+                    coord_min[i] = minval
+
+        # number of points per dimension; int() truncation of the float
+        # division could be off by one for inexact spacings
+        shape = [None] * nb_dims
+        for i in range(nb_dims):
+            shape[i] = int((coord_max[i] - coord_min[i]) / step[i]) + 1
+
+        # np.empty leaves any points not covered by a component uninitialized
+        res = np.empty(shape)
+        for d in data:
+            sl = [None] * nb_dims
+            d_shape = d.data.shape[::-1]
+
+            # place this component at its offset within the bounding box
+            for i in range(nb_dims):
+                start_idx = int((d.layout.coords[i][0] - coord_min[i]) / step[i])
+                sl[i] = slice(start_idx, start_idx + d_shape[i])
+
+
+            # reverse back to the on-disk axis order before indexing
+            sl = tuple(sl[::-1])
+            res[sl] = d.data[:]
+
+        layout = _SliceLayout(coord_min, step, res.shape[::-1])
+        # NOTE(review): res is a numpy array, but _DataSlice.__init__ reads
+        # data.attrs['timestep'] -- this call looks like it would raise
+        # AttributeError; the merged slice probably needs it/time passed in.
+        return _DataSlice(res, layout)
+
+    def slice(self, it = None, time = None, rl = None, component = None):
+        # Return a slice at the given iteration (or time, converted via the
+        # file's iteration/time mapping) and refinement level; a specific
+        # component may be requested, otherwise components are merged.
+        # NOTE(review): rl (and one of it/time) are effectively required --
+        # None would break the '%d' formatting in _slice_single_component.
+        if it is None and time is not None:
+            it = self._df._iter_from_time(time)
+
+        if component is not None or self.nb_components == 1:
+            return self._slice_single_component(it, rl, component)
+        return self._slice_merge_components(it, rl)
+
+class DataFile(object):
+    # Wrapper around one Cactus/Carpet HDF5 output file. Opens the file,
+    # derives the iteration/time mapping and builds the list of datasets.
+    # NOTE(review): this module is Python 2 only (itervalues, cmp-based
+    # sort, list-returning map/filter, deprecated h5py '.value').
+
+    ### public ###
+
+    path = None
+    datasets = None
+
+    ### private ###
+
+    # the h5py file object
+    _f = None
+    _closed = False
+
+    # time duration of one iteration
+    _dt_iter = None
+
+    # first iteration number found in the file
+    _first_iter = None
+
+    # number of iterations per coarsest-level time step
+    _stepsize_iter = None
+
+    # cached result of _get_itertimes()
+    _itertimes = None
+
+    def __init__(self, path, mode = 'r'):
+        self._f = h5py.File(path, mode)
+        self.path = path
+        # anything that fails below must not leak the open file handle
+        try:
+            # scan the top-level groups: remember the first one carrying a
+            # 'timestep' attribute, and derive time-per-iteration from the
+            # first group with a positive time
+            # NOTE(review): "first" depends on h5py's iteration order, which
+            # is not necessarily ascending iteration -- verify
+            for it in self._f.itervalues():
+                if self._first_iter is None and 'timestep' in it.attrs:
+                    self._first_iter = it.attrs['timestep']
+                if 'time' in it.attrs and it.attrs['time'] > 0.0:
+                    self._dt_iter = it.attrs['time'] / it.attrs['timestep']
+                    break
+
+            # coarsest-level time step in physical time, converted to a
+            # number of iterations (left unset when no time mapping exists)
+            dt = self._f['Parameters and Global Attributes'].attrs['carpet_delta_time']
+            try:
+                self._stepsize_iter = self._iter_from_time(dt)
+            except ValueError:
+                pass
+
+            # retrieve the list of the datasets present in the file
+            # '.value' is the pre-h5py-3 accessor; whitespace-separated names
+            datasets_str = self._f['Parameters and Global Attributes']['Datasets'].value
+            self.datasets = []
+            for ds in datasets_str.strip().split():
+                self.datasets.append(_DataSet(self, ds))
+        except:
+            # close the file, then re-raise the original error unchanged
+            self.close()
+            raise
+
+    def __del__(self):
+        self.close()
+
+    def _iter_from_time(self, time):
+        # Convert a physical time to an iteration number.
+        # Raises ValueError when the file defines no time step (except for
+        # time 0.0, which always maps to iteration 0).
+        if self._dt_iter is None:
+            if time == 0.0:
+                return 0
+            raise ValueError("No time step defined in the file: %s" % self.path)
+        return int(time / self._dt_iter)
+
+    def close(self):
+        # idempotent: safe to call repeatedly and from __del__
+        if self._f and not self._closed:
+            self._f.close()
+            self._closed = True
+
+    def _get_itertimes(self):
+        # Build, for each refinement level, a list of (iteration, time)
+        # pairs sorted by iteration.
+        rls = []
+        # (level, timestep, time) for every dataset that has time info;
+        # assumes such datasets also carry 'level' and 'timestep' attributes
+        datasets = map(lambda ds: (ds.attrs['level'], ds.attrs['timestep'], ds.attrs['time']), filter(lambda x: 'time' in x.attrs, self._f.itervalues()))
+
+        # collect consecutive levels 0, 1, ... until one is empty
+        i = 0
+        while True:
+            ds_rl = map(lambda ds: (ds[1], ds[2]), filter(lambda x: x[0] == i, datasets))
+            if len(ds_rl) == 0:
+                break
+
+            # Python 2 cmp-based sort: ascending by iteration number
+            rls.append(sorted(ds_rl, cmp = lambda x, y: cmp(x[0], y[0])))
+            i += 1
+
+        return rls
+
+    @property
+    def itertimes(self):
+        # lazily computed and cached; the falsy test means an empty result
+        # would be recomputed on every access
+        if not self._itertimes:
+            self._itertimes = self._get_itertimes()
+        return self._itertimes
+
+    @property
+    def rl(self):
+        # convenience: refinement levels of the first dataset in the file
+        return self.datasets[0].rl
+
+    def slice(self, *args, **kwargs):
+        # convenience: slice the first dataset in the file
+        return self.datasets[0].slice(*args, **kwargs)
diff --git a/simdata.py b/simdata.py
new file mode 100644
index 0000000..dd5398e
--- /dev/null
+++ b/simdata.py
@@ -0,0 +1,136 @@
+"""
+This module provides a wrapper around a Cactus simulation output, i.e. a directory of HDF5 files.
+"""
+
+import os
+
+from . import datafile
+
+
+class _RefinedSlice(object):
+    # A view of the whole simulation at one refinement level and one instant;
+    # indexing it by gridfunction name returns that gridfunction's slice.
+
+    # simulation time on this slice
+    time = -1
+    # simulation iteration on this slice
+    it = -1
+
+    # a list of arrays of coordinate points along each direction [x, y, z]
+    c = None
+
+    # arrays of coordinate points along each direction
+    x = None
+    y = None
+    z = None
+
+    rl = None
+
+    # private
+    _sd = None
+    _rl = None
+
+    def __init__(self, sd, rl, time = -1, iteration = -1):
+        # negative time/iteration act as "not provided" sentinels
+        self._sd = sd
+        self._rl = rl
+        self.rl = rl
+
+        if time < 0 and iteration < 0:
+            raise TypeError('Neither time nor iteration provided')
+
+        # time takes precedence over an explicitly given iteration
+        if time >= 0:
+            self.it = sd._df._iter_from_time(time)
+        else:
+            self.it = iteration
+
+        # validate the iteration by actually slicing the representative
+        # datafile, and take the authoritative time from it
+        try:
+            s = sd._df.slice(it = self.it, rl = rl.n)
+            self.time = s.time
+        except KeyError:
+            if time < 0:
+                raise IndexError('No such iteration: ' + str(iteration))
+            else:
+                raise IndexError('No such time: ' + str(time))
+
+        # pick a representative datafile and get the grid properties from it
+        if 'H' in self._sd.df:
+            df = self._sd.df['H']
+        else:
+            # fall back to any file whose basename has no '.' in it
+            df = None
+            for key in self._sd.df:
+                if '.' in key:
+                    continue
+                df = self._sd.df[key]
+                break
+            if df is None:
+                raise IndexError
+
+        ds = df.slice(self.it, rl = rl.n)
+        self.c = ds.layout.coords
+        # assumes exactly three spatial dimensions -- TODO confirm 1D/2D
+        # output is never routed through here
+        self.x, self.y, self.z = self.c
+
+    def __getitem__(self, key):
+        # look the gridfunction up by datafile basename
+        # NOTE(review): raises IndexError for a missing string key where
+        # KeyError would be conventional
+        if key in self._sd.df:
+            return self._sd.df[key].slice(self.it, rl = self._rl.n)
+        raise IndexError
+
+
+class _RefinementLevel(object):
+    # Handle for one refinement level of a SimulationData; its slice()
+    # method produces _RefinedSlice views at a given time or iteration.
+
+    # refinement level number
+    n = -1
+
+    # private
+    _sd = None
+
+    def __init__(self, sd, n):
+        self._sd = sd
+        self.n = n
+
+    def slice(self, iteration = -1, time = -1):
+        # negative values mean "not provided"; _RefinedSlice validates that
+        # at least one of the two is given
+        return _RefinedSlice(self._sd, self, time, iteration)
+
+
+class SimulationData(object):
+    # Represents one simulation output directory: opens every *.h5 data file
+    # in it and exposes the gridfunctions and refinement levels.
+
+    "The full path to the simulation directory"
+    dirname = None
+
+    "A dictionary of { basename : DataFile } items representing the data files. The basename is the file name without the .h5 extension"
+    df = None
+
+    "A dictionary of all the { gridfunction : [datafiles it is located in] } pairs for this set of data"
+    gf = None
+
+    # per-refinement level parameters
+    rl = None
+
+    # representative DataFile used for grid/refinement-level queries
+    _df = None
+
+    def __init__(self, dirname):
+        self.dirname = os.path.abspath(dirname)
+        self.df = {}
+        self.gf = {}
+
+        # open all the hdf5 files in the dir
+        # skipping checkpoints and 1D ('.d.h5') output
+        for f in os.listdir(dirname):
+            if (not f.endswith('.h5') or f.startswith('checkpoint') or f.endswith('.d.h5')):
+                continue
+
+            self.df[f[:-3]] = datafile.DataFile('%s/%s' % (dirname, f))
+        if len(self.df) == 0:
+            raise ValueError('No HDF5 data files in the directory.')
+
+        # build the gridfunction -> datafiles mapping
+        for df in self.df.values():
+            for ds in df.datasets:
+                if ds.name in self.gf:
+                    self.gf[ds.name].append(df)
+                # NOTE(review): BUG -- this assignment runs unconditionally
+                # and clobbers the list just appended to above; it is almost
+                # certainly meant to sit in an 'else' branch.
+                self.gf[ds.name] = [df]
+
+        # pick a representative datafile and get the grid properties from it
+        if 'H' in self.df:
+            df = self.df['H']
+        else:
+            # NOTE(review): dict.values()[0] only works on Python 2 (the
+            # whole package is Python 2 -- cf. itervalues/cmp in datafile)
+            df = self.df.values()[0]
+
+        self._df = df
+
+        # get the refinement levels, iterations and times
+        self.rl = []
+        for rl in df.rl:
+            self.rl.append(_RefinementLevel(self, len(self.rl)))