summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2018-03-22 12:32:53 +0100
committerAnton Khirnov <anton@khirnov.net>2018-03-22 12:32:53 +0100
commit32fa495b17d5b84e398a70c2d3338486b40dbfe7 (patch)
tree1baf8d77826941377f1135f233acec537b8d2308
parent7daae202d9a1ec7928603a65f5c06b4518c3596e (diff)
Add a new rewritten API.
-rw-r--r--__init__.py0
-rw-r--r--datafile.py270
-rw-r--r--simdata.py136
3 files changed, 406 insertions, 0 deletions
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/__init__.py
diff --git a/datafile.py b/datafile.py
new file mode 100644
index 0000000..2aa752e
--- /dev/null
+++ b/datafile.py
@@ -0,0 +1,270 @@
+"""
+This module provides a wrapper around a single HDF5 file written by Cactus (DataFile) and some helper classes for
+accessing the data.
+"""
+
+import h5py
+import numpy as np
+
+
+class _SliceLayout(object):
+    # Describes the uniform-grid geometry of one data slice: per-dimension
+    # origin, spacing and shape, plus precomputed coordinate arrays.
+
+    ### public ###
+
+    # number of spatial dimensions (== len(origin))
+    nb_dims = None
+    # list of 1D numpy arrays of coordinate points, one per dimension
+    coords = None
+
+    ### private ###
+    _origin = None
+    _delta = None
+    _shape = None
+
+    def __init__(self, origin, delta, shape):
+        # origin/delta/shape must be non-empty sequences of equal length;
+        # raises ValueError otherwise
+        if (len(origin) <= 0 or len(origin) != len(delta) or
+            len(origin) != len(shape)):
+            raise ValueError("Invalid slice description")
+
+        self.nb_dims = len(origin)
+        self._origin = origin
+        self._delta = delta
+        self._shape = shape
+
+        # coordinate points: shape[i] samples starting at origin[i], spaced
+        # delta[i] apart; endpoint = False keeps the spacing exactly delta[i]
+        self.coords = []
+        for i in range(self.nb_dims):
+            self.coords.append(np.linspace(origin[i], origin[i] + delta[i] * shape[i],
+                                           shape[i], endpoint = False))
+
+class _DataSlice(object):
+    # One slice of data together with its grid layout and time information.
+
+    # the _SliceLayout describing the grid of this slice
+    layout = None
+
+    # the underlying data; an h5py dataset when read straight from the file
+    data = None
+
+    # iteration number and simulation time, read from the HDF5 attributes
+    it = None
+    time = None
+
+    def __init__(self, data, layout):
+        self.layout = layout
+        self.data = data
+
+        # NOTE(review): this requires 'data' to have an .attrs mapping (an
+        # h5py dataset); _DataSet._slice_merge_components passes a plain
+        # numpy array here, which has no .attrs -- that path looks broken.
+        self.it = data.attrs['timestep']
+        self.time = data.attrs['time']
+
+    def __getitem__(self, key):
+        # delegate indexing straight to the underlying data
+        return self.data[key]
+
+class _DataRefinementLevel(object):
+    # One mesh-refinement level of a _DataSet; iterating it yields the
+    # dataset's slices on this level in time order.
+
+    # refinement level number (0 = coarsest)
+    n = None
+
+    ### private ###
+    _ds = None
+
+    """number of iterations per time step"""
+    _stepsize_iter = None
+
+    def __init__(self, n, ds):
+        self._ds = ds
+        self.n = n
+
+        # level n steps 2^n times as often as the coarsest level
+        if ds._df._stepsize_iter is not None:
+            self._stepsize_iter = ds._df._stepsize_iter / (1 << n)
+
+    def __repr__(self):
+        return 'rl%d@<%s>' % (self.n, self._ds.name)
+
+    def __iter__(self):
+        # yield one slice per (iteration, time) pair recorded for this level;
+        # the time value t is not needed here
+        for it, t in self._ds._df.itertimes[self.n]:
+            yield self._ds.slice(it, rl = self.n)
+        # NOTE(review): explicit 'raise StopIteration' in a generator is a
+        # Python 2 idiom; under PEP 479 (Python 3.7+) it turns into a
+        # RuntimeError -- simply falling off the end would be correct.
+        raise StopIteration
+
+ #def _next(self):
+
+
+class _DataSet(object):
+    # One gridfunction stored in a DataFile. On construction it probes the
+    # file to discover how many components (patches) and refinement levels
+    # are present for this gridfunction.
+
+    # gridfunction name as used in the HDF5 dataset query strings
+    name = None
+    # number of Carpet components; 1 when stored as a single dataset
+    nb_components = None
+    # list of _DataRefinementLevel objects, one per level found
+    rl = None
+
+    ### private ###
+    _df = None
+
+    def __init__(self, df, name):
+        self._df = df
+        self.name = name
+
+        # detect the number of components
+        # EAFP: a lookup without an explicit component succeeds only for
+        # single-component storage; otherwise probe c = 0, 1, ... until a
+        # KeyError marks the end
+        try:
+            self._slice_single_component(self._df._first_iter, 0)
+            self.nb_components = 1
+        except KeyError:
+            c = 0
+            while True:
+                try:
+                    self._slice_single_component(self._df._first_iter, 0, c)
+                    c += 1
+                except KeyError:
+                    break
+            self.nb_components = c
+
+        # detect the number of refinement levels
+        # NOTE(review): this probe never passes a component, so for
+        # multi-component data (where the component-less lookup raised
+        # KeyError above) it fails immediately and rl stays empty -- it
+        # should probably probe with component 0 when nb_components > 1.
+        self.rl = []
+        while True:
+            try:
+                self._slice_single_component(self._df._first_iter, len(self.rl))
+                self.rl.append(_DataRefinementLevel(len(self.rl), self))
+            except KeyError:
+                break
+
+    def __repr__(self):
+        return '<%s>@<%s>' % (self.name, self._df.path)
+
+    def _slice_single_component(self, it, rl, component = None):
+        # Return a _DataSlice for one component at iteration 'it' on
+        # refinement level 'rl'; raises KeyError when the dataset is absent.
+        # Carpet-style HDF5 dataset name, e.g. 'phi it=0 tl=0 rl=0 c=1'
+        querystr = '%s it=%d tl=0 rl=%d' % (self.name, it, rl)
+        if component is not None:
+            querystr += ' c=%d' % component
+        data = self._df._f[querystr]
+        # shape is reversed: the on-disk axis order is apparently the reverse
+        # of the origin/delta order (matches the [::-1] used when merging
+        # components below) -- TODO confirm against the file layout
+        layout = _SliceLayout(data.attrs['origin'], data.attrs['delta'], data.shape[::-1])
+        return _DataSlice(data, layout)
+
+    def _slice_merge_components(self, it, rl):
+        # Assemble all components into one array covering their joint
+        # bounding box, and return it wrapped in a _DataSlice.
+        data = []
+        for i in range(self.nb_components):
+            data.append(self._slice_single_component(it, rl, i))
+
+        nb_dims = data[0].layout.nb_dims
+
+        # get the spatial step sizes
+        # assumes every component uses the same spacing as component 0
+        step = [None] * nb_dims
+        for i in range(nb_dims):
+            step[i] = abs(data[0].layout.coords[i][1] - data[0].layout.coords[i][0])
+
+        # calculate the extents of the result
+        coord_max = [None] * nb_dims
+        coord_min = [None] * nb_dims
+
+        for d in data:
+            for i in range(nb_dims):
+                minval = d.layout.coords[i][0]
+                maxval = d.layout.coords[i][-1]
+                if coord_max[i] is None or coord_max[i] < maxval:
+                    coord_max[i] = maxval
+                if coord_min[i] is None or coord_min[i] > minval:
+                    coord_min[i] = minval
+
+        # number of points per dimension; int() truncation of the float
+        # division could be off by one for inexact spacings
+        shape = [None] * nb_dims
+        for i in range(nb_dims):
+            shape[i] = int((coord_max[i] - coord_min[i]) / step[i]) + 1
+
+        # np.empty leaves any points not covered by a component uninitialized
+        res = np.empty(shape)
+        for d in data:
+            sl = [None] * nb_dims
+            d_shape = d.data.shape[::-1]
+
+            # place this component at its offset within the bounding box
+            for i in range(nb_dims):
+                start_idx = int((d.layout.coords[i][0] - coord_min[i]) / step[i])
+                sl[i] = slice(start_idx, start_idx + d_shape[i])
+
+
+            # reverse back to the on-disk axis order before indexing
+            sl = tuple(sl[::-1])
+            res[sl] = d.data[:]
+
+        layout = _SliceLayout(coord_min, step, res.shape[::-1])
+        # NOTE(review): res is a numpy array, but _DataSlice.__init__ reads
+        # data.attrs['timestep'] -- this call looks like it would raise
+        # AttributeError; the merged slice probably needs it/time passed in.
+        return _DataSlice(res, layout)
+
+    def slice(self, it = None, time = None, rl = None, component = None):
+        # Return a slice at the given iteration (or time, converted via the
+        # file's iteration/time mapping) and refinement level; a specific
+        # component may be requested, otherwise components are merged.
+        # NOTE(review): rl (and one of it/time) are effectively required --
+        # None would break the '%d' formatting in _slice_single_component.
+        if it is None and time is not None:
+            it = self._df._iter_from_time(time)
+
+        if component is not None or self.nb_components == 1:
+            return self._slice_single_component(it, rl, component)
+        return self._slice_merge_components(it, rl)
+
+class DataFile(object):
+    # Wrapper around one Cactus/Carpet HDF5 output file. Opens the file,
+    # derives the iteration/time mapping and builds the list of datasets.
+    # NOTE(review): this module is Python 2 only (itervalues, cmp-based
+    # sort, list-returning map/filter, deprecated h5py '.value').
+
+    ### public ###
+
+    path = None
+    datasets = None
+
+    ### private ###
+
+    # the h5py file object
+    _f = None
+    _closed = False
+
+    # time duration of one iteration
+    _dt_iter = None
+
+    # first iteration number found in the file
+    _first_iter = None
+
+    # number of iterations per coarsest-level time step
+    _stepsize_iter = None
+
+    # cached result of _get_itertimes()
+    _itertimes = None
+
+    def __init__(self, path, mode = 'r'):
+        self._f = h5py.File(path, mode)
+        self.path = path
+        # anything that fails below must not leak the open file handle
+        try:
+            # scan the top-level groups: remember the first one carrying a
+            # 'timestep' attribute, and derive time-per-iteration from the
+            # first group with a positive time
+            # NOTE(review): "first" depends on h5py's iteration order, which
+            # is not necessarily ascending iteration -- verify
+            for it in self._f.itervalues():
+                if self._first_iter is None and 'timestep' in it.attrs:
+                    self._first_iter = it.attrs['timestep']
+                if 'time' in it.attrs and it.attrs['time'] > 0.0:
+                    self._dt_iter = it.attrs['time'] / it.attrs['timestep']
+                    break
+
+            # coarsest-level time step in physical time, converted to a
+            # number of iterations (left unset when no time mapping exists)
+            dt = self._f['Parameters and Global Attributes'].attrs['carpet_delta_time']
+            try:
+                self._stepsize_iter = self._iter_from_time(dt)
+            except ValueError:
+                pass
+
+            # retrieve the list of the datasets present in the file
+            # '.value' is the pre-h5py-3 accessor; whitespace-separated names
+            datasets_str = self._f['Parameters and Global Attributes']['Datasets'].value
+            self.datasets = []
+            for ds in datasets_str.strip().split():
+                self.datasets.append(_DataSet(self, ds))
+        except:
+            # close the file, then re-raise the original error unchanged
+            self.close()
+            raise
+
+    def __del__(self):
+        self.close()
+
+    def _iter_from_time(self, time):
+        # Convert a physical time to an iteration number.
+        # Raises ValueError when the file defines no time step (except for
+        # time 0.0, which always maps to iteration 0).
+        if self._dt_iter is None:
+            if time == 0.0:
+                return 0
+            raise ValueError("No time step defined in the file: %s" % self.path)
+        return int(time / self._dt_iter)
+
+    def close(self):
+        # idempotent: safe to call repeatedly and from __del__
+        if self._f and not self._closed:
+            self._f.close()
+            self._closed = True
+
+    def _get_itertimes(self):
+        # Build, for each refinement level, a list of (iteration, time)
+        # pairs sorted by iteration.
+        rls = []
+        # (level, timestep, time) for every dataset that has time info;
+        # assumes such datasets also carry 'level' and 'timestep' attributes
+        datasets = map(lambda ds: (ds.attrs['level'], ds.attrs['timestep'], ds.attrs['time']), filter(lambda x: 'time' in x.attrs, self._f.itervalues()))
+
+        # collect consecutive levels 0, 1, ... until one is empty
+        i = 0
+        while True:
+            ds_rl = map(lambda ds: (ds[1], ds[2]), filter(lambda x: x[0] == i, datasets))
+            if len(ds_rl) == 0:
+                break
+
+            # Python 2 cmp-based sort: ascending by iteration number
+            rls.append(sorted(ds_rl, cmp = lambda x, y: cmp(x[0], y[0])))
+            i += 1
+
+        return rls
+
+    @property
+    def itertimes(self):
+        # lazily computed and cached; the falsy test means an empty result
+        # would be recomputed on every access
+        if not self._itertimes:
+            self._itertimes = self._get_itertimes()
+        return self._itertimes
+
+    @property
+    def rl(self):
+        # convenience: refinement levels of the first dataset in the file
+        return self.datasets[0].rl
+
+    def slice(self, *args, **kwargs):
+        # convenience: slice the first dataset in the file
+        return self.datasets[0].slice(*args, **kwargs)
diff --git a/simdata.py b/simdata.py
new file mode 100644
index 0000000..dd5398e
--- /dev/null
+++ b/simdata.py
@@ -0,0 +1,136 @@
+"""
+This module provides a wrapper around a Cactus simulation output, i.e. a directory of HDF5 files.
+"""
+
+import os
+
+from . import datafile
+
+
+class _RefinedSlice(object):
+    # A view of the whole simulation at one refinement level and one instant;
+    # indexing it by gridfunction name returns that gridfunction's slice.
+
+    # simulation time on this slice
+    time = -1
+    # simulation iteration on this slice
+    it = -1
+
+    # a list of arrays of coordinate points along each direction [x, y, z]
+    c = None
+
+    # arrays of coordinate points along each direction
+    x = None
+    y = None
+    z = None
+
+    rl = None
+
+    # private
+    _sd = None
+    _rl = None
+
+    def __init__(self, sd, rl, time = -1, iteration = -1):
+        # negative time/iteration act as "not provided" sentinels
+        self._sd = sd
+        self._rl = rl
+        self.rl = rl
+
+        if time < 0 and iteration < 0:
+            raise TypeError('Neither time nor iteration provided')
+
+        # time takes precedence over an explicitly given iteration
+        if time >= 0:
+            self.it = sd._df._iter_from_time(time)
+        else:
+            self.it = iteration
+
+        # validate the iteration by actually slicing the representative
+        # datafile, and take the authoritative time from it
+        try:
+            s = sd._df.slice(it = self.it, rl = rl.n)
+            self.time = s.time
+        except KeyError:
+            if time < 0:
+                raise IndexError('No such iteration: ' + str(iteration))
+            else:
+                raise IndexError('No such time: ' + str(time))
+
+        # pick a representative datafile and get the grid properties from it
+        if 'H' in self._sd.df:
+            df = self._sd.df['H']
+        else:
+            # fall back to any file whose basename has no '.' in it
+            df = None
+            for key in self._sd.df:
+                if '.' in key:
+                    continue
+                df = self._sd.df[key]
+                break
+            if df is None:
+                raise IndexError
+
+        ds = df.slice(self.it, rl = rl.n)
+        self.c = ds.layout.coords
+        # assumes exactly three spatial dimensions -- TODO confirm 1D/2D
+        # output is never routed through here
+        self.x, self.y, self.z = self.c
+
+    def __getitem__(self, key):
+        # look the gridfunction up by datafile basename
+        # NOTE(review): raises IndexError for a missing string key where
+        # KeyError would be conventional
+        if key in self._sd.df:
+            return self._sd.df[key].slice(self.it, rl = self._rl.n)
+        raise IndexError
+
+
+class _RefinementLevel(object):
+    # Handle for one refinement level of a SimulationData; its slice()
+    # method produces _RefinedSlice views at a given time or iteration.
+
+    # refinement level number
+    n = -1
+
+    # private
+    _sd = None
+
+    def __init__(self, sd, n):
+        self._sd = sd
+        self.n = n
+
+    def slice(self, iteration = -1, time = -1):
+        # negative values mean "not provided"; _RefinedSlice validates that
+        # at least one of the two is given
+        return _RefinedSlice(self._sd, self, time, iteration)
+
+
+class SimulationData(object):
+    # Represents one simulation output directory: opens every *.h5 data file
+    # in it and exposes the gridfunctions and refinement levels.
+
+    "The full path to the simulation directory"
+    dirname = None
+
+    "A dictionary of { basename : DataFile } items representing the data files. The basename is the file name without the .h5 extension"
+    df = None
+
+    "A dictionary of all the { gridfunction : [datafiles it is located in] } pairs for this set of data"
+    gf = None
+
+    # per-refinement level parameters
+    rl = None
+
+    # representative DataFile used for grid/refinement-level queries
+    _df = None
+
+    def __init__(self, dirname):
+        self.dirname = os.path.abspath(dirname)
+        self.df = {}
+        self.gf = {}
+
+        # open all the hdf5 files in the dir
+        # skipping checkpoints and 1D ('.d.h5') output
+        for f in os.listdir(dirname):
+            if (not f.endswith('.h5') or f.startswith('checkpoint') or f.endswith('.d.h5')):
+                continue
+
+            self.df[f[:-3]] = datafile.DataFile('%s/%s' % (dirname, f))
+        if len(self.df) == 0:
+            raise ValueError('No HDF5 data files in the directory.')
+
+        # build the gridfunction -> datafiles mapping
+        for df in self.df.values():
+            for ds in df.datasets:
+                if ds.name in self.gf:
+                    self.gf[ds.name].append(df)
+                # NOTE(review): BUG -- this assignment runs unconditionally
+                # and clobbers the list just appended to above; it is almost
+                # certainly meant to sit in an 'else' branch.
+                self.gf[ds.name] = [df]
+
+        # pick a representative datafile and get the grid properties from it
+        if 'H' in self.df:
+            df = self.df['H']
+        else:
+            # NOTE(review): dict.values()[0] only works on Python 2 (the
+            # whole package is Python 2 -- cf. itervalues/cmp in datafile)
+            df = self.df.values()[0]
+
+        self._df = df
+
+        # get the refinement levels, iterations and times
+        self.rl = []
+        for rl in df.rl:
+            self.rl.append(_RefinementLevel(self, len(self.rl)))