summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2020-10-18 16:16:31 +0200
committerAnton Khirnov <anton@khirnov.net>2020-10-18 16:17:35 +0200
commit576b118d06f0bb926fac042c0e44a81d0dc7b431 (patch)
treefddc55f7b440dbf38121e5e2033265e082975499
parent907d41b765fa3edc6103e7f230f35648e41ecc13 (diff)
targets: implement an SSH+LVM target
-rw-r--r--lbup/_mountinfo.py123
-rw-r--r--lbup/_path.py57
-rw-r--r--lbup/targets.py191
3 files changed, 360 insertions, 11 deletions
diff --git a/lbup/_mountinfo.py b/lbup/_mountinfo.py
new file mode 100644
index 0000000..b4d79ae
--- /dev/null
+++ b/lbup/_mountinfo.py
@@ -0,0 +1,123 @@
+import os.path
+
+from ._path import AbsPath
+
# oct-unescape certain characters as they are in /proc/mountinfo
# see seq_path[_root]() in linux source
def _oct_unescape(b):
    """
    Undo the octal escaping used for fields of /proc/<pid>/mountinfo.

    Special bytes (space, tab, newline, backslash, ...) appear in the file as
    a backslash followed by exactly three octal digits, e.g. br'\040' for
    a space and br'\134' for a backslash.

    b is the raw field as bytes; the decoded bytes are returned. Input
    without any escape sequences is returned unchanged.
    """
    import re

    # Decode all escapes in one left-to-right pass, so the output of one
    # replacement can never be mistaken for the beginning of another escape
    # (br'\134040' is a literal backslash followed by "040", not a space).
    # The first digit is limited to 0-3 so the value always fits in a byte.
    return re.sub(br'\\([0-3][0-7]{2})',
                  lambda m: bytes((int(m.group(1), 8),)), b)
+
class _MountEntry:
    """
    One parsed line of /proc/<pid>/mountinfo.

    The constructor takes the raw line as bytes and raises ValueError when
    the line has fewer than six fields or an unusable device number.
    """
    # mount ID, bytes
    mount_id = None
    # parent ID, bytes
    parent_id = None
    # device number, int, encoded as (major << 8) + minor
    devnum = None
    # root of the mount, AbsPath
    root = None
    # path where the fs is mounted, AbsPath
    mount_point = None
    # mount options, bytes
    mount_opts = None
    # optional fields, list of bytes
    opt_fields = None
    # filesystem type, bytes or None
    fstype = None
    # mount source, bytes or None
    source = None
    # superblock options, bytes or None
    super_opts = None

    # raw mountinfo line, bytes
    raw_entry = None

    def __init__(self, line):
        self.raw_entry = line

        fields = line.split()
        if len(fields) < 6:
            raise ValueError('Not enough items in a mountinfo line', line)

        (self.mount_id, self.parent_id, devnum,
         root, mount_point, mount_opts) = fields[:6]

        # NOTE(review): major/minor above 255 are rejected here, presumably
        # because devnum is packed into 8+8 bits - confirm this is intended.
        major, minor = (int(part) for part in devnum.split(b':'))
        if not (0 <= major <= 255 and 0 <= minor <= 255):
            raise ValueError('Invalid device number', devnum)
        self.devnum = (major << 8) | minor

        self.root = AbsPath(_oct_unescape(root))
        self.mount_point = AbsPath(_oct_unescape(mount_point))
        self.mount_opts = mount_opts

        # everything up to the first b'-' is an optional field
        rest = fields[6:]
        n_opt = next((i for i, f in enumerate(rest) if f == b'-'), len(rest))
        self.opt_fields = rest[:n_opt]

        tail = rest[n_opt:]
        if tail:
            sep, *tail = tail
            # defensive only: the scan above stops exactly at the separator
            if sep != b'-':
                raise ValueError('Expected separator "-", got', sep)

        # the separator is followed by up to three more fields
        if len(tail) > 0:
            self.fstype = tail[0]
        if len(tail) > 1:
            self.source = _oct_unescape(tail[1])
        if len(tail) > 2:
            self.super_opts = tail[2]

    def __str__(self):
        src = ""
        if self.source:
            src = "{%s}" % self.source
        root = "[%s]" % self.root
        return '({major}:{minor}){src}{root}->{mp}'.format(
            major = self.devnum >> 8, minor = self.devnum & 255,
            src = src, root = root, mp = self.mount_point)
+
class MountInfo:
    """
    A wrapper around the contents of the Linux /proc/<pid>/mountinfo file.

    The constructor takes the raw file contents as bytes and parses every
    non-blank line into a _MountEntry.
    """

    # a list of _MountEntry, in the order they appear in the file
    entries = None

    def __init__(self, data):
        # blank lines carry no information; skip them rather than failing
        # the whole parse on them
        self.entries = [_MountEntry(line)
                        for line in data.splitlines() if line.strip()]

    def __str__(self):
        return '%s(%d entries)' % (self.__class__.__name__, len(self.entries))

    def entries_for_mountpoint(self, mountpoint):
        """
        Iterate over all mountinfo entries mounted at the given mountpoint.
        """
        return filter(lambda entry: entry.mount_point == mountpoint, self.entries)

    def mountpoint_for_path(self, path):
        """
        Find the longest mountpoint that is a parent of path.

        Raises LookupError when no entry's mountpoint contains path.
        """
        best_match = None
        for e in self.entries:
            mp = e.mount_point
            # "path in mp" is true when path lies at or below mp; among those
            # keep the mountpoint with the most components
            if path in mp and (best_match is None or len(best_match) < len(mp)):
                best_match = mp

        if best_match is None:
            raise LookupError('No mountpoint for', path)

        return best_match
diff --git a/lbup/_path.py b/lbup/_path.py
new file mode 100644
index 0000000..26c67f8
--- /dev/null
+++ b/lbup/_path.py
@@ -0,0 +1,57 @@
class AbsPath:
    """
    A normalized absolute path, stored as a list of bytes components.

    Empty components and b'.' are dropped during construction, b'..' is
    rejected. Note that len() counts components, so the root path has
    length 0 and is therefore falsy.
    """
    _components = None

    def __init__(self, path = None, components = None):
        """
        Build the path from exactly one of:
          path       - str (encoded as UTF-8) or bytes, must start with '/'
          components - iterable of bytes path components

        Raises ValueError when both or neither are given, when path is not
        absolute or when a b'..' component is present.
        """
        if path is not None and components is not None:
            raise ValueError('Both path and components provided')
        if path is None and components is None:
            raise ValueError('Neither path nor components provided')

        if path is not None:
            if isinstance(path, str):
                path = path.encode('utf-8')
            if not path.startswith(b'/'):
                raise ValueError('Path does not look like valid absolute path', path)
            components = [c for c in path.split(b'/') if c and c != b'.']
        else:
            # copy, so later changes to the caller's list do not leak in
            components = list(components)

        if b'..' in components:
            raise ValueError('Parent references are not allowed',
                             path if path is not None else components)

        self._components = components

    def __bytes__(self):
        return self.path

    def __repr__(self):
        return self.path.decode('utf-8', errors = 'backslashreplace')

    def __contains__(self, item):
        """
        "item in self" is true when item is self or lies below self.
        """
        c = self.components
        return item.components[:len(c)] == c

    def __eq__(self, other):
        if not isinstance(other, AbsPath):
            return NotImplemented
        return self.components == other.components

    def __hash__(self):
        # defined together with __eq__ so that equal paths hash equally
        return hash(tuple(self.components))

    def __len__(self):
        return len(self.components)

    @property
    def path(self):
        """
        The path as bytes, always starting with b'/'.
        """
        return b'/' + b'/'.join(self.components)

    @property
    def components(self):
        """
        Return a list of path components.

        Note that root does not count as a component, so a path of b'/' returns
        an empty list.
        """
        return self._components

    def reparent(self, src, dst):
        """
        Return a new AbsPath in which the leading src part of this path is
        replaced by dst.

        Raises ValueError when this path does not lie at or below src.
        """
        if self not in src:
            raise ValueError('Path not in parent', self, src)
        tail = self.components[len(src):]
        return AbsPath(components = dst.components + tail)

ROOT = AbsPath('/')
diff --git a/lbup/targets.py b/lbup/targets.py
index ababd10..cc7b027 100644
--- a/lbup/targets.py
+++ b/lbup/targets.py
@@ -9,6 +9,8 @@ import socket
import subprocess
from .exceptions import BackupException, RemoteExecException
+from ._mountinfo import MountInfo
+from ._path import AbsPath, ROOT
from .ssh_remote import SSHRemote
from . import repository
from . import _ssh_client
@@ -48,8 +50,8 @@ class Target(ABC):
raise ValueError('One or more dirs to backup required')
self.name = name
- self.dirs = dirs
- self.excludes = excludes
+ self.dirs = list(map(AbsPath, dirs))
+ self.excludes = list(map(AbsPath, excludes))
if logger is None:
self._logger = logging.getLogger('%s.%s' % (self.__class__.__name__, self.name))
@@ -80,9 +82,12 @@ class Target(ABC):
self._logger.debug('%s stderr: %s' % (name, sanitize(stderr)))
def _do_save(self, bup_exec, dry_run, *,
- path_prefix = '', index_opts = None, save_opts = None):
- excludes = [path_prefix + '/' + e for e in self.excludes]
- dirs = [path_prefix + '/' + d for d in self.dirs]
+ reparent = None, index_opts = None, save_opts = None):
+ dirs = self.dirs
+ excludes = self.excludes
+ if reparent is not None:
+ dirs = [d.reparent(*reparent) for d in dirs]
+ excludes = [d.reparent(*reparent) for d in excludes]
if index_opts is None:
index_opts = []
@@ -91,8 +96,8 @@ class Target(ABC):
# index
cmd = bup_exec + ['index', '--update', '--one-file-system'] + index_opts
- cmd.extend(['--exclude=%s' % e for e in excludes])
- cmd.extend(dirs)
+ cmd.extend(['--exclude=%s' % str(e) for e in excludes])
+ cmd.extend(map(str, dirs))
if dry_run:
self._logger.debug('Not executing index command: ' + str(cmd))
@@ -103,7 +108,7 @@ class Target(ABC):
res_idx.stdout, res_idx.stderr)
# save
- cmd = bup_exec + ['save', '-n', self.name] + save_opts + dirs
+ cmd = bup_exec + ['save', '-n', self.name] + save_opts + list(map(str, dirs))
if dry_run:
self._logger.debug('Not executing save command: ' + str(cmd))
else:
@@ -157,7 +162,7 @@ class TargetSSH(Target):
def __str__(self):
return "%s{SSH:%s}" % (super().__str__(), str(self._remote))
- def _paramiko_exec_cmd(self, client, cmd):
+ def _paramiko_exec_cmd(self, client, cmd, decode = True):
self._logger.debug('Client %s: executing command: %s' % (client, cmd))
res = client.exec_command(cmd)
@@ -177,7 +182,10 @@ class TargetSSH(Target):
self._log_command('Remote command', chan.exit_status, out, err)
- return out.decode('utf-8', errors = 'backslashreplace')
+ if decode:
+ out = out.decode('utf-8', errors = 'backslashreplace')
+
+ return out
def _resolve_remote_bupdir(self, ssh):
bupdir = self._paramiko_exec_cmd(ssh, 'realpath -e ' + self._remote_bupdir).splitlines()
@@ -194,6 +202,165 @@ class TargetSSH(Target):
'-d', remote_bupdir]
return self._do_save(bup_exec, dry_run)
class TargetSSHLVM(TargetSSH):
    """
    This target backs up a remote host using LVM snapshots.

    All the dirs backed up must be on same LV.
    """
    # size of the snapshot volume, passed verbatim to "lvcreate -L"
    _snapshot_size = None

    def __init__(self, name, dirs, excludes = None, logger = None,
                 remote = None, remote_bupdir = None, snapshot_size = '20G'):
        self._snapshot_size = snapshot_size

        super().__init__(name, dirs, excludes, logger, remote, remote_bupdir)

    def __str__(self):
        return "%s{LVM:%s}" % (super().__str__(), self._snapshot_size)

    def _resolve_mntdev(self, ssh, pid = 1):
        """
        Find out which LV to snapshot.

        This also checks that all the dirs are on the same LV and no non-trivial
        topologies (such as symlinks or bind mounts) are involved,
        otherwise a BackupException is raised.

        Return a tuple of (devnum, mountpoint)
        """
        # first of all, parse mountinfo
        # the raw bytes are needed, since mount paths need not be valid UTF-8
        mntinfo = MountInfo(
            self._paramiko_exec_cmd(ssh, 'cat /proc/%d/mountinfo' % pid, decode = False))

        devnum = None
        mountpoint = None
        for d in self.dirs:
            mp = mntinfo.mountpoint_for_path(d)
            e = list(mntinfo.entries_for_mountpoint(mp))

            if len(e) != 1:
                raise BackupException('Expected exactly one mountpoint for dir', d, str(e))
            if e[0].root != ROOT:
                raise BackupException('Mountpoint is a bind mount, which is not supported', str(e[0]))
            dn = e[0].devnum

            # the first dir defines the device/mountpoint all others must match
            if devnum is None:
                devnum = dn
                mountpoint = mp
                continue

            if dn != devnum or mp != mountpoint:
                raise BackupException('Mismatching device numbers/mountpoints',
                                      dn, devnum, mp, mountpoint)

        # TODO? check that there are no symlinks?
        # by running stat maybe?

        return (devnum, mountpoint)

    def _resolve_lv(self, ssh, devnum):
        """
        Find the logical volume for the given device number.
        Return its full name, i.e. vgname/lvname

        Raises BackupException when the lvs output does not look like
        a single vgname/lvname pair.
        """
        major = devnum >> 8
        minor = devnum & 255
        res = self._paramiko_exec_cmd(ssh,
            'lvs --select "kernel_major={major}&&kernel_minor={minor}" '
            '--noheadings -o lv_full_name'.format(major = major, minor = minor))

        lv_name = res.strip()
        # The name is later interpolated into shell commands, so only accept
        # a conservative pattern: volname/lvname, each 1-15 word characters.
        # NOTE(review): LVM itself also permits '+', '.', '-' and longer
        # names - confirm whether rejecting those is intended.
        if not re.fullmatch(r'\w{1,15}/\w{1,15}', lv_name, re.ASCII):
            raise BackupException('Invalid LV path', lv_name)

        return lv_name

    @contextlib.contextmanager
    def _snapshot_lv(self, ssh, devnum):
        """
        Return a context manager that creates a read-only LVM snapshot
        for the specified LV device number and destroys it at exit.

        The context value is the path to the snapshot device node.
        """
        lv_fullname = self._resolve_lv(ssh, devnum)
        self._logger.debug('LV volume to snapshot is %s', lv_fullname)

        vg_name = lv_fullname.split('/')[0]

        # create a read-only snapshot with a random name
        # The name is a letter followed by 14 hex digits (15 characters):
        # a url-safe base64 token could begin with '-', which lvcreate would
        # parse as an option rather than as a name.
        snapshot_name = 's' + secrets.token_hex(7)
        snapshot_fullname = '%s/%s' % (vg_name, snapshot_name)
        self._paramiko_exec_cmd(ssh,
            'lvcreate --permission r --snapshot -L {size} -n {name} {origin}'
            .format(size = self._snapshot_size, name = snapshot_name,
                    origin = lv_fullname))

        try:
            # get the path to the snapshot device node
            res = self._paramiko_exec_cmd(ssh,
                'lvs --select "lv_full_name=%s" --noheadings -o lv_path' % snapshot_fullname)
            lv_path = res.strip()
            if not lv_path.startswith('/'):
                raise BackupException('Got invalid snapshot LV path', lv_path)

            self._logger.debug('Created snapshot %s at %s', snapshot_fullname, lv_path)

            yield lv_path
        finally:
            self._paramiko_exec_cmd(ssh, 'lvremove -f %s' % snapshot_fullname)
            self._logger.debug('Removed snapshot %s', snapshot_fullname)

    @contextlib.contextmanager
    def _mount_snapshot(self, ssh, devnum, mount_path):
        """
        Return a context manager that creates a read-only LVM snapshot
        for the specified LV device number and mounts it at mount_path,
        then unmounts and destroys it at exit.
        """
        with self._snapshot_lv(ssh, devnum) as lv_path:
            # Mount outside of the try block: when mounting fails there is
            # nothing of ours to unmount, and running umount anyway could hide
            # the original error or detach an unrelated mount at mount_path.
            self._paramiko_exec_cmd(ssh, 'mount -oro %s %s' % (lv_path, mount_path))
            try:
                yield None
            finally:
                self._paramiko_exec_cmd(ssh, 'umount %s' % mount_path)

    def save(self, data_dir, dry_run = False):
        """
        Snapshot the LV holding the backed-up dirs, mount the snapshot on the
        remote and run the index/save steps against the mounted snapshot.

        Two SSH connections are opened: one as the configured remote user and
        one as root, the latter being used for the snapshot and the mount.
        Returns whatever _do_save() returns.
        """
        with contextlib.ExitStack() as stack:
            conn_tgt = stack.enter_context(_ssh_client.SSHConnection(self._remote))

            remote_root = SSHRemote(self._remote.host, self._remote.port,
                                    'root', self._remote.proxy_remote)
            conn_root = stack.enter_context(_ssh_client.SSHConnection(remote_root))

            # resolve the path to BUP_DIR on the remote
            bupdir = self._resolve_remote_bupdir(conn_tgt)

            # make sure the mount directory exists
            # due to how bup index works, the mount directory has to stay the
            # same for each backup
            # we use BUP_DIR/lbup_mount
            snapshot_mount = '%s/%s' % (bupdir, 'lbup_mount')
            self._paramiko_exec_cmd(conn_tgt, 'mkdir -p -m 700 ' + snapshot_mount)

            devnum, mountpoint = self._resolve_mntdev(conn_tgt)
            self._logger.debug('Backup targets are at device %s, mounted at %s',
                               "%d:%d" % (devnum >> 8, devnum & 255), mountpoint)

            stack.enter_context(self._mount_snapshot(conn_root, devnum, snapshot_mount))

            bup_exec = ['bup', 'on', '%s@%s' % (self._remote.username, self._remote.host),
                        '-d', bupdir]
            # index/save the dirs under the snapshot mount, but graft them
            # back to their real location in the saved tree
            reparent = (mountpoint, AbsPath(snapshot_mount))
            return self._do_save(bup_exec, dry_run,
                                 reparent = reparent,
                                 save_opts = ['--graft=%s=%s' % (snapshot_mount, mountpoint)],
                                 index_opts = ['--no-check-device'])
+
class TargetSSHLXCLVM(TargetSSH):
"""
This target backs up an LXC container that lives on its own LVM logical
@@ -327,8 +494,10 @@ class TargetSSHLXCLVM(TargetSSH):
bup_exec = ['bup', 'on', '%s@%s' % (self._remote.username, self._remote.host),
'-d', container_bupdir]
+ reparent = (ROOT, AbsPath(container_mountpoint))
try:
- ret = self._do_save(bup_exec, dry_run, path_prefix = container_mountpoint,
+ ret = self._do_save(bup_exec, dry_run,
+ reparent = reparent,
save_opts = save_opts, index_opts = ['--no-check-device'])
finally:
self._paramiko_exec_cmd(parent,