From 576b118d06f0bb926fac042c0e44a81d0dc7b431 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sun, 18 Oct 2020 16:16:31 +0200 Subject: targets: implement an SSH+LVM target --- lbup/_mountinfo.py | 123 ++++++++++++++++++++++++++++++++++ lbup/_path.py | 57 ++++++++++++++++ lbup/targets.py | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 360 insertions(+), 11 deletions(-) create mode 100644 lbup/_mountinfo.py create mode 100644 lbup/_path.py diff --git a/lbup/_mountinfo.py b/lbup/_mountinfo.py new file mode 100644 index 0000000..b4d79ae --- /dev/null +++ b/lbup/_mountinfo.py @@ -0,0 +1,123 @@ +import os.path + +from ._path import AbsPath + +# oct-unescape certain characters as they are in /proc/mountinfo +# see seq_path[_root]() in linux source +def _oct_unescape(b): + trans = { + br'\040' : b' ', br'\011' : b'\t', + br'\012' : b'\n', br'\0134' : b'\\', + } + + for src, dst in trans.items(): + b = b.replace(src, dst) + + return b + +class _MountEntry: + "mount ID, bytes" + mount_id = None + "parent ID, bytes" + parent_id = None + "device number, int" + devnum = None + "root of the mount, bytes" + root = None + "path where the fs is mounted, bytes" + mount_point = None + "mount options bytes" + mount_opts = None + "optional fields, list of bytes" + opt_fields = None + "filesystem type, bytes" + fstype = None + "mount source, bytes or None" + source = None + "superblock options, bytes or None" + super_opts = None + + "raw mountinfo line, bytes" + raw_entry = None + + def __init__(self, line): + self.raw_entry = line + + items = line.split() + if len(items) < 6: + raise ValueError('Not enough items in a mountinfo line', line) + + mount_id, parent_id, devnum, root, mount_point, mount_opts = items[:6] + self.mount_id = mount_id + self.parent_id = parent_id + + major, minor = map(int, devnum.split(b':')) + if major < 0 or major > 255 or minor < 0 or minor > 255: + raise ValueError('Invalid device number', devnum) + self.devnum = (major << 8) + 
minor + + self.root = AbsPath(_oct_unescape(root)) + self.mount_point = AbsPath(_oct_unescape(mount_point)) + self.mount_opts = mount_opts + + remainder = items[6:] + opt_fields = [] + while len(remainder) > 0 and remainder[0] != b'-': + opt_fields.append(remainder.pop(0)) + self.opt_fields = opt_fields + + if len(remainder) > 0: + sep = remainder.pop(0) + if sep != b'-': + raise ValueError('Expected separator "-", got', sep) + if len(remainder) > 0: + self.fstype = remainder.pop(0) + if len(remainder) > 0: + self.source = _oct_unescape(remainder.pop(0)) + if len(remainder) > 0: + self.super_opts = remainder.pop(0) + + def __str__(self): + return '({major}:{minor}){src}{root}->{mp}'.format( + major = self.devnum >> 8, minor = self.devnum & 255, + src = ("{%s}" % self.source) if self.source else "", + root = "[%s]" % self.root, mp = self.mount_point) + +class MountInfo: + """ + A wrapper around the contents of the Linux /proc/<pid>/mountinfo file. + """ + + "a list of _MountEntry" + entries = None + + def __init__(self, data): + self.entries = [] + for line in data.splitlines(): + self.entries.append(_MountEntry(line)) + + def __str__(self): + return '%s(%d entries)' % (self.__class__.__name__, len(self.entries)) + + def entries_for_mountpoint(self, mountpoint): + """ + Iterate over all mountinfo entries mounted at the given mountpoint. + """ + return filter(lambda entry: entry.mount_point == mountpoint, self.entries) + + def mountpoint_for_path(self, path): + """ + Find the longest mountpoint that is a parent of path. 
+ """ + best_match = None + for e in self.entries: + if (path in e.mount_point and + (best_match is None or len(best_match) < len(e.mount_point))): + best_match = e.mount_point + + if best_match is None: + raise LookupError('No mountpoint for', path) + + print(best_match, len(best_match)) + + return best_match diff --git a/lbup/_path.py b/lbup/_path.py new file mode 100644 index 0000000..26c67f8 --- /dev/null +++ b/lbup/_path.py @@ -0,0 +1,57 @@ +class AbsPath: + _components = None + + def __init__(self, path = None, components = None): + if path is not None and components is not None: + raise ValueError('Both path and components provided') + + if path is not None: + if isinstance(path, str): + path = path.encode('utf-8') + if len(path) < 1 or not path.startswith(b'/'): + raise ValueError('Path does not look like valid absolute path', path) + components = list(filter(lambda x: len(x) > 0 and x != b'.', path[1:].split(b'/'))) + if b'..' in components: + raise ValueError('Parent references are not allowed', path) + + self._components = components + + def __bytes__(self): + return self.path + def __repr__(self): + return self.path.decode('utf-8', errors = 'backslashreplace') + + def __contains__(self, item): + c = self.components + ci = item.components + if len(ci) >= len(c) and ci[:len(c)] == c: + return True + return False + + def __eq__(self, other): + return self.components == other.components + + def __len__(self): + return len(self.components) + + @property + def path(self): + return b'/' + b'/'.join(self.components) + + @property + def components(self): + """ + Return a list of path components. + + Note that root does not count as a component, so a path of b'/' returns + an empty list. 
+ """ + return self._components + + def reparent(self, src, dst): + if not self in src: + raise ValueError('Path not in parent', self, src) + tail = self.components[len(src):] + return AbsPath(components = dst.components + tail) + +ROOT = AbsPath('/') diff --git a/lbup/targets.py b/lbup/targets.py index ababd10..cc7b027 100644 --- a/lbup/targets.py +++ b/lbup/targets.py @@ -9,6 +9,8 @@ import socket import subprocess from .exceptions import BackupException, RemoteExecException +from ._mountinfo import MountInfo +from ._path import AbsPath, ROOT from .ssh_remote import SSHRemote from . import repository from . import _ssh_client @@ -48,8 +50,8 @@ class Target(ABC): raise ValueError('One or more dirs to backup required') self.name = name - self.dirs = dirs - self.excludes = excludes + self.dirs = list(map(AbsPath, dirs)) + self.excludes = list(map(AbsPath, excludes)) if logger is None: self._logger = logging.getLogger('%s.%s' % (self.__class__.__name__, self.name)) @@ -80,9 +82,12 @@ class Target(ABC): self._logger.debug('%s stderr: %s' % (name, sanitize(stderr))) def _do_save(self, bup_exec, dry_run, *, - path_prefix = '', index_opts = None, save_opts = None): - excludes = [path_prefix + '/' + e for e in self.excludes] - dirs = [path_prefix + '/' + d for d in self.dirs] + reparent = None, index_opts = None, save_opts = None): + dirs = self.dirs + excludes = self.excludes + if reparent is not None: + dirs = [d.reparent(*reparent) for d in dirs] + excludes = [d.reparent(*reparent) for d in excludes] if index_opts is None: index_opts = [] @@ -91,8 +96,8 @@ class Target(ABC): # index cmd = bup_exec + ['index', '--update', '--one-file-system'] + index_opts - cmd.extend(['--exclude=%s' % e for e in excludes]) - cmd.extend(dirs) + cmd.extend(['--exclude=%s' % str(e) for e in excludes]) + cmd.extend(map(str, dirs)) if dry_run: self._logger.debug('Not executing index command: ' + str(cmd)) @@ -103,7 +108,7 @@ class Target(ABC): res_idx.stdout, res_idx.stderr) # save - cmd = 
bup_exec + ['save', '-n', self.name] + save_opts + dirs + cmd = bup_exec + ['save', '-n', self.name] + save_opts + list(map(str, dirs)) if dry_run: self._logger.debug('Not executing save command: ' + str(cmd)) else: @@ -157,7 +162,7 @@ class TargetSSH(Target): def __str__(self): return "%s{SSH:%s}" % (super().__str__(), str(self._remote)) - def _paramiko_exec_cmd(self, client, cmd): + def _paramiko_exec_cmd(self, client, cmd, decode = True): self._logger.debug('Client %s: executing command: %s' % (client, cmd)) res = client.exec_command(cmd) @@ -177,7 +182,10 @@ class TargetSSH(Target): self._log_command('Remote command', chan.exit_status, out, err) - return out.decode('utf-8', errors = 'backslashreplace') + if decode: + out = out.decode('utf-8', errors = 'backslashreplace') + + return out def _resolve_remote_bupdir(self, ssh): bupdir = self._paramiko_exec_cmd(ssh, 'realpath -e ' + self._remote_bupdir).splitlines() @@ -194,6 +202,165 @@ class TargetSSH(Target): '-d', remote_bupdir] return self._do_save(bup_exec, dry_run) +class TargetSSHLVM(TargetSSH): + """ + This target backs up a remote host using LVM snapshots. + + All the dirs backed up must be on same LV. + """ + _snapshot_size = None + + def __init__(self, name, dirs, excludes = None, logger = None, + remote = None, remote_bupdir = None, snapshot_size = '20G'): + self._snapshot_size = snapshot_size + + super().__init__(name, dirs, excludes, logger, remote, remote_bupdir) + + def __str__(self): + return "%s{LVM:%s}" % (super().__str__(), self._snapshot_size) + + def _resolve_mntdev(self, ssh, pid = 1): + """ + Find out which LV to snapshot. + + This also checks that all the dirs are on the same LV and no non-trivial + topologies (such as symlinks or bind mounts) are involved, + otherwise a BackupException is raised. 
+ + Return a tuple of (devnum, mountpoint) + """ + # first of all, parse mountinfo + mntinfo = MountInfo( + self._paramiko_exec_cmd(ssh, 'cat /proc/%d/mountinfo' % pid, decode = False)) + + devnum = None + mountpoint = None + for d in self.dirs: + mp = mntinfo.mountpoint_for_path(d) + e = list(mntinfo.entries_for_mountpoint(mp)) + + if len(e) != 1: + raise BackupException('Expected exactly one mountpoint for dir', d, str(e)) + if e[0].root != ROOT: + raise BackupException('Mountpoint is a bind mount, which is not supported', str(e[0])) + dn = e[0].devnum + + if devnum is None: + devnum = dn + mountpoint = mp + continue + + if dn != devnum or mp != mountpoint: + raise BackupException('Mismatching device numbers/mountpoints', + dn, devnum, mp, mountpoint) + + # TODO? check that there are no symlinks? + # by running stat maybe? + + return (devnum, mountpoint) + + def _resolve_lv(self, ssh, devnum): + """ + Find the logical volume for the given device number. + Return its full name, i.e. vgname/lvname + """ + major = devnum >> 8 + minor = devnum & 255 + res = self._paramiko_exec_cmd(ssh, + 'lvs --select "kernel_major={major}&&kernel_minor={minor}" ' + '--noheadings -o lv_full_name'.format(major = major, minor = minor)) + + lv_name = res.strip() + # valid LV paths are volname/lvname, each at most 15 letters + if not re.fullmatch(r'\w{1,15}/\w{1,15}', lv_name, re.ASCII): + raise BackupException('Invalid LV path', lv_name) + + return lv_name + + @contextlib.contextmanager + def _snapshot_lv(self, ssh, devnum): + """ + Return a context manager that creates a read-only LVM snapshot + for the specified LV device number and destroys it at exit. 
+ """ + lv_fullname = self._resolve_lv(ssh, devnum) + self._logger.debug('LV volume to snapshot is %s', lv_fullname) + + vg_name = lv_fullname.split('/')[0] + + # create a read-only snapshot with a random name + # valid LV names are at most 15 characters + snapshot_name = secrets.token_urlsafe()[:15] + snapshot_fullname = '%s/%s' % (vg_name, snapshot_name) + self._paramiko_exec_cmd(ssh, + 'lvcreate --permission r --snapshot -L {size} -n {name} {origin}' + .format(size = self._snapshot_size, name = snapshot_name, + origin = lv_fullname)) + + try: + # get the path to the snapshot device node + res = self._paramiko_exec_cmd(ssh, + 'lvs --select "lv_full_name=%s" --noheadings -o lv_path' % snapshot_fullname) + lv_path = res.strip() + if not lv_path.startswith('/'): + raise BackupException('Got invalid snapshot LV path', lv_path) + + self._logger.debug('Created snapshot %s at %s', snapshot_fullname, lv_path) + + yield lv_path + finally: + self._paramiko_exec_cmd(ssh, 'lvremove -f %s' % snapshot_fullname) + self._logger.debug('Removed snapshot %s', snapshot_fullname) + + @contextlib.contextmanager + def _mount_snapshot(self, ssh, devnum, mount_path): + """ + Return a context manager that creates a read-only LVM snapshot + for the specified LV device number and mounts it at mount_path, + then unmounts and destroys it at exit. 
+ """ + with self._snapshot_lv(ssh, devnum) as lv_path: + try: + self._paramiko_exec_cmd(ssh, 'mount -oro %s %s' % (lv_path, mount_path)) + yield None + finally: + self._paramiko_exec_cmd(ssh, 'umount %s' % mount_path) + + + def save(self, data_dir, dry_run = False): + with contextlib.ExitStack() as stack: + conn_tgt = stack.enter_context(_ssh_client.SSHConnection(self._remote)) + + remote_root = SSHRemote(self._remote.host, self._remote.port, + 'root', self._remote.proxy_remote) + conn_root = stack.enter_context(_ssh_client.SSHConnection(remote_root)) + + # resolve the path to BUP_DIR on the remote + bupdir = self._resolve_remote_bupdir(conn_tgt) + + # make sure the mount directory exists + # due to how bup index works, the mount directory has to stay the + # same for each backup + # we use BUP_DIR/lbup_mount + snapshot_mount = '%s/%s' % (bupdir, 'lbup_mount') + self._paramiko_exec_cmd(conn_tgt, 'mkdir -p -m 700 ' + snapshot_mount) + + devnum, mountpoint = self._resolve_mntdev(conn_tgt) + self._logger.debug('Backup targets are at device %s, mounted at %s', + "%d:%d" % (devnum >> 8, devnum & 255), mountpoint) + + stack.enter_context(self._mount_snapshot(conn_root, devnum, snapshot_mount)) + + save_opts = ['--strip-path', snapshot_mount] + + bup_exec = ['bup', 'on', '%s@%s' % (self._remote.username, self._remote.host), + '-d', bupdir] + reparent = (mountpoint, AbsPath(snapshot_mount)) + return self._do_save(bup_exec, dry_run, + reparent = reparent, + save_opts = ['--graft=%s=%s' % (snapshot_mount, mountpoint)], + index_opts = ['--no-check-device']) + class TargetSSHLXCLVM(TargetSSH): """ This target backs up an LXC container that lives on its own LVM logical @@ -327,8 +494,10 @@ class TargetSSHLXCLVM(TargetSSH): bup_exec = ['bup', 'on', '%s@%s' % (self._remote.username, self._remote.host), '-d', container_bupdir] + reparent = (ROOT, AbsPath(container_mountpoint)) try: - ret = self._do_save(bup_exec, dry_run, path_prefix = container_mountpoint, + ret = 
self._do_save(bup_exec, dry_run, + reparent = reparent, save_opts = save_opts, index_opts = ['--no-check-device']) finally: self._paramiko_exec_cmd(parent, -- cgit v1.2.3