summaryrefslogtreecommitdiff
path: root/lbup
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2020-02-15 11:05:43 +0100
committerAnton Khirnov <anton@khirnov.net>2020-02-15 11:05:43 +0100
commit00bf9f3eafff6f29b4fcf18d7c1f2ab2b1310c16 (patch)
treeb5430fa827e48b6e42e3f49a1b4e188675bef6d3 /lbup
parent1c33c917bbcad2924bb722f77c7aaa4f511a299c (diff)
Rename bupper to lbup.
bupper already exists. lbup is a randomly chosen word containing bup.
Diffstat (limited to 'lbup')
-rw-r--r--lbup/__init__.py0
-rw-r--r--lbup/_ssh_client.py51
-rw-r--r--lbup/_sshfp_policy.py61
-rw-r--r--lbup/exceptions.py14
-rw-r--r--lbup/repository.py81
-rw-r--r--lbup/ssh_remote.py25
-rw-r--r--lbup/targets.py314
7 files changed, 546 insertions, 0 deletions
diff --git a/lbup/__init__.py b/lbup/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lbup/__init__.py
diff --git a/lbup/_ssh_client.py b/lbup/_ssh_client.py
new file mode 100644
index 0000000..dec3390
--- /dev/null
+++ b/lbup/_ssh_client.py
@@ -0,0 +1,51 @@
+import paramiko.client as pmc
+
+from ._sshfp_policy import SSHFPPolicy
+
class SSHConnection:
    """
    An SSH client connection to a remote server, with support for a proxy
    "jump" host, like OpenSSH's 'ssh -J'. Uses only SSHFP for host key
    verification.

    May be used as a context manager.

    :param SSHRemote remote: Remote host to connect to.
    """
    _proxy_conn = None
    _client = None
    _remote = None

    def __init__(self, remote):
        tunnel_sock = None
        if remote.proxy_remote is not None:
            # connect to the proxy first, then open a forwarded channel
            # through it towards the actual destination
            self._proxy_conn = SSHConnection(remote.proxy_remote)
            transport = self._proxy_conn.get_transport()
            tunnel_sock = transport.open_channel(
                'direct-tcpip', (remote.host, remote.port), ('localhost', 0))

        client = pmc.SSHClient()
        client.set_missing_host_key_policy(SSHFPPolicy())
        client.connect(remote.host, remote.port, remote.username,
                       sock = tunnel_sock)

        self._client = client
        self._remote = remote

    def close(self):
        """Close the connection, then any proxy connection behind it."""
        if self._client:
            self._client.close()
            self._client = None
        if self._proxy_conn:
            self._proxy_conn.close()
            self._proxy_conn = None

    def exec_command(self, *args, **kwargs):
        """Forward to paramiko SSHClient.exec_command()."""
        return self._client.exec_command(*args, **kwargs)

    def get_transport(self):
        """Forward to paramiko SSHClient.get_transport()."""
        return self._client.get_transport()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __str__(self):
        return 'ssh:%s' % str(self._remote)
diff --git a/lbup/_sshfp_policy.py b/lbup/_sshfp_policy.py
new file mode 100644
index 0000000..8b7c2c7
--- /dev/null
+++ b/lbup/_sshfp_policy.py
@@ -0,0 +1,61 @@
+import hashlib
+
+from dns import flags, rdatatype, resolver as dnsres
+
+from paramiko.client import MissingHostKeyPolicy
+from paramiko.common import DEBUG
+
# SSH key algorithm name -> SSHFP RR algorithm number, as a string in DNS
# presentation form (per RFC 4255 and its extensions: 1 = RSA, 2 = DSA,
# 3 = ECDSA, 4 = Ed25519)
_key_algorithms = {
    'ssh-rsa' : '1',
    'ssh-dss' : '2',
    'ecdsa-sha2-nistp256' : '3',
    'ecdsa-sha2-nistp384' : '3',
    'ecdsa-sha2-nistp521' : '3',
    'ssh-ed25519' : '4',
}

# SSHFP fingerprint type number -> hash function used to compute the
# fingerprint over the raw key blob (1 = SHA-1, 2 = SHA-256)
_hash_funcs = {
    '1' : hashlib.sha1,
    '2' : hashlib.sha256,
}
+
class SSHFPPolicy(MissingHostKeyPolicy):
    '''
    Paramiko missing-host-key policy that accepts a host key only when a
    matching, DNSSEC-validated SSHFP DNS record exists for the host.

    :param dns.resolver.Resolver resolver: resolver used for the SSHFP
        queries; when not supplied, a default one requesting DNSSEC
        validation is created
    '''
    def __init__(self, resolver = None):
        if resolver is None:
            resolver = dnsres.Resolver()
            # enable EDNS with the DO flag so that the AD bit checked in
            # missing_host_key() can be set by a validating resolver
            resolver.use_edns(0, flags.DO, 1280)

        self._resolver = resolver

    def missing_host_key(self, client, hostname, key):
        """
        Verify key against the host's SSHFP records; raise on any failure,
        return silently on success.

        :raises Exception: unsupported key type, no/unvalidated/unmatched
            SSHFP records
        """
        try:
            key_alg = _key_algorithms[key.get_name()]
        except KeyError:
            raise Exception('Unsupported key type for SSHFP: %s' % key.get_name())

        try:
            resp = self._resolver.query(hostname, 'SSHFP')
        # also catch NXDOMAIN so a completely unknown host gets the same
        # clear error instead of an unhandled resolver exception
        except (dnsres.NoAnswer, dnsres.NXDOMAIN):
            raise Exception('Could not obtain SSHFP records for host: %s' % hostname)

        # require the Authenticated Data bit, i.e. a DNSSEC-validated answer
        if not resp.response.flags & flags.AD:
            raise Exception('Answer does not have a valid DNSSEC signature')

        for item in resp:
            try:
                alg, fg_type, fg = item.to_text().split()
            except ValueError:
                raise Exception('Invalid SSHFP record format: %s' % item.to_text())

            if alg != key_alg:
                continue

            if not fg_type in _hash_funcs:
                continue

            fg_expect = _hash_funcs[fg_type](key.asbytes()).hexdigest()
            # hexdigest() is lower-case, but the zone may publish the
            # fingerprint with upper-case hex digits -- compare
            # case-insensitively
            if fg_expect == fg.lower():
                client._log(DEBUG, 'Found valid SSHFP record for host %s' % hostname)
                return
        raise Exception('No matching SSHFP records found')
diff --git a/lbup/exceptions.py b/lbup/exceptions.py
new file mode 100644
index 0000000..69201ef
--- /dev/null
+++ b/lbup/exceptions.py
@@ -0,0 +1,14 @@
class BackupException(Exception):
    """Base class for all errors raised by this package."""
    pass

class RemoteExecException(BackupException):
    """
    A command executed on a remote host failed.

    :param str explanation: human-readable description of the failure
    :param int retcode: return code of the remote command
    :param bytes output: raw output captured from the remote command
    """
    # return code of the failed remote command
    retcode = None
    # raw captured output (bytes) of the failed remote command
    output = None

    def __init__(self, explanation, retcode, output):
        super().__init__(explanation)
        self.retcode = retcode
        self.output = output

    def __str__(self):
        decoded = self.output.decode('utf-8', errors = 'backslashreplace')
        return '%s;%d: %s' % (super().__str__(), self.retcode, decoded)
diff --git a/lbup/repository.py b/lbup/repository.py
new file mode 100644
index 0000000..0cb1161
--- /dev/null
+++ b/lbup/repository.py
@@ -0,0 +1,81 @@
+import fcntl
+import os
+import os.path
+import subprocess
+
class StepResult:
    """
    Outcome of a single backup step.

    :param int retcode: return code of the step; 0 (the default) is success
    :param bytes output: raw output produced by the step, if any
    """
    # return code of the step; 0 means success
    retcode = None
    # raw captured output of the step (bytes), or None
    output = None

    def __init__(self, retcode = 0, output = None):
        self.retcode = retcode
        self.output = output
+
class BackupResult:
    """
    Aggregated result of backing up a list of targets: one StepResult per
    target plus the result of the par2 recovery-info generation step.
    """
    # list of StepResult, one per backed-up target
    target_results = None
    # StepResult of the par2 generation step; defaults to success
    par2_result = None

    def __init__(self):
        self.target_results = []
        self.par2_result = StepResult()

    @property
    def all_ok(self):
        """True iff every target backup and the par2 step returned 0."""
        # generator expression instead of map()+lambda -- same semantics,
        # idiomatic and short-circuiting
        return (all(res.retcode == 0 for res in self.target_results) and
                self.par2_result.retcode == 0)
+
+
class Repo:
    """
    A single Bup repository into which the data will be backed up, plus a
    separate directory for extra runtime data.

    :param str bup_dir: path to the bup repository, defaults to BUP_DIR or ~/.bup
    :param str data_dir: path to the directory for storing the runtime data,
        defaults to ~/.local/var/lbup
    """
    bup_dir = None
    data_dir = None
    # name of the lock file (inside data_dir) serializing backup runs
    lock_name = 'lock'

    def __init__(self, bup_dir = None, data_dir = None):
        if bup_dir is None:
            bup_dir = os.environ.get('BUP_DIR', os.path.expanduser('~/.bup'))

        if data_dir is None:
            data_dir = os.path.expanduser('~/.local/var/lbup/')

        # create the data dir, if it does not already exist
        os.makedirs(data_dir, 0o700, exist_ok = True)

        self.bup_dir = bup_dir
        self.data_dir = data_dir

    def backup(self, tgts, gen_par2 = True):
        """
        Backup the supplied targets.

        :param list of Target tgts: List of targets to back up.
        :param bool gen_par2: Whether to generate par2 recovery information
            after the backup concludes.
        :rtype: BackupResult
        """
        with open(os.path.join(self.data_dir, self.lock_name), 'w') as lockfile:
            result = BackupResult()

            # an exclusive lock prevents concurrent backups into this repo
            fcntl.lockf(lockfile, fcntl.LOCK_EX)
            try:
                for tgt in tgts:
                    res = tgt.save(self.data_dir)
                    result.target_results.append(res)

                if gen_par2:
                    # 'bup fsck' locates the repository through the BUP_DIR
                    # environment variable; pass self.bup_dir explicitly so
                    # a repository given via the bup_dir parameter (rather
                    # than the environment) is the one actually fsck'd
                    res = subprocess.run(['bup', 'fsck', '-g'],
                                         capture_output = True,
                                         env = dict(os.environ,
                                                    BUP_DIR = self.bup_dir))
                    result.par2_result = StepResult(res.returncode,
                                                    res.stderr + res.stdout)
            finally:
                fcntl.lockf(lockfile, fcntl.LOCK_UN)

        return result
diff --git a/lbup/ssh_remote.py b/lbup/ssh_remote.py
new file mode 100644
index 0000000..634c86a
--- /dev/null
+++ b/lbup/ssh_remote.py
@@ -0,0 +1,25 @@
+class SSHRemote:
+ """
+ Specification of an SSH remote host, represented by a combination of host,
+ port and username, plus an optional proxy remote.
+ :param str host:
+ :param int port:
+ :param str username:
+ :param SSHRemote proxy_remote: proxy through which the connection should be
+ tunnelled
+ """
+ host = None
+ port = None
+ username = None
+ proxy_remote = None
+ def __init__(self, host, port, username, proxy_remote = None):
+ self.host = host
+ self.port = port
+ self.username = username
+ self.proxy_remote = proxy_remote
+ def __str__(self):
+ return '{user}@{host}:{port}{proxy}'.format(
+ user = self.username,
+ host = self.host,
+ port = self.port,
+ proxy = '@@{%s}' % str(self.proxy_remote) if self.proxy_remote else '')
diff --git a/lbup/targets.py b/lbup/targets.py
new file mode 100644
index 0000000..0d3ffe8
--- /dev/null
+++ b/lbup/targets.py
@@ -0,0 +1,314 @@
+
+from abc import ABC, abstractmethod
+import contextlib
+import errno
+import logging
+import re
+import socket
+import subprocess
+
+from .exceptions import BackupException, RemoteExecException
+from . import repository
+from . import ssh_remote
+from . import _ssh_client
+
def _parse_name(name):
    """
    Parse a backup name of the form user@host[:port] into a remote
    specification. IPv6 literals must be enclosed in brackets, i.e.
    user@[literal] or user@[literal]:port.

    :param str name: the backup name to parse
    :rtype: ssh_remote.SSHRemote
    :raises ValueError: if name does not contain a username part
    """
    # split off the username
    if not '@' in name:
        # fixed: the %s placeholder was never substituted with name
        raise ValueError('Invalid backup name: "%s", must be of format user@host' % name)
    username, _, host = name.partition('@')

    port = 22 # overridden later if specified in name
    colons = host.count(':')
    if colons >= 2: # IPv6 literal, possibly with port
        # the port group excludes the separating colon; previously the
        # captured port kept the leading ':' and an absent port clobbered
        # the default with None
        m = re.match(r'\[(.+)\](?::(\d+))?$', host, re.ASCII | re.IGNORECASE)
        if m is not None: # [literal]:port
            host = m.group(1)
            if m.group(2) is not None:
                port = m.group(2)
    elif colons == 1: # host:port
        host, _, port = host.partition(':')

    # normalize the port to int so comparisons against the default (22)
    # behave the same whether or not a port was given in the name
    return ssh_remote.SSHRemote(host, int(port), username)
+
class Target(ABC):
    """
    Abstract base class for a backup target: a named set of directories
    (with optional excludes) saved into a bup repository by the save()
    method of a concrete subclass.
    """
    # backup name; used as the bup save name ('bup save -n') and as the
    # default logger name
    name = None
    # list of directories to back up
    dirs = None
    # list of paths excluded from the backup
    excludes = None

    _logger = None

    def __init__(self, name, dirs, excludes = None, logger = None):
        """
        :param str name: name of this backup target
        :param list dirs: directories to back up
        :param list excludes: paths to exclude; defaults to none
        :param logging.Logger logger: logger to use; a logger named after
            the target is created when not supplied
        """
        if excludes is None:
            excludes = []

        self.name = name
        self.dirs = dirs
        self.excludes = excludes

        if logger is None:
            self._logger = logging.getLogger(self.name)
        else:
            self._logger = logger

    def _log_command(self, name, retcode, stdout, stderr):
        """
        Debug-log a finished command's return code and its (truncated,
        single-line-sanitized) stdout/stderr.

        :param str name: human-readable command name used in the log lines
        :param int retcode: command return code
        :param bytes stdout: captured standard output
        :param bytes stderr: captured standard error
        """
        self._logger.debug('%s finished with return code %d' % (name, retcode))

        def sanitize(b):
            # at most this many bytes of output end up in the log
            LOG_LEN = 128
            # truncate and decode
            s = b[:LOG_LEN].decode('utf-8', errors = 'backslashreplace')
            # replace newlines with literal \n's
            s = r'\n'.join(s.splitlines())
            # add ellipsis if truncated
            if len(b) > LOG_LEN:
                s += '[...]'

            return s

        if len(stdout) > 0:
            self._logger.debug('%s stdout: %s' % (name, sanitize(stdout)))
        if len(stderr) > 0:
            self._logger.debug('%s stderr: %s' % (name, sanitize(stderr)))

    def _do_save(self, bup_exec, path_prefix = '', index_opts = None, save_opts = None):
        """
        Run 'bup index' followed by 'bup save' for this target's dirs.

        :param list bup_exec: command prefix used to invoke bup, e.g.
            ['bup'] or ['bup', 'on', host, '-d', dir]
        :param str path_prefix: prefix prepended to every dir/exclude path
            (e.g. a snapshot mountpoint); dirs/excludes are treated as
            absolute paths under it
        :param list index_opts: extra options appended to 'bup index'
        :param list save_opts: extra options appended to 'bup save'
        :rtype: repository.StepResult
        """
        excludes = [path_prefix + '/' + e for e in self.excludes]
        dirs = [path_prefix + '/' + d for d in self.dirs]

        if index_opts is None:
            index_opts = []
        if save_opts is None:
            save_opts = []

        # index
        cmd = bup_exec + ['index', '--update', '--one-file-system'] + index_opts
        cmd.extend(['--exclude=%s' % e for e in excludes])
        cmd.extend(dirs)

        self._logger.debug('Executing index command: ' + str(cmd))
        res_idx = subprocess.run(cmd, capture_output = True)
        self._log_command('Index', res_idx.returncode,
                          res_idx.stdout, res_idx.stderr)

        # save is attempted even if index failed; the save step's return
        # code wins when both fail, and failing steps' outputs are
        # concatenated (stderr before stdout)
        cmd = bup_exec + ['save', '-n', self.name] + save_opts + dirs
        self._logger.debug('Executing save command: ' + str(cmd))
        res_save = subprocess.run(cmd, capture_output = True)
        self._log_command('Save', res_save.returncode,
                          res_save.stdout, res_save.stderr)

        retcode = 0
        output = b''
        if res_idx.returncode != 0:
            retcode = res_idx.returncode
            output += res_idx.stderr + res_idx.stdout
        if res_save.returncode != 0:
            retcode = res_save.returncode
            output += res_save.stderr + res_save.stdout

        result = repository.StepResult(retcode, output)

        return result

    @abstractmethod
    def save(self, data_dir):
        """
        Back up this target.

        :param str data_dir: path to the repository's runtime data directory
        :rtype: repository.StepResult
        """
        pass
+
class TargetLocal(Target):
    """Backup target located on the local machine."""
    def save(self, data_dir):
        # plain local bup invocation, no remoting
        bup_exec = ['bup']
        return self._do_save(bup_exec)
+
class TargetSSH(Target):
    """
    Backup target located on a remote machine accessed over SSH, backed up
    with 'bup on'.

    :param str name: backup name; when remote is None it must be of the
        form user@host and is parsed into the remote specification
    :param list dirs: directories to back up
    :param list excludes: paths to exclude
    :param logging.Logger logger: logger to use
    :param ssh_remote.SSHRemote remote: remote host specification; parsed
        from name when not supplied
    :param str remote_bupdir: BUP_DIR location on the remote, defaults to
        $HOME/.bup (expanded remotely)
    """
    _remote = None

    def __init__(self, name, dirs, excludes = None, logger = None,
                 remote = None, remote_bupdir = None):
        if remote is None:
            remote = _parse_name(name)
        if remote.proxy_remote is not None:
            raise NotImplementedError('Proxy remote not implemented')
        if remote.port != 22:
            raise NotImplementedError('Specifying port not implemented')
        self._remote = remote

        if remote_bupdir is None:
            remote_bupdir = '$HOME/.bup'
        self._remote_bupdir = remote_bupdir

        super().__init__(name, dirs, excludes, logger)

    def _paramiko_exec_cmd(self, client, cmd):
        """
        Execute cmd on the remote and return its decoded stdout.

        :param client: an open SSHConnection
        :param str cmd: command line to execute
        :rtype: str
        :raises RemoteExecException: on timeout or non-zero exit status
        """
        self._logger.debug('Client %s: executing command: %s' % (client, cmd))

        res = client.exec_command(cmd)

        chan = res[0].channel
        chan.settimeout(64)
        try:
            out, err = res[1].read(), res[2].read()
        except socket.timeout as t:
            raise RemoteExecException('Timeout waiting for command output',
                                      errno.ETIMEDOUT, b'') from t

        chan.recv_exit_status()
        if chan.exit_status != 0:
            raise RemoteExecException('Error executing "%s"' % cmd,
                                      chan.exit_status, err + out)

        self._log_command('Remote command', chan.exit_status, out, err)

        return out.decode('utf-8', errors = 'backslashreplace')

    def _resolve_remote_bupdir(self, ssh):
        """
        Resolve self._remote_bupdir to a canonical absolute path on the
        remote and validate it (single line, absolute, no whitespace).

        :rtype: str
        :raises BackupException: if the resolved path looks invalid
        """
        bupdir = self._paramiko_exec_cmd(ssh, 'realpath -e ' + self._remote_bupdir).splitlines()
        if (len(bupdir) != 1 or len(bupdir[0]) <= 1 or bupdir[0][0] != '/' or
            re.search(r'\s', bupdir[0])):
            raise BackupException('Invalid BUP_DIR on the remote target: %s' % str(bupdir))
        return bupdir[0]

    def save(self, data_dir):
        with _ssh_client.SSHConnection(self._remote) as ssh:
            # fixed: this used to call the non-existent _resolve_bupdir()
            remote_bupdir = self._resolve_remote_bupdir(ssh)

        # fixed: the '-d <remote_bupdir>' command prefix was built but then
        # discarded, so the resolved remote BUP_DIR was never actually used
        bup_exec = ['bup', 'on', '%s@%s' % (self._remote.username, self._remote.host),
                    '-d', remote_bupdir]
        return self._do_save(bup_exec)
+
class TargetSSHLXCLVM(TargetSSH):
    """
    This target backs up an LXC container that lives on its own LVM logical
    volume. Requires root-capable login on the container's host.

    :param str name: backup name
    :param list dirs: directories to back up (inside the container)
    :param list excludes: paths to exclude
    :param logging.Logger logger: logger to use
    :param ssh_remote.SSHRemote target_remote: remote spec of the container
    :param str target_remote_bupdir: BUP_DIR location inside the container
    :param SSHRemote parent_remote: remote spec of the container's host
    :param str lxc_username: user owning the container on the host,
        defaults to parent_remote's username
    :param str lxc_containername: name of the container
    :param str snapshot_size: size of the temporary LVM snapshot of the
        container's root volume
    """
    _parent_remote = None
    _lxc_username = None
    _lxc_containername = None

    def __init__(self, name, dirs, excludes = None, logger = None,
                 target_remote = None, target_remote_bupdir = None,
                 parent_remote = None,
                 lxc_username = None, lxc_containername = None,
                 snapshot_size = '20G'):
        if parent_remote is None:
            raise ValueError('parent_remote not specified')
        if lxc_username is None:
            # fixed: 'parent_remote.usename' typo raised AttributeError
            # whenever lxc_username was left to default
            lxc_username = parent_remote.username

        self._parent_remote = parent_remote
        self._lxc_username = lxc_username
        self._lxc_containername = lxc_containername
        self._snapshot_size = snapshot_size

        super().__init__(name, dirs, excludes, logger, target_remote, target_remote_bupdir)

    def save(self, data_dir):
        with contextlib.ExitStack() as stack:
            parent = stack.enter_context(_ssh_client.SSHConnection(self._parent_remote))
            container = stack.enter_context(_ssh_client.SSHConnection(self._remote))

            # resolve the path to BUP_DIR on the container
            container_bupdir = self._resolve_remote_bupdir(container)

            # make sure the mount directory exists
            # due to how bup index works, the mount directory has to stay the
            # same for each backup
            # we use BUP_DIR/lbup_mount
            container_mountpoint = '%s/%s' % (container_bupdir, 'lbup_mount')
            self._paramiko_exec_cmd(container, 'mkdir -p -m 700 ' + container_mountpoint)

            save_opts = ['--strip-path', container_mountpoint]

            # get the PID of the container's init
            cmd_template = 'su -s /bin/sh -c "{command}" %s' % self._lxc_username
            container_pid = self._paramiko_exec_cmd(parent, cmd_template.format(
                command = 'lxc-info -H -p -n %s' % self._lxc_containername)).rstrip('\n')
            # make sure it's a number
            if not re.fullmatch('[0-9]+', container_pid):
                raise BackupException('Invalid container PID: %s' % container_pid)

            # get the LV/VG for the container's rootfs
            container_rootfs = self._paramiko_exec_cmd(parent, cmd_template.format(
                command = 'lxc-info -H -c lxc.rootfs.path -n %s' %
                          self._lxc_containername)).rstrip('\n')
            # oct-escape certain characters as they are in /proc/mounts
            # see seq_path[_root]() in linux
            container_rootfs = container_rootfs.translate(
                { ord(' ') : r'\040', ord('\t') : r'\011',
                  ord('\n') : r'\012', ord('\\') : r'\0134'})
            # make sure the rootfs path is
            # - non-empty
            # - an absolute path
            # - contains no whitespace
            if (len(container_rootfs) <= 1 or container_rootfs[0] != '/' or
                re.search(r'\s', container_rootfs)):
                # fixed typo in the error message ('Unxpected')
                raise BackupException('Unexpected container rootfs directory: %s' % container_rootfs)

            # find the device node and the filesystem type for the container rootfs
            mountlines = self._paramiko_exec_cmd(parent,
                'grep "%s" /proc/mounts' % container_rootfs).splitlines()
            if len(mountlines) != 1:
                raise BackupException('Expected exactly one matching mount line for the '
                                      'container root, got %d' % len(mountlines))

            mountline = mountlines[0].split()
            if len(mountline) < 2 or mountline[1] != container_rootfs:
                raise BackupException('Invalid mount line: %s' % mountline)
            lv_path = mountline[0]
            lv_fstype = mountline[2]
            # make sure the LV path is
            # - non-empty
            # - an absolute path
            # - contains no whitespace
            # and that the FS type is non-empty
            if (len(lv_path) <= 1 or lv_path[0] != '/' or
                re.search(r'\s', lv_path) or len(lv_fstype) < 1):
                raise BackupException('Unexpected LV path/FS type: %s\t%s' % (lv_path, lv_fstype))

            # find the LV and VG names
            lvdisplay = self._paramiko_exec_cmd(parent,
                'lvdisplay -C --noheadings -o lv_name,vg_name ' + lv_path).split()
            if len(lvdisplay) != 2:
                raise BackupException('Unexpected lvdisplay output: %s' % str(lvdisplay))
            lv_name, vg_name = lvdisplay
            if len(lv_name) < 1 or len(vg_name) < 1:
                raise BackupException('Unexpected LV/VG name: %s\t%s' % (lv_name, vg_name))

            # create a read-only snapshot; removed automatically on stack
            # unwind (i.e. after the unmount in the finally below)
            snapshot_name = 'lbup_' + lv_name
            self._paramiko_exec_cmd(parent,
                'lvcreate --permission r --snapshot -L {size} -n {name} {origin}'
                .format(size = self._snapshot_size, name = snapshot_name,
                        origin = lv_path))
            stack.callback(lambda: self._paramiko_exec_cmd(parent,
                'lvremove -f %s/%s' % (vg_name, snapshot_name)))

            # execute the backup
            # wait for the new node to be created
            self._paramiko_exec_cmd(parent, 'udevadm settle')

            # we cannot trust any binaries located inside the container, since a
            # compromised container could use them to execute arbitrary code
            # with real root privileges, thus nullifying the point of
            # unprivileged containers)
            # so we ship a special tool, 'nsmount', which has to be
            # installed on the parent, to mount the snapshot into the
            # container mount namespace
            self._paramiko_exec_cmd(parent,
                'nsmount m {pid} {mountpoint} {devpath} {fstype}'.format(
                    pid = container_pid, mountpoint = container_mountpoint,
                    devpath = '/dev/%s/%s' % (vg_name, snapshot_name),
                    fstype = lv_fstype))

            bup_exec = ['bup', 'on', '%s@%s' % (self._remote.username, self._remote.host),
                        '-d', container_bupdir]
            try:
                ret = self._do_save(bup_exec, path_prefix = container_mountpoint,
                                    save_opts = save_opts, index_opts = ['--no-check-device'])
            finally:
                # always unmount the snapshot from the container namespace,
                # even if the save failed
                self._paramiko_exec_cmd(parent,
                    'nsmount u {pid} {mountpoint}'.format(
                        pid = container_pid, mountpoint = container_mountpoint))

        return ret