from abc import ABC, abstractmethod
import contextlib
import errno
import logging
import re
import secrets
import shlex
import socket
import subprocess

from .exceptions import BackupException, RemoteExecException
from ._mountinfo import MountInfo
from ._path import AbsPath, ROOT
from .ssh_remote import SSHRemote
from . import repository
from . import _ssh_client


def _parse_name(name):
    """
    Parse a backup name into a remote specification.

    Accepted forms are ``user@host``, ``user@host:port``,
    ``user@[ipv6-literal]`` and ``user@[ipv6-literal]:port``. A host with two
    or more colons that is not in the bracketed form is used verbatim. The
    port defaults to 22 and is always passed on as an integer.

    :param str name: backup name to parse
    :return: SSHRemote constructed from (host, port, username)
    :raises ValueError: if name contains no '@' or the port is not a number
    """
    # split off the username
    if '@' not in name:
        raise ValueError('Invalid backup name: "%s", must be of format user@host' % (name,))
    username, _, host = name.partition('@')

    port = 22 # overridden later if specified in name
    colons = host.count(':')
    if colons >= 2: # IPv6 literal, possibly with port
        # the colon separating the port is kept out of the capture group so
        # that the group holds digits only
        m = re.match(r'\[(.+)\](?::(\d+))?', host, re.ASCII | re.IGNORECASE)
        if m is not None: # [literal] or [literal]:port
            host = m.group(1)
            if m.group(2) is not None:
                port = int(m.group(2))
    elif colons == 1: # host:port
        host, _, port_str = host.partition(':')
        try:
            port = int(port_str)
        except ValueError:
            raise ValueError('Invalid port "%s" in backup name "%s"' %
                             (port_str, name)) from None

    return SSHRemote(host, port, username)


def _mounts_for_dirs(mntinfo, dirs):
    """
    Return the distinct mounts that contain the given directories.

    The mounts are looked up through mntinfo.mountpoint_for_path() and
    returned sorted by their ``index`` attribute.

    :param mntinfo: object providing mountpoint_for_path()
    :param dirs: iterable of paths to look up
    :return: list of mounts, without duplicates
    :raises RuntimeError: if no mount was found at all
    :raises ValueError: if a mount has a root other than ROOT, or if a mount
                        other than the first one has a parent that does not
                        precede it in the returned list
    """
    mounts = sorted({mntinfo.mountpoint_for_path(d) for d in dirs},
                    key = lambda m: m.index)
    if not mounts:
        raise RuntimeError('Expected at least one mount, got zero')

    for i, m in enumerate(mounts):
        if m.root != ROOT:
            raise ValueError('Offset root in mount, this is not supported', m)
        # every mount except the first must hang off one that precedes it
        if i and m.parent not in mounts[:i]:
            raise ValueError('Mount has an intermediate parent, this is not supported', m)

    return mounts


class Target(ABC):
    """
    Base class for backup targets: a named set of directories (and
    exclusions) that is indexed and saved with bup.
    """

    # name of the backup, passed to 'bup save -n'
    name = None
    # directories to back up
    dirs = None
    # paths passed to 'bup index' as --exclude
    excludes = None

    _logger = None

    def __init__(self, name, dirs, excludes = None, logger = None):
        """
        :param name: backup name
        :param dirs: directories to back up, at least one is required
        :param excludes: paths to exclude, defaults to an empty list
        :param logger: logger to use; when None, a logger named
                       '<class name>.<name>' is obtained from logging
        :raises ValueError: if dirs is empty
        """
        if excludes is None:
            excludes = []

        if len(dirs) < 1:
            raise ValueError('One or more dirs to backup required')

        self.name = name
        self.dirs = dirs
        self.excludes = excludes

        if logger is None:
            self._logger = logging.getLogger('%s.%s' % (self.__class__.__name__, self.name))
        else:
            self._logger = logger

    def __repr__(self):
        return "%s{%s/%s}" % (self.__class__.__name__, self.dirs, self.excludes)

    def _log_command(self, name, retcode, stdout, stderr):
        """
        Log the outcome of a finished command at debug level.

        :param name: label for the command in the log messages
        :param retcode: return code of the command
        :param bytes stdout: captured standard output, logged if non-empty
        :param bytes stderr: captured standard error, logged if non-empty
        """
        self._logger.debug('%s finished with return code %d' % (name, retcode))

        def sanitize(b):
            LOG_LEN = 128
            # truncate and decode
            s = b[:LOG_LEN].decode('utf-8', errors = 'backslashreplace')
            # replace newlines with literal \n's
            s = r'\n'.join(s.splitlines())
            # add ellipsis if truncated
            if len(b) > LOG_LEN:
                s += '[...]'

            return s

        if len(stdout) > 0:
            self._logger.debug('%s stdout: %s' % (name, sanitize(stdout)))
        if len(stderr) > 0:
            self._logger.debug('%s stderr: %s' % (name, sanitize(stderr)))

    def _do_save(self, bup_exec, dry_run, *,
                 dirs = None, excludes = None,
                 index_opts = None, save_opts = None):
        """
        Run 'bup index' followed by 'bup save' and collect the outcome.

        :param list bup_exec: command prefix used to invoke bup
        :param bool dry_run: when true, the commands are only logged
        :param dirs: directories to back up, defaults to self.dirs
        :param excludes: paths to exclude, defaults to self.excludes
        :param index_opts: extra options for 'bup index'
        :param save_opts: extra options for 'bup save'
        :return: repository.StepResult built from a success flag and the
                 stderr+stdout of whichever commands failed; the save command
                 is run even when the index command fails
        """
        if dirs is None:
            dirs = self.dirs
        if excludes is None:
            excludes = self.excludes
        if index_opts is None:
            index_opts = []
        if save_opts is None:
            save_opts = []

        # index
        cmd = bup_exec + ['index', '--update', '--one-file-system'] + index_opts
        cmd.extend(['--exclude=%s' % e for e in excludes])
        # stringify the paths, same as for the save command below
        cmd.extend(map(str, dirs))
        if dry_run:
            self._logger.debug('Not executing index command: ' + str(cmd))
        else:
            self._logger.debug('Executing index command: ' + str(cmd))
            res_idx = subprocess.run(cmd, capture_output = True)
            self._log_command('Index', res_idx.returncode,
                              res_idx.stdout, res_idx.stderr)

        # save
        cmd = bup_exec + ['save', '-n', self.name] + save_opts + list(map(str, dirs))
        retcode = 0
        output = b''
        if dry_run:
            self._logger.debug('Not executing save command: ' + str(cmd))
        else:
            self._logger.debug('Executing save command: ' + str(cmd))
            res_save = subprocess.run(cmd, capture_output = True)
            self._log_command('Save', res_save.returncode,
                              res_save.stdout, res_save.stderr)

            # the results only exist when the commands were actually run;
            # if both failed, the save return code wins and the outputs are
            # concatenated
            if res_idx.returncode != 0:
                retcode = res_idx.returncode
                output += res_idx.stderr + res_idx.stdout
            if res_save.returncode != 0:
                retcode = res_save.returncode
                output += res_save.stderr + res_save.stdout

        result = repository.StepResult(retcode == 0, output)

        return result

    @abstractmethod
    def save(self, dry_run = False):
        """
        Perform the backup.

        :param bool dry_run: when true, do not execute the bup commands
        """
        pass


class TargetLocal(Target):
    """
    Target that runs plain 'bup' on self.dirs.
    """

    def save(self, dry_run = False):
        return self._do_save(['bup'], dry_run)


class TargetSSH(Target):
    """
    Target that backs up a remote host through 'bup on user@host'.
    """

    # SSHRemote describing the host to back up
    _remote = None
    # location of the bup repository directory on the remote
    _remote_bupdir = None

    def __init__(self, name, dirs, excludes = None, logger = None,
                 remote = None, remote_bupdir = None):
        """
        :param remote: remote specification; when None it is parsed from name
        :param remote_bupdir: bup directory on the remote, defaults to
                              '$HOME/.bup'
        """
        if remote is None:
            remote = _parse_name(name)
        self._remote = remote

        if remote_bupdir is None:
            # NOTE(review): this value reaches the remote shell through
            # shlex.join() in _paramiko_exec_cmd(), which single-quotes
            # arguments containing '$', so $HOME is presumably not expanded
            # remotely - confirm that the default works as intended
            remote_bupdir = '$HOME/.bup'
        self._remote_bupdir = remote_bupdir

        super().__init__(name, dirs, excludes, logger)

    def __str__(self):
        return "%s{SSH:%s}" % (super().__str__(), str(self._remote))

    def _paramiko_exec_cmd(self, client, cmd, decode = True):
        """
        Execute a command through the given SSH client and return its stdout.

        :param client: connection object providing exec_command()
        :param cmd: iterable of command arguments; each one is converted to
                    str and the result is shell-quoted with shlex.join()
        :param bool decode: decode the output as UTF-8 (undecodable bytes are
                            backslash-escaped) instead of returning bytes
        :return: standard output of the command
        :raises RemoteExecException: if reading the output times out or the
                                     command exits with a non-zero status
        """
        cmd = shlex.join(map(str, cmd))
        self._logger.debug('Client %s: executing command: %s' % (client, cmd))

        # res is indexed as (stdin, stdout, stderr)
        res = client.exec_command(cmd)

        chan = res[0].channel
        chan.settimeout(64)
        try:
            out, err = res[1].read(), res[2].read()
        except socket.timeout as t:
            raise RemoteExecException('Timeout waiting for command output',
                                      errno.ETIMEDOUT, b'') from t

        chan.recv_exit_status()
        if chan.exit_status != 0:
            raise RemoteExecException('Error executing "%s"' % cmd,
                                      chan.exit_status, err + out)

        self._log_command('Remote command', chan.exit_status, out, err)

        if decode:
            out = out.decode('utf-8', errors = 'backslashreplace')

        return out

    def _resolve_remote_path(self, ssh, path):
        """
        Resolve path on the remote by running 'realpath -e' on it.

        :return: the resolved path as an AbsPath
        :raises BackupException: if realpath does not print exactly one line
        """
        path = self._paramiko_exec_cmd(ssh, ['realpath', '-e', path]).splitlines()
        if len(path) != 1:
            raise BackupException('Expected exactly one path from realpath', path)

        return AbsPath(path[0])

    def save(self, dry_run = False):
        with _ssh_client.SSHConnection(self._remote) as ssh:
            remote_bupdir = self._resolve_remote_path(ssh, self._remote_bupdir)

            # the resolved path is an AbsPath; hand a plain string to the
            # subprocess, as TargetSSHLVM.save() does
            bup_exec = ['bup', 'on', '%s@%s' % (self._remote.username, self._remote.host),
                        '-d', str(remote_bupdir)]
            return self._do_save(bup_exec, dry_run)


class TargetSSHLVM(TargetSSH):
    """
    This target backs up a remote host using LVM snapshots.
        Requires root login on the system.

        :param str path_prefix: Prefix to be added to all paths in dirs and excludes.
                                This prefix will not appear in the backups.
    """

    # size passed to 'lvcreate -L' for each snapshot
    _snapshot_size = None
    # AbsPath prepended to all dirs and excludes
    _path_prefix = None

    def __init__(self, name, dirs, excludes = None, logger = None,
                 remote = None, remote_bupdir = None,
                 snapshot_size = '20G', path_prefix = '/'):
        self._snapshot_size = snapshot_size
        self._path_prefix = AbsPath(path_prefix)

        super().__init__(name, dirs, excludes, logger, remote, remote_bupdir)

    def __str__(self):
        return "%s{LVM:%s}" % (super().__str__(), self._snapshot_size)

    def _resolve_lv(self, ssh, devnum):
        """
        Find the logical volume for the given device number.

        Return its full name, i.e. vgname/lvname

        :raises BackupException: if the output of lvs is not of the form
                                 vgname/lvname
        """
        # devnum is split into major (bits 8 and up) and minor (low 8 bits)
        major = devnum >> 8
        minor = devnum & 255
        res = self._paramiko_exec_cmd(ssh,
            ['lvs', '--select', 'kernel_major=%d&&kernel_minor=%d' % (major, minor),
             '--noheadings', '-o', 'lv_full_name'])

        lv_name = res.strip()
        # valid LV paths are volname/lvname, each non-empty alphanumeric+_
        if not re.fullmatch(r'\w+/\w+', lv_name, re.ASCII):
            raise BackupException('Invalid LV path', lv_name)

        return lv_name

    @contextlib.contextmanager
    def _snapshot_lv(self, ssh, devnum):
        """
        Return a context manager that creates a read-only LVM snapshot
        for the specified LV device number and destroys it at exit.

        The context manager yields the path of the snapshot device node.
        """
        lv_fullname = self._resolve_lv(ssh, devnum)
        self._logger.debug('LV volume to snapshot is %s', lv_fullname)

        vg_name = lv_fullname.split('/')[0]

        # create a read-only snapshot with a random name
        # make sure snapshot name starts with a letter
        snapshot_name = 'a' + secrets.token_urlsafe()
        snapshot_fullname = '%s/%s' % (vg_name, snapshot_name)
        self._paramiko_exec_cmd(ssh,
            ['lvcreate', '--permission', 'r', '--snapshot',
             '-L', self._snapshot_size, '-n', snapshot_name, lv_fullname])

        try:
            # get the path to the snapshot device node
            res = self._paramiko_exec_cmd(ssh,
                ['lvs', '--select', 'lv_full_name=%s' % snapshot_fullname,
                 '--noheadings', '-o', 'lv_path'])
            lv_path = res.strip()
            if not lv_path.startswith('/'):
                raise BackupException('Got invalid snapshot LV path', lv_path)

            self._logger.debug('Created snapshot %s at %s', snapshot_fullname, lv_path)

            yield lv_path
        finally:
            # the snapshot is removed whether or not the body succeeded
            self._paramiko_exec_cmd(ssh, ['lvremove', '-f', snapshot_fullname])
            self._logger.debug('Removed snapshot %s', snapshot_fullname)

    @contextlib.contextmanager
    def _mount_snapshot(self, ssh, devnum, mount_path, fstype):
        """
        Return a context manager that creates a read-only LVM snapshot
        for the specified LV device number and mounts it at mount_path,
        then unmounts and destroys it at exit.
        """
        with self._snapshot_lv(ssh, devnum) as lv_path:
            self._paramiko_exec_cmd(ssh,
                ['mount', '-t', fstype, '-oro', lv_path, mount_path])
            try:
                yield None
            finally:
                self._paramiko_exec_cmd(ssh, ['umount', mount_path])

    def save(self, dry_run = False):
        """
        Snapshot and mount every volume holding the configured dirs, then
        run the backup against the mounted snapshots.

        Two connections are opened: one as the configured user and one as
        root (used for reading the mount table and for the LVM and mount
        commands). All snapshots are unmounted and removed when the method
        exits.
        """
        with contextlib.ExitStack() as stack:
            root_remote = SSHRemote(self._remote.host, self._remote.port, 'root',
                                    self._remote.proxy_remote)
            conn_tgt = stack.enter_context(_ssh_client.SSHConnection(self._remote))
            conn_root = stack.enter_context(_ssh_client.SSHConnection(root_remote))

            # resolve the remote paths
            bupdir = self._resolve_remote_path(conn_tgt, self._remote_bupdir)
            dirs = [self._resolve_remote_path(conn_tgt, self._path_prefix + d)
                    for d in self.dirs]
            excludes = [self._resolve_remote_path(conn_tgt, self._path_prefix + d)
                        for d in self.excludes]

            # make sure the mount directory exists
            # due to how bup index works, the mount directory has to stay the
            # same for each backup
            # we use BUP_DIR/lbup_mount
            snapshot_root = bupdir + 'lbup_mount'
            self._paramiko_exec_cmd(conn_tgt, ['mkdir', '-p', '-m', '700', str(snapshot_root)])

            # read and parse the mountinfo table
            mntinfo = MountInfo(
                self._paramiko_exec_cmd(conn_root, ['cat', '/proc/1/mountinfo'],
                                        decode = False))

            mounts = _mounts_for_dirs(mntinfo, dirs)

            for mnt in mounts:
                self._logger.debug('Processing mount: %s', str(mnt))

                # make sure we have a valid fstype
                fstype = mnt.fstype.decode('ascii')
                if not re.fullmatch(r'\w+', fstype, re.ASCII):
                    raise BackupException('Invalid LV FS type', fstype)

                # The volumes to snapshot are sorted by mount order.
                # We reparent the snapshot mountpoints such that the topmost one
                # ends up at snapshot_root, and the rest (if any) have the same
                # relative position to the top one as they do in the real
                # hierarchy
                snapshot_path = mnt.mount_point.reparent(mounts[0].mount_point, snapshot_root)

                self._logger.debug('Snapshotting device %s(%s) mounted at %s',
                                   "%d:%d" % (mnt.devnum >> 8, mnt.devnum & 255),
                                   fstype, str(mnt.mount_point))

                # registered on the stack so that the snapshot stays mounted
                # until the backup below has finished
                stack.enter_context(self._mount_snapshot(conn_root, mnt.devnum,
                                                         str(snapshot_path), fstype))

            bup_exec = ['bup', 'on', '%s@%s' % (self._remote.username, self._remote.host),
                        '-d', str(bupdir)]

            # graft the snapshot root back to the original location of the
            # topmost mount, with the path prefix stripped
            mnt_offset = mounts[0].mount_point.reparent(self._path_prefix, ROOT)
            index_path = bupdir + self.name
            save_opts = ['--graft=%s=%s' % (snapshot_root, mnt_offset),
                         '--indexfile=%s' % str(index_path)]
            index_opts = ['--no-check-device', '--indexfile=%s' % str(index_path)]

            # point dirs and excludes at their location inside the snapshots
            reparent = (mounts[0].mount_point, snapshot_root)
            dirs = [str(d.reparent(*reparent)) for d in dirs]
            excludes = [str(d.reparent(*reparent)) for d in excludes]

            return self._do_save(bup_exec, dry_run,
                                 dirs = dirs, excludes = excludes,
                                 save_opts = save_opts, index_opts = index_opts)