/**
 * nsmount - mount a block device into a mount/pid namespace
 * Copyright (C) 2019 Anton Khirnov
 *
 * nsmount is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * nsmount is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with nsmount.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Provenance: this file was added as nsmount.c by commit
 * 946fdc6a4078e6dcaf8c2b87b5466583e2c18882
 * (Anton Khirnov, Thu, 13 Feb 2020 21:43:57 +0100,
 * "Implement basic working LXC+LVM snapshot functionality.").
 */

#define _XOPEN_SOURCE 700
#define _GNU_SOURCE

#include <errno.h>      /* errno, EINTR */
#include <fcntl.h>      /* open(), O_RDONLY, O_CLOEXEC */
#include <limits.h>     /* INT_MAX */
#include <sched.h>      /* setns(), CLONE_NEWPID, CLONE_NEWNS */
#include <stdio.h>      /* fprintf(), snprintf() */
#include <stdlib.h>     /* strtol() */
#include <string.h>     /* strerror() */
#include <unistd.h>     /* fork(), close() */

#include <sys/mount.h>  /* mount(), umount(), MS_RDONLY, MS_NOSUID */
#include <sys/types.h>  /* pid_t */
#include <sys/wait.h>   /* waitpid(), WIFEXITED(), ... */

/* Operation selected by the first commandline argument. */
enum {
    OP_MOUNT,
    OP_UMOUNT,
};

/* Process exit statuses. */
enum {
    RET_OK    = 0,  /* success */
    RET_USAGE = 1,  /* invalid commandline */
    RET_OPEN  = 2,  /* could not open the block device or a namespace file */
    RET_SETNS = 3,  /* could not enter a namespace */
    RET_CHILD = 4,  /* fork/wait failure or internal path construction error */
    RET_OP    = 5,  /* mount() or umount() itself failed */
};

/**
 * Print the usage summary to stderr.
 *
 * @param argc unused, kept for interface compatibility
 * @param argv the program's argument vector; only argv[0] is read
 *
 * The placeholder names follow the order in which main() reads argv:
 * argv[2] is the PID, argv[3] the mountpoint, argv[4] the block device and
 * argv[5] the filesystem type.
 */
static void print_usage(int argc, const char * const *argv)
{
    (void)argc;

    fprintf(stderr,
            "%s: mount/unmount a block device in a mount/PID namespace\n\n"
            "Usage:\n"
            "    %s m <pid> <mountpoint> <blockdev> <fstype>\n"
            "    %s u <pid> <mountpoint>\n\n"
            "    <pid>: PID (in the namespace in which this program is executed)"
            " of the process whose namespaces are to be entered into\n"
            "    <mountpoint>: path (in the destination mount namespace) to be mounted"
            " or unmounted\n"
            "    <blockdev>: path (in the namespace in which this program is executed)"
            " to the block device that shall be mounted\n"
            "    <fstype>: type of the filesystem to be mounted\n",
            argv[0], argv[0], argv[0]);
}

/**
 * Parse a strictly positive PID from a string.
 *
 * The whole string must be consumed; base prefixes (0x, 0) are accepted,
 * since the conversion uses base 0.
 *
 * @param str string to parse
 * @param out receives the parsed PID on success
 * @return 0 on success, -1 if str is not a valid positive PID
 */
static int parse_pid(const char *str, pid_t *out)
{
    char *end = NULL;
    long val;

    errno = 0;
    val = strtol(str, &end, 0);
    if (errno != 0 || end == str || *end != '\0' || val <= 0 || val > INT_MAX)
        return -1;

    *out = (pid_t)val;
    return 0;
}

/**
 * Open /proc/<pid>/ns/<ns_name> read-only with O_CLOEXEC.
 *
 * @param pid     PID of the process whose namespace file is opened
 * @param ns_name name of the entry below /proc/<pid>/ns/ ("pid", "mnt")
 * @param ns_desc human-readable namespace name used in error messages
 * @return the open file descriptor, or -1 after printing an error to stderr
 */
static int open_ns(pid_t pid, const char *ns_name, const char *ns_desc)
{
    char path[128];
    int len, fd;

    len = snprintf(path, sizeof(path), "/proc/%d/ns/%s", (int)pid, ns_name);
    if (len < 0 || (size_t)len >= sizeof(path)) {
        fprintf(stderr, "Error constructing the %s namespace path\n", ns_desc);
        return -1;
    }

    fd = open(path, O_RDONLY | O_CLOEXEC);
    if (fd == -1)
        fprintf(stderr, "Error opening %s: %s\n", path, strerror(errno));

    return fd;
}

/**
 * Mount the block device referred to by blockdev_fd on mountpoint,
 * read-only and nosuid.
 *
 * @return RET_OK on success, RET_CHILD if the source path could not be
 *         constructed, RET_OP if mount() failed
 */
static int do_mount(int blockdev_fd, const char *mountpoint, const char *fstype)
{
    char path[128];
    int len;

    /* we use /proc/self/fd to mount the device
     * Since the container controls its own filesystem hierarchy, it
     * could trick us into mounting an arbitrary node located in the
     * filesystem. This is not considered a major security problem,
     * since
     * - the container should not have access to mknod() or nodes that
     *   it is not meant to read
     * - we mount the filesystem read-only, with nosuid flag
     * - since the container will typically live in its own user
     *   namespace, it will not have the right permissions to access a
     *   filesystem that is not intended for it
     *
     * Ideally, there would be something like a mountfd() syscall that
     * would allow mounting an fd.
     */
    len = snprintf(path, sizeof(path), "/proc/self/fd/%d", blockdev_fd);
    if (len < 0 || (size_t)len >= sizeof(path)) {
        fprintf(stderr, "Error constructing the mount path\n");
        return RET_CHILD;
    }

    if (mount(path, mountpoint, fstype, MS_RDONLY | MS_NOSUID, NULL) == -1) {
        fprintf(stderr, "mount(%s, %s) failed: %s\n",
                path, mountpoint, strerror(errno));
        return RET_OP;
    }

    return RET_OK;
}

/**
 * Unmount the filesystem mounted on mountpoint.
 *
 * As with mounting, a malicious container can trick us into unmounting a
 * filesystem in its tree. This should not cause any issues other than
 * disrupting the container (which a compromised container can already do
 * without our help).
 *
 * @return RET_OK on success, RET_OP if umount() failed
 */
static int do_umount(const char *mountpoint)
{
    if (umount(mountpoint) == -1) {
        fprintf(stderr, "umount() failed: %s\n", strerror(errno));
        return RET_OP;
    }

    return RET_OK;
}

/**
 * Wait for the given child and translate its termination into an exit status.
 *
 * @param child_pid PID returned by fork()
 * @return the child's own exit status if it exited normally, RET_CHILD if
 *         waiting failed or the child did not exit normally
 */
static int wait_for_child(pid_t child_pid)
{
    int status = 0;
    pid_t waited;

    /* a signal delivered to the parent must not make us abandon the child */
    do {
        waited = waitpid(child_pid, &status, 0);
    } while (waited == -1 && errno == EINTR);

    if (waited == -1) {
        fprintf(stderr, "Error waiting for the child: %s\n", strerror(errno));
        return RET_CHILD;
    }

    /* propagate the child's result, so that a failed mount/umount is
     * visible to whoever invoked us */
    if (WIFEXITED(status))
        return WEXITSTATUS(status);

    if (WIFSIGNALED(status))
        fprintf(stderr, "The child was terminated by signal %d\n",
                WTERMSIG(status));

    return RET_CHILD;
}

/**
 * Entry point: enter the PID and mount namespaces of the target process and
 * mount or unmount a filesystem there.
 *
 * Commandline (see print_usage()):
 *   argv[1] - operation, selected by its first character: 'm' or 'u'
 *   argv[2] - target PID
 *   argv[3] - mountpoint
 *   argv[4] - block device (mount only)
 *   argv[5] - filesystem type (mount only)
 *
 * @return one of the RET_* exit statuses
 */
int main(int argc, const char * const *argv)
{
    int blockdev_fd = -1, pidns_fd = -1, mountns_fd = -1;
    const char *blockdev = NULL, *mountpoint = NULL, *fstype = NULL;
    pid_t tgt_pid, child_pid;
    int min_args;
    int op;
    int ret;

    /* parse the commandline */
    if (argc < 2) {
        print_usage(argc, argv);
        return RET_USAGE;
    }

    if (argv[1][0] == 'm') {
        op       = OP_MOUNT;
        min_args = 6;
    } else if (argv[1][0] == 'u') {
        op       = OP_UMOUNT;
        min_args = 4;
    } else {
        fprintf(stderr, "Invalid operation: %s\n",
                argv[1]);
        print_usage(argc, argv);
        return RET_USAGE;
    }

    if (argc < min_args) {
        print_usage(argc, argv);
        return RET_USAGE;
    }

    if (parse_pid(argv[2], &tgt_pid) < 0) {
        fprintf(stderr, "Invalid PID: %s\n", argv[2]);
        print_usage(argc, argv);
        return RET_USAGE;
    }

    mountpoint = argv[3];
    if (op == OP_MOUNT) {
        blockdev = argv[4];
        fstype   = argv[5];
    }

    /* open the files; the block device has to be opened before switching
     * namespaces, since its path is relative to the current one */
    if (op == OP_MOUNT) {
        blockdev_fd = open(blockdev, O_RDONLY);
        if (blockdev_fd == -1) {
            fprintf(stderr, "Error opening %s: %s\n",
                    blockdev, strerror(errno));
            return RET_OPEN;
        }
    }

    pidns_fd = open_ns(tgt_pid, "pid", "PID");
    if (pidns_fd == -1) {
        ret = RET_OPEN;
        goto finish;
    }

    mountns_fd = open_ns(tgt_pid, "mnt", "mount");
    if (mountns_fd == -1) {
        ret = RET_OPEN;
        goto finish;
    }

    /* enter the namespaces */
    if (setns(pidns_fd, CLONE_NEWPID) == -1) {
        fprintf(stderr, "Error entering the PID namespace: %s\n",
                strerror(errno));
        ret = RET_SETNS;
        goto finish;
    }

    if (setns(mountns_fd, CLONE_NEWNS) == -1) {
        fprintf(stderr, "Error entering the mount namespace: %s\n",
                strerror(errno));
        ret = RET_SETNS;
        goto finish;
    }

    /* fork to actually enter the PID namespace */
    child_pid = fork();
    if (child_pid == -1) {
        fprintf(stderr, "fork() failed: %s\n",
                strerror(errno));
        ret = RET_CHILD;
        goto finish;
    }

    if (child_pid) {
        /* we are the parent */
        ret = wait_for_child(child_pid);
    } else {
        /* we are the child */
        if (op == OP_MOUNT)
            ret = do_mount(blockdev_fd, mountpoint, fstype);
        else
            ret = do_umount(mountpoint);
    }

finish:
    if (blockdev_fd >= 0)
        close(blockdev_fd);
    if (pidns_fd >= 0)
        close(pidns_fd);
    if (mountns_fd >= 0)
        close(mountns_fd);

    return ret;
}