diff options
Diffstat (limited to 'nsmount.c')
-rw-r--r-- | nsmount.c | 233 |
1 files changed, 233 insertions, 0 deletions
/**
 * nsmount - mount a block device into a mount/pid namespace
 * Copyright (C) 2019 Anton Khirnov <anton@khirnov.net>
 *
 * nsmount is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * nsmount is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with nsmount. If not, see <http://www.gnu.org/licenses/>.
 */

#define _XOPEN_SOURCE 700
#define _GNU_SOURCE

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/mount.h>
#include <sys/types.h>
#include <sys/wait.h>

/* Operation selected by the first commandline argument. */
enum {
    OP_MOUNT,
    OP_UMOUNT,
};

/* Process exit codes (values are part of the program's interface). */
enum {
    RET_OK    = 0,
    RET_USAGE = 1, /* invalid commandline */
    RET_OPEN  = 2, /* could not open the block device or a namespace file */
    RET_SETNS = 3, /* could not enter a namespace */
    RET_FORK  = 4, /* fork/wait failure, or failure to build the mount path */
    RET_MOUNT = 5, /* mount() or umount() failed */
};

/**
 * Print the usage message to stderr.
 *
 * @param argc number of commandline arguments (unused)
 * @param argv commandline arguments; argv[0] is used as the program name
 */
static void print_usage(int argc, const char * const *argv)
{
    (void)argc;

    fprintf(stderr,
            "%s: mount/unmount a block device in a mount/PID namespace\n\n"
            "Usage:\n"
            " %s m <PID> <mountpoint> <blkdev_path> <fstype>\n"
            " %s u <PID> <mountpoint>\n\n"
            " <PID>: PID (in the namespace in which this program is executed)"
            " of the process whose namespaces are to be entered into\n"
            " <mountpoint>: path (in the destination mount namespace) to be mounted"
            " or unmounted\n"
            " <blkdev_path>: path (in the namespace in which this program is executed)"
            " to the block device that shall be mounted\n"
            " <fstype>: type of the filesystem to be mounted\n",
            argv[0], argv[0], argv[0]);
}

/**
 * Parse a strictly positive PID from a string.
 *
 * The base is auto-detected (strtol() base 0). Empty input, trailing
 * garbage, out-of-range and non-positive values are rejected.
 *
 * @param str string to parse
 * @param pid receives the parsed PID on success
 * @return 0 on success, -1 if str is not a valid PID
 */
static int parse_pid(const char *str, pid_t *pid)
{
    char *end;
    long val;

    errno = 0;
    val = strtol(str, &end, 0);
    if (end == str || *end != '\0' || errno == ERANGE ||
        val <= 0 || val > INT_MAX) {
        return -1;
    }

    *pid = (pid_t)val;
    return 0;
}

/**
 * Open /proc/<pid>/ns/<ns_file>, printing a message to stderr on failure.
 *
 * @param pid     PID of the process whose namespace file is opened
 * @param ns_file name of the file under /proc/<pid>/ns/
 * @param ns_desc human-readable namespace name used in error messages
 * @return an open file descriptor (owned by the caller), or -1 on failure
 */
static int open_ns(pid_t pid, const char *ns_file, const char *ns_desc)
{
    char path[128];
    int len, fd;

    len = snprintf(path, sizeof(path), "/proc/%d/ns/%s", (int)pid, ns_file);
    if (len < 0 || (size_t)len >= sizeof(path)) {
        fprintf(stderr, "Error constructing the %s namespace path\n", ns_desc);
        return -1;
    }

    fd = open(path, O_RDONLY | O_CLOEXEC);
    if (fd == -1) {
        fprintf(stderr, "Error opening %s: %s\n", path, strerror(errno));
    }

    return fd;
}

/**
 * Mount the block device referred to by an open fd, read-only and nosuid.
 *
 * @param blockdev_fd open file descriptor of the block device
 * @param mountpoint  mount target path
 * @param fstype      filesystem type passed to mount()
 * @return RET_OK on success, otherwise the exit code to terminate with
 */
static int mount_blockdev(int blockdev_fd, const char *mountpoint,
                          const char *fstype)
{
    char path[128];
    int len;

    /* we use /proc/self/fd to mount the device
     * Since the container controls its own filesystem hierarchy, it
     * could trick us into mounting an arbitrary node located in the
     * filesystem. This is not considered a major security problem,
     * since
     * - the container should not have access to mknod() or nodes that
     *   it is not meant to read
     * - we mount the filesystem read-only, with nosuid flag
     * - since the container will typically live in its own user
     *   namespace, it will not have the right permissions to access a
     *   filesystem that is not intended for it
     *
     * Ideally, there would be something like a mountfd() syscall that
     * would allow mounting an fd.
     */
    len = snprintf(path, sizeof(path), "/proc/self/fd/%d", blockdev_fd);
    if (len < 0 || (size_t)len >= sizeof(path)) {
        fprintf(stderr, "Error constructing the mount path\n");
        return RET_FORK;
    }

    if (mount(path, mountpoint, fstype, MS_RDONLY | MS_NOSUID, NULL) == -1) {
        fprintf(stderr, "mount(%s, %s) failed: %s\n",
                path, mountpoint, strerror(errno));
        return RET_MOUNT;
    }

    return RET_OK;
}

/**
 * Unmount the filesystem mounted at the given path.
 *
 * A malicious container can trick us into unmounting a filesystem in its
 * tree. This should not cause any issues other than disrupting the
 * container (which a compromised container can already do without our
 * help).
 *
 * @param mountpoint path to unmount
 * @return RET_OK on success, otherwise the exit code to terminate with
 */
static int umount_path(const char *mountpoint)
{
    if (umount(mountpoint) == -1) {
        fprintf(stderr, "umount() failed: %s\n", strerror(errno));
        return RET_MOUNT;
    }

    return RET_OK;
}

/**
 * Wait for the given child and translate its termination into an exit code.
 *
 * @param child_pid PID of the child returned by fork()
 * @return the child's own exit status if it exited normally, RET_FORK if
 *         waiting failed or the child was terminated abnormally
 */
static int wait_for_child(pid_t child_pid)
{
    int status = 0;
    pid_t res;

    /* a signal delivered to the parent must not make us lose the child */
    do {
        res = waitpid(child_pid, &status, 0);
    } while (res == -1 && errno == EINTR);

    if (res == -1) {
        fprintf(stderr, "Error waiting for the child: %s\n",
                strerror(errno));
        return RET_FORK;
    }

    /* propagate the result of the mount/umount done by the child */
    if (WIFEXITED(status)) {
        return WEXITSTATUS(status);
    }

    if (WIFSIGNALED(status)) {
        fprintf(stderr, "The child was killed by signal %d\n",
                WTERMSIG(status));
    } else {
        fprintf(stderr, "The child terminated abnormally\n");
    }

    return RET_FORK;
}

/**
 * Program entry point.
 *
 * Parses the commandline, opens the block device (for a mount) and the
 * target process's PID and mount namespace files, enters both namespaces,
 * then forks; the child performs the mount or unmount and the parent
 * reports the child's result as its own exit status.
 *
 * @return 0 on success, 1 on a commandline error, 2 if a file could not be
 *         opened, 3 if a namespace could not be entered, 4 on fork/wait
 *         failure, 5 if mount()/umount() failed
 */
int main(int argc, const char * const *argv)
{
    int blockdev_fd = -1, pidns_fd = -1, mountns_fd = -1;
    const char *blockdev = NULL, *mountpoint, *fstype = NULL;
    pid_t tgt_pid, child_pid;
    int op;
    int ret;

    /* parse the commandline */
    if (argc < 2) {
        print_usage(argc, argv);
        return RET_USAGE;
    }

    /* only the first character of the operation is significant */
    if (argv[1][0] == 'm') {
        op = OP_MOUNT;
        if (argc < 6) {
            print_usage(argc, argv);
            return RET_USAGE;
        }
    } else if (argv[1][0] == 'u') {
        op = OP_UMOUNT;
        if (argc < 4) {
            print_usage(argc, argv);
            return RET_USAGE;
        }
    } else {
        fprintf(stderr, "Invalid operation: %s\n",
                argv[1]);
        print_usage(argc, argv);
        return RET_USAGE;
    }

    if (parse_pid(argv[2], &tgt_pid) < 0) {
        fprintf(stderr, "Invalid PID: %s\n", argv[2]);
        print_usage(argc, argv);
        return RET_USAGE;
    }

    mountpoint = argv[3];
    if (op == OP_MOUNT) {
        blockdev = argv[4];
        fstype   = argv[5];
    }

    /* open the files; this must happen before we switch namespaces, since
     * the paths are resolved in the namespace we were started in */
    if (op == OP_MOUNT) {
        blockdev_fd = open(blockdev, O_RDONLY);
        if (blockdev_fd == -1) {
            fprintf(stderr, "Error opening %s: %s\n",
                    blockdev, strerror(errno));
            return RET_OPEN;
        }
    }

    pidns_fd = open_ns(tgt_pid, "pid", "PID");
    if (pidns_fd == -1) {
        ret = RET_OPEN;
        goto finish;
    }

    mountns_fd = open_ns(tgt_pid, "mnt", "mount");
    if (mountns_fd == -1) {
        ret = RET_OPEN;
        goto finish;
    }

    /* enter the namespaces */
    if (setns(pidns_fd, CLONE_NEWPID) == -1) {
        fprintf(stderr, "Error entering the PID namespace: %s\n",
                strerror(errno));
        ret = RET_SETNS;
        goto finish;
    }

    if (setns(mountns_fd, CLONE_NEWNS) == -1) {
        fprintf(stderr, "Error entering the mount namespace: %s\n",
                strerror(errno));
        ret = RET_SETNS;
        goto finish;
    }

    /* fork to actually enter the PID namespace */
    child_pid = fork();
    if (child_pid == -1) {
        fprintf(stderr, "fork() failed: %s\n",
                strerror(errno));
        ret = RET_FORK;
        goto finish;
    }

    if (child_pid) {
        /* we are the parent */
        ret = wait_for_child(child_pid);
    } else if (op == OP_MOUNT) {
        /* we are the child */
        ret = mount_blockdev(blockdev_fd, mountpoint, fstype);
    } else {
        /* we are the child */
        ret = umount_path(mountpoint);
    }

finish:
    if (blockdev_fd >= 0) {
        close(blockdev_fd);
    }
    if (pidns_fd >= 0) {
        close(pidns_fd);
    }
    if (mountns_fd >= 0) {
        close(mountns_fd);
    }

    return ret;
}