mirror of
https://github.com/containers/podman.git
synced 2026-01-21 04:20:18 -05:00
Add the possibility to join directly the user and mount namespace without looking up the parent of the user namespace. We need this in order to be able the conmon process, as the mount namespace is kept alive only there. Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
256 lines
5.2 KiB
C
256 lines
5.2 KiB
C
#define _GNU_SOURCE
|
|
#include <sched.h>
|
|
#include <stdio.h>
|
|
#include <unistd.h>
|
|
#include <sys/syscall.h>
|
|
#include <stdlib.h>
|
|
#include <errno.h>
|
|
#include <sys/stat.h>
|
|
#include <limits.h>
|
|
#include <sys/types.h>
|
|
#include <signal.h>
|
|
#include <fcntl.h>
|
|
#include <sys/wait.h>
|
|
#include <string.h>
|
|
|
|
static const char *_max_user_namespaces = "/proc/sys/user/max_user_namespaces";
|
|
static const char *_unprivileged_user_namespaces = "/proc/sys/kernel/unprivileged_userns_clone";
|
|
|
|
static int
|
|
syscall_setresuid (uid_t ruid, uid_t euid, uid_t suid)
|
|
{
|
|
return (int) syscall (__NR_setresuid, ruid, euid, suid);
|
|
}
|
|
|
|
static int
|
|
syscall_setresgid (gid_t rgid, gid_t egid, gid_t sgid)
|
|
{
|
|
return (int) syscall (__NR_setresgid, rgid, egid, sgid);
|
|
}
|
|
|
|
static int
|
|
syscall_clone (unsigned long flags, void *child_stack)
|
|
{
|
|
return (int) syscall (__NR_clone, flags, child_stack);
|
|
}
|
|
|
|
static char **
|
|
get_cmd_line_args (pid_t pid)
|
|
{
|
|
int fd;
|
|
char path[PATH_MAX];
|
|
char *buffer;
|
|
size_t allocated;
|
|
size_t used = 0;
|
|
int ret;
|
|
int i, argc = 0;
|
|
char **argv;
|
|
|
|
sprintf (path, "/proc/%d/cmdline", pid);
|
|
fd = open (path, O_RDONLY);
|
|
if (fd < 0)
|
|
return NULL;
|
|
|
|
allocated = 512;
|
|
buffer = malloc (allocated);
|
|
if (buffer == NULL)
|
|
return NULL;
|
|
for (;;)
|
|
{
|
|
do
|
|
ret = read (fd, buffer + used, allocated - used);
|
|
while (ret < 0 && errno == EINTR);
|
|
if (ret < 0)
|
|
return NULL;
|
|
|
|
if (ret == 0)
|
|
break;
|
|
|
|
used += ret;
|
|
if (allocated == used)
|
|
{
|
|
allocated += 512;
|
|
buffer = realloc (buffer, allocated);
|
|
if (buffer == NULL)
|
|
return NULL;
|
|
}
|
|
}
|
|
close (fd);
|
|
|
|
for (i = 0; i < used; i++)
|
|
if (buffer[i] == '\0')
|
|
argc++;
|
|
if (argc == 0)
|
|
return NULL;
|
|
|
|
argv = malloc (sizeof (char *) * (argc + 1));
|
|
if (argv == NULL)
|
|
return NULL;
|
|
argc = 0;
|
|
|
|
argv[argc++] = buffer;
|
|
for (i = 0; i < used - 1; i++)
|
|
if (buffer[i] == '\0')
|
|
argv[argc++] = buffer + i + 1;
|
|
|
|
argv[argc] = NULL;
|
|
|
|
return argv;
|
|
}
|
|
|
|
int
|
|
reexec_userns_join (int userns, int mountns)
|
|
{
|
|
pid_t ppid = getpid ();
|
|
char uid[16];
|
|
char **argv;
|
|
int pid;
|
|
|
|
sprintf (uid, "%d", geteuid ());
|
|
|
|
argv = get_cmd_line_args (ppid);
|
|
if (argv == NULL)
|
|
{
|
|
fprintf (stderr, "cannot read argv: %s\n", strerror (errno));
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
|
|
pid = fork ();
|
|
if (pid < 0)
|
|
fprintf (stderr, "cannot fork: %s\n", strerror (errno));
|
|
if (pid)
|
|
return pid;
|
|
|
|
setenv ("_LIBPOD_USERNS_CONFIGURED", "init", 1);
|
|
setenv ("_LIBPOD_ROOTLESS_UID", uid, 1);
|
|
|
|
if (setns (userns, 0) < 0)
|
|
{
|
|
fprintf (stderr, "cannot setns: %s\n", strerror (errno));
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
close (userns);
|
|
|
|
if (mountns >= 0 && setns (mountns, 0) < 0)
|
|
{
|
|
fprintf (stderr, "cannot setns: %s\n", strerror (errno));
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
close (userns);
|
|
|
|
if (syscall_setresgid (0, 0, 0) < 0)
|
|
{
|
|
fprintf (stderr, "cannot setresgid: %s\n", strerror (errno));
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
|
|
if (syscall_setresuid (0, 0, 0) < 0)
|
|
{
|
|
fprintf (stderr, "cannot setresuid: %s\n", strerror (errno));
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
|
|
execvp (argv[0], argv);
|
|
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
|
|
static void
|
|
check_proc_sys_userns_file (const char *path)
|
|
{
|
|
FILE *fp;
|
|
fp = fopen (path, "r");
|
|
if (fp)
|
|
{
|
|
char buf[32];
|
|
size_t n_read = fread (buf, 1, sizeof(buf) - 1, fp);
|
|
if (n_read > 0)
|
|
{
|
|
buf[n_read] = '\0';
|
|
if (strtol (buf, NULL, 10) == 0)
|
|
fprintf (stderr, "user namespaces are not enabled in %s\n", path);
|
|
}
|
|
fclose (fp);
|
|
}
|
|
}
|
|
|
|
int
|
|
reexec_in_user_namespace (int ready)
|
|
{
|
|
int ret;
|
|
pid_t pid;
|
|
char b;
|
|
pid_t ppid = getpid ();
|
|
char **argv;
|
|
char uid[16];
|
|
|
|
sprintf (uid, "%d", geteuid ());
|
|
|
|
pid = syscall_clone (CLONE_NEWUSER|CLONE_NEWNS|SIGCHLD, NULL);
|
|
if (pid < 0)
|
|
{
|
|
FILE *fp;
|
|
fprintf (stderr, "cannot clone: %s\n", strerror (errno));
|
|
check_proc_sys_userns_file (_max_user_namespaces);
|
|
check_proc_sys_userns_file (_unprivileged_user_namespaces);
|
|
}
|
|
if (pid)
|
|
return pid;
|
|
|
|
argv = get_cmd_line_args (ppid);
|
|
if (argv == NULL)
|
|
{
|
|
fprintf (stderr, "cannot read argv: %s\n", strerror (errno));
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
|
|
setenv ("_LIBPOD_USERNS_CONFIGURED", "init", 1);
|
|
setenv ("_LIBPOD_ROOTLESS_UID", uid, 1);
|
|
|
|
do
|
|
ret = read (ready, &b, 1) < 0;
|
|
while (ret < 0 && errno == EINTR);
|
|
if (ret < 0)
|
|
{
|
|
fprintf (stderr, "cannot read from sync pipe: %s\n", strerror (errno));
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
close (ready);
|
|
|
|
if (syscall_setresgid (0, 0, 0) < 0)
|
|
{
|
|
fprintf (stderr, "cannot setresgid: %s\n", strerror (errno));
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
|
|
if (syscall_setresuid (0, 0, 0) < 0)
|
|
{
|
|
fprintf (stderr, "cannot setresuid: %s\n", strerror (errno));
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
|
|
execvp (argv[0], argv);
|
|
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
|
|
int
|
|
reexec_in_user_namespace_wait (int pid)
|
|
{
|
|
pid_t p;
|
|
int status;
|
|
|
|
do
|
|
p = waitpid (pid, &status, 0);
|
|
while (p < 0 && errno == EINTR);
|
|
|
|
if (p < 0)
|
|
return -1;
|
|
|
|
if (WIFEXITED (status))
|
|
return WEXITSTATUS (status);
|
|
if (WIFSIGNALED (status))
|
|
return 128 + WTERMSIG (status);
|
|
return -1;
|
|
}
|