mirror of
https://github.com/containers/podman.git
synced 2026-03-30 04:22:04 -04:00
Merge pull request #28323 from giuseppe/detect-stale-pause-process
rootless: detect and remove stale pause.pid with recycled PIDs
This commit is contained in:
@@ -643,6 +643,55 @@ can_use_shortcut (char **argv)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Best-effort check: read /proc/<pid>/environ and look for _PODMAN_PAUSE=1.
|
||||
Returns 1 if found, 0 if not found or on any error. */
|
||||
static int
|
||||
is_pause_process (long pid)
|
||||
{
|
||||
cleanup_free char *environ_path = NULL;
|
||||
cleanup_free char *buf = NULL;
|
||||
cleanup_close int fd = -1;
|
||||
const size_t buf_size = 4096;
|
||||
ssize_t n;
|
||||
|
||||
if (asprintf (&environ_path, "/proc/%ld/environ", pid) < 0)
|
||||
return 0;
|
||||
|
||||
buf = malloc (buf_size);
|
||||
if (buf == NULL)
|
||||
return 0;
|
||||
|
||||
fd = open (environ_path, O_RDONLY | O_CLOEXEC);
|
||||
if (fd < 0)
|
||||
return 0;
|
||||
|
||||
/* Read in chunks and search for the null-delimited key=value entry. */
|
||||
n = TEMP_FAILURE_RETRY (read (fd, buf, buf_size));
|
||||
if (n <= 0)
|
||||
return 0;
|
||||
|
||||
/* environ entries are separated by '\0'. Search for "_PODMAN_PAUSE=1". */
|
||||
for (char *p = buf; p < buf + n; )
|
||||
{
|
||||
if (strcmp (p, "_PODMAN_PAUSE=1") == 0)
|
||||
return 1;
|
||||
p += strlen (p) + 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove a pause.pid file that no longer points at a pause process.

   PID is the process ID taken from the pause.pid file and PATH is the
   location of that file.  When is_pause_process () does not recognize PID,
   the PID is assumed to have been recycled: a warning is written to stderr
   and PATH is unlinked.  Otherwise nothing is done.  The result of the
   unlink is not checked.  */
static void
check_stale_pause_pid (long pid, const char *path)
{
  if (is_pause_process (pid))
    return;

  fprintf (stderr, "pause.pid file refers to PID %ld which is not a pause process, the process may have exited and the PID been recycled. Removing %s\n", pid, path);
  unlink (path);
}
|
||||
|
||||
static int
|
||||
open_namespace (int pid_to_join, const char *ns_file)
|
||||
{
|
||||
@@ -842,14 +891,23 @@ static void __attribute__((constructor)) init()
|
||||
|
||||
userns_fd = open_namespace (pid, "user");
|
||||
if (userns_fd < 0)
|
||||
return;
|
||||
{
|
||||
check_stale_pause_pid (pid, path);
|
||||
return;
|
||||
}
|
||||
|
||||
mntns_fd = open_namespace (pid, "mnt");
|
||||
if (mntns_fd < 0)
|
||||
return;
|
||||
{
|
||||
check_stale_pause_pid (pid, path);
|
||||
return;
|
||||
}
|
||||
|
||||
if (setns (userns_fd, 0) < 0)
|
||||
return;
|
||||
{
|
||||
check_stale_pause_pid (pid, path);
|
||||
return;
|
||||
}
|
||||
|
||||
/* This is a fatal error we can't recover from since we have already joined the userns. */
|
||||
join_namespace_or_die ("mnt", mntns_fd);
|
||||
|
||||
@@ -422,6 +422,21 @@ func BecomeRootInUserNS(stateDir string) (bool, int, error) {
|
||||
return becomeRootInUserNS(stateDir)
|
||||
}
|
||||
|
||||
// isPauseProcess reports whether the environment of the process with the
// given PID, as exposed by /proc/<pid>/environ, contains an entry that is
// exactly _PODMAN_PAUSE=1.
// The check is best-effort: if the file cannot be read the error is dropped
// and false is returned.
func isPauseProcess(pid int) bool {
	environPath := fmt.Sprintf("/proc/%d/environ", pid)
	environ, err := os.ReadFile(environPath)
	if err != nil {
		return false
	}

	// Entries in the environ file are separated by NUL bytes.
	marker := []byte("_PODMAN_PAUSE=1")
	for _, kv := range bytes.Split(environ, []byte{0}) {
		if bytes.Equal(kv, marker) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
|
||||
// This is useful when there are already running containers and we
|
||||
// don't have a pause process yet. We can use the paths to the conmon
|
||||
@@ -447,6 +462,12 @@ func TryJoinFromFilePaths(stateDir string, paths []string) (bool, int, error) {
|
||||
if err == nil {
|
||||
return joined, ret, nil
|
||||
}
|
||||
if !isPauseProcess(pid) {
|
||||
logrus.Warningf("pause.pid file refers to PID %d which is not a pause process, the process may have exited and the PID been recycled. Removing %s", pid, path)
|
||||
os.Remove(path)
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
lastErr = err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,6 +149,37 @@ function _check_pause_process() {
|
||||
run_podman rm -f -t0 $cname1
|
||||
}
|
||||
|
||||
# Test that podman detects and recovers from a stale pause.pid with a recycled PID
|
||||
@test "rootless podman recovers from stale pause.pid with recycled PID" {
|
||||
skip_if_not_rootless "pause process is only used as rootless"
|
||||
skip_if_remote "system migrate not supported via remote"
|
||||
|
||||
run_podman info
|
||||
|
||||
_check_pause_process
|
||||
|
||||
if [ -e $ns_handles_file ]; then
|
||||
skip "ns_handles in use, not pause.pid"
|
||||
fi
|
||||
|
||||
kill -9 $pause_pid
|
||||
|
||||
sleep 99999 &
|
||||
local fake_pid=$!
|
||||
|
||||
echo -n $fake_pid > $pause_pid_file
|
||||
|
||||
run_podman info
|
||||
assert "$output" =~ "pause.pid file refers to PID $fake_pid which is not a pause process" \
|
||||
"podman should report stale pause.pid"
|
||||
assert "$output" =~ "Removing.*pause.pid" \
|
||||
"podman should report removing the stale pause.pid file"
|
||||
|
||||
kill $fake_pid 2>/dev/null || true
|
||||
|
||||
_check_pause_process
|
||||
}
|
||||
|
||||
# regression test for https://issues.redhat.com/browse/RHEL-59620
|
||||
@test "rootless userns can unmount netns properly" {
|
||||
skip_if_not_rootless "pause process is only used as rootless"
|
||||
|
||||
Reference in New Issue
Block a user