Merge pull request #28323 from giuseppe/detect-stale-pause-process

rootless: detect and remove stale pause.pid with recycled PIDs
This commit is contained in:
Paul Holzinger
2026-03-19 18:33:53 +01:00
committed by GitHub
3 changed files with 113 additions and 3 deletions

View File

@@ -643,6 +643,55 @@ can_use_shortcut (char **argv)
return ret;
}
/* Best-effort check: read /proc/<pid>/environ and look for an entry that is
   exactly "_PODMAN_PAUSE=1".

   The whole file is read (growing the buffer as needed) so that the marker is
   found no matter where it sits in the environment block, and the data is
   NUL-terminated before it is scanned so a final entry without a trailing
   '\0' cannot make the string functions run past the buffer.

   PID is the process to inspect.
   Returns 1 if the entry is found, 0 if it is not found or on any error
   (path formatting, open, allocation, read, or an implausibly large file).  */
static int
is_pause_process (long pid)
{
  static const char marker[] = "_PODMAN_PAUSE=1";
  /* Sanity cap so a bogus environ file cannot make us allocate without bound.  */
  const size_t max_size = 16 * 1024 * 1024;
  /* NOTE(review): cleanup_free/cleanup_close are defined elsewhere in this
     file; presumably they release the pointer/fd when it leaves scope.  */
  cleanup_free char *environ_path = NULL;
  cleanup_free char *buf = NULL;
  cleanup_close int fd = -1;
  size_t capacity = 4096;
  size_t used = 0;

  if (asprintf (&environ_path, "/proc/%ld/environ", pid) < 0)
    {
      /* The pointer is undefined after a failed asprintf; do not let the
         cleanup handler see it.  */
      environ_path = NULL;
      return 0;
    }

  buf = malloc (capacity);
  if (buf == NULL)
    return 0;

  fd = open (environ_path, O_RDONLY | O_CLOEXEC);
  if (fd < 0)
    return 0;

  /* Read until EOF, always keeping one spare byte for the terminator.  */
  for (;;)
    {
      ssize_t n;

      if (capacity - used <= 1)
        {
          char *grown;

          if (capacity >= max_size)
            return 0;
          grown = realloc (buf, capacity * 2);
          if (grown == NULL)
            return 0;
          buf = grown;
          capacity *= 2;
        }

      n = TEMP_FAILURE_RETRY (read (fd, buf + used, capacity - used - 1));
      if (n < 0)
        return 0;
      if (n == 0)
        break;
      used += (size_t) n;
    }

  if (used == 0)
    return 0;
  buf[used] = '\0';

  /* environ entries are separated by '\0'; compare each one in full so that
     e.g. "_PODMAN_PAUSE=10" or "X_PODMAN_PAUSE=1" do not match.  */
  for (const char *p = buf; p < buf + used; p += strlen (p) + 1)
    {
      if (strcmp (p, marker) == 0)
        return 1;
    }

  return 0;
}
/* Called after joining the namespaces of the PID recorded in pause.pid has
   failed.  When that PID does not belong to a pause process (as judged by
   is_pause_process), print a warning on stderr and unlink PATH, the stale
   pause.pid file.  Does nothing when the PID is still a pause process.  */
static void
check_stale_pause_pid (long pid, const char *path)
{
  if (is_pause_process (pid))
    return;

  fprintf (stderr, "pause.pid file refers to PID %ld which is not a pause process, the process may have exited and the PID been recycled. Removing %s\n", pid, path);
  /* Best effort: the result of unlink is intentionally not checked.  */
  unlink (path);
}
static int
open_namespace (int pid_to_join, const char *ns_file)
{
@@ -842,14 +891,23 @@ static void __attribute__((constructor)) init()
userns_fd = open_namespace (pid, "user");
if (userns_fd < 0)
return;
{
check_stale_pause_pid (pid, path);
return;
}
mntns_fd = open_namespace (pid, "mnt");
if (mntns_fd < 0)
return;
{
check_stale_pause_pid (pid, path);
return;
}
if (setns (userns_fd, 0) < 0)
return;
{
check_stale_pause_pid (pid, path);
return;
}
/* This is a fatal error we can't recover from since we have already joined the userns. */
join_namespace_or_die ("mnt", mntns_fd);

View File

@@ -422,6 +422,21 @@ func BecomeRootInUserNS(stateDir string) (bool, int, error) {
return becomeRootInUserNS(stateDir)
}
// isPauseProcess checks if the given PID has _PODMAN_PAUSE=1 in its environment.
// It is a best-effort check; any errors are silently ignored and it returns false.
func isPauseProcess(pid int) bool {
data, err := os.ReadFile(fmt.Sprintf("/proc/%d/environ", pid))
if err != nil {
return false
}
for _, entry := range bytes.Split(data, []byte{0}) {
if string(entry) == "_PODMAN_PAUSE=1" {
return true
}
}
return false
}
// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
// This is useful when there are already running containers and we
// don't have a pause process yet. We can use the paths to the conmon
@@ -447,6 +462,12 @@ func TryJoinFromFilePaths(stateDir string, paths []string) (bool, int, error) {
if err == nil {
return joined, ret, nil
}
if !isPauseProcess(pid) {
logrus.Warningf("pause.pid file refers to PID %d which is not a pause process, the process may have exited and the PID been recycled. Removing %s", pid, path)
os.Remove(path)
lastErr = err
continue
}
lastErr = err
}
}

View File

@@ -149,6 +149,37 @@ function _check_pause_process() {
run_podman rm -f -t0 $cname1
}
# Test that podman detects and recovers from a stale pause.pid with a recycled PID.
#
# The real pause process is killed and pause.pid is overwritten with the PID
# of an unrelated, still-running process. The next podman invocation must
# warn about the stale file, remove it, and end up with a valid pause process.
@test "rootless podman recovers from stale pause.pid with recycled PID" {
    skip_if_not_rootless "pause process is only used as rootless"
    skip_if_remote "system migrate not supported via remote"

    run_podman info
    # NOTE(review): pause_pid, pause_pid_file and ns_handles_file are not
    # assigned in this test; presumably _check_pause_process sets them.
    _check_pause_process

    if [ -e $ns_handles_file ]; then
        skip "ns_handles in use, not pause.pid"
    fi

    kill -9 $pause_pid

    # Stand-in for a recycled PID: alive, but not a pause process.
    # Close fd 3 so the background job does not keep Bats' internal
    # descriptor open, which would make Bats wait for it to exit.
    sleep 99999 3>&- &
    local fake_pid=$!
    echo -n $fake_pid > $pause_pid_file

    run_podman info

    # The stand-in is no longer needed once podman has run; kill it before
    # asserting so a failed assertion cannot leave it running.
    kill $fake_pid 2>/dev/null || true

    assert "$output" =~ "pause.pid file refers to PID $fake_pid which is not a pause process" \
           "podman should report stale pause.pid"
    assert "$output" =~ "Removing.*pause.pid" \
           "podman should report removing the stale pause.pid file"

    _check_pause_process
}
# regression test for https://issues.redhat.com/browse/RHEL-59620
@test "rootless userns can unmount netns properly" {
skip_if_not_rootless "pause process is only used as rootless"