diff --git a/libpod/container_api.go b/libpod/container_api.go index 300fe5ca20..1efa82800c 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -541,119 +541,97 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration) } return -1, define.ErrCtrRemoved } - var conmonTimer time.Timer - conmonTimerSet := false - conmonPidFd := c.getConmonPidFd() - if conmonPidFd != -1 { - defer unix.Close(conmonPidFd) - } - conmonPidFdTriggered := false + if !c.batched { + c.lock.Lock() + defer c.lock.Unlock() - getExitCode := func() (bool, int32, error) { - containerRemoved := false - if !c.batched { - c.lock.Lock() - defer c.lock.Unlock() - } - - if err := c.syncContainer(); err != nil { - if !errors.Is(err, define.ErrNoSuchCtr) { - return false, -1, err - } - containerRemoved = true - } - - // If conmon is not alive anymore set a timer to make sure - // we're returning even if conmon has forcefully been killed. - if !conmonTimerSet && !containerRemoved { - conmonAlive, err := c.ociRuntime.CheckConmonRunning(c) - switch { - case errors.Is(err, define.ErrNoSuchCtr): - // Container has been removed, so we assume the - // exit code is present in the DB. - containerRemoved = true - case err != nil: - return false, -1, err - case !conmonAlive: - // Give the exit code at most 20 seconds to - // show up in the DB. That should largely be - // enough for the cleanup process. - timerDuration := time.Second * 20 - conmonTimer = *time.NewTimer(timerDuration) - conmonTimerSet = true - case conmonAlive: - // Continue waiting if conmon's still running. - return false, -1, nil - } - } - - timedout := "" - if !containerRemoved { - // If conmon is dead for more than $timerDuration or if the - // container has exited properly, try to look up the exit code. - select { - case <-conmonTimer.C: - logrus.Debugf("Exceeded conmon timeout waiting for container %s to exit", id) - timedout = " [exceeded conmon timeout waiting for container to exit]" - default: - switch c.state.State { - case define.ContainerStateExited, define.ContainerStateConfigured: - // Container exited, so we can look up the exit code. - case define.ContainerStateStopped: - // Continue looping unless the restart policy is always. - // In this case, the container would never transition to - // the exited state, so we need to look up the exit code. - if c.config.RestartPolicy != define.RestartPolicyAlways { - return false, -1, nil - } - default: - // Continue looping - return false, -1, nil + // Note this is one of the rare cases where we do not like to use syncContainer() as it does read the exit file + // We like to avoid that here because that means we might read it before container cleanup run and possible + // removed the container + if err := c.runtime.state.UpdateContainer(c); err != nil { + if errors.Is(err, define.ErrNoSuchCtr) { + // if the container is not valid at this point as it was deleted, + // check if the exit code was recorded in the db. + exitCode, err := c.runtime.state.GetContainerExitCode(id) + if err == nil { + return exitCode, nil } } + return -1, err } + } + conmonPID := c.state.ConmonPID + // conmonPID == 0 means container is not running + if conmonPID == 0 { exitCode, err := c.runtime.state.GetContainerExitCode(id) if err != nil { if errors.Is(err, define.ErrNoSuchExitCode) { // If the container is configured or created, we must assume it never ran. if c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) { - return true, 0, nil + return 0, nil } } - return true, -1, fmt.Errorf("%w (container in state %s)%s", err, c.state.State, timedout) - } - - return true, exitCode, nil - } - - for { - hasExited, exitCode, err := getExitCode() - if hasExited { - return exitCode, err - } - if err != nil { return -1, err } - select { - case <-ctx.Done(): - return -1, fmt.Errorf("waiting for exit code of container %s canceled", id) - default: - if conmonPidFd != -1 && !conmonPidFdTriggered { - // If possible (pidfd works), the first cycle we block until conmon dies - // If this happens, and we fall back to the old poll delay - // There is a deadlock in the cleanup code for "play kube" which causes - // conmon to not exit, so unfortunately we have to use the poll interval - // timeout here to avoid hanging. - fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}} - _, _ = unix.Poll(fds, int(pollInterval.Milliseconds())) - conmonPidFdTriggered = true - } else { - time.Sleep(pollInterval) + return exitCode, nil + } + + conmonPidFd := c.getConmonPidFd() + if conmonPidFd > -1 { + defer unix.Close(conmonPidFd) + } + + // we cannot wait locked as we would hold the lock forever, so we unlock and then lock again + c.lock.Unlock() + err := waitForConmonExit(conmonPID, conmonPidFd, pollInterval) + c.lock.Lock() + if err != nil { + return -1, fmt.Errorf("failed to wait for conmon to exit: %w", err) + } + + // we locked again so we must sync the state + if err := c.syncContainer(); err != nil { + if errors.Is(err, define.ErrNoSuchCtr) { + // if the container is not valid at this point as it was deleted, + // check if the exit code was recorded in the db. + exitCode, err := c.runtime.state.GetContainerExitCode(id) + if err == nil { + return exitCode, nil } } + return -1, err } + + // syncContainer already did the exit file read so nothing extra for us to do here + return c.runtime.state.GetContainerExitCode(id) +} + +func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) error { + if conmonPidFd > -1 { + for { + fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}} + if _, err := unix.Poll(fds, -1); err != nil { + if err == unix.EINTR { + continue + } + return err + } + return nil + } + } + // no pidfd support, we must poll the pid + for { + if err := unix.Kill(conmonPID, 0); err != nil { + if err == unix.ESRCH { + break + } + return err + } + time.Sleep(pollInterval) + } + return nil } type waitResult struct { diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 5ea2448e20..e4b3e71406 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -694,16 +694,14 @@ func (c *Container) makePlatformBindMounts() error { } func (c *Container) getConmonPidFd() int { - if c.state.ConmonPID != 0 { - // Track lifetime of conmon precisely using pidfd_open + poll. - // There are many cases for this to fail, for instance conmon is dead - // or pidfd_open is not supported (pre linux 5.3), so fall back to the - // traditional loop with poll + sleep - if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil { - return fd - } else if err != unix.ENOSYS && err != unix.ESRCH { - logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err) - } + // Track lifetime of conmon precisely using pidfd_open + poll. + // There are many cases for this to fail, for instance conmon is dead + // or pidfd_open is not supported (pre linux 5.3), so fall back to the + // traditional loop with poll + sleep + if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil { + return fd + } else if err != unix.ENOSYS && err != unix.ESRCH { + logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err) } return -1 } diff --git a/test/system/030-run.bats b/test/system/030-run.bats index ec788642b9..1aa079dab4 100644 --- a/test/system/030-run.bats +++ b/test/system/030-run.bats @@ -1289,13 +1289,15 @@ EOF rm -rf $romount } -@test "podman run --restart=always -- wait" { +@test "podman run --restart=always/on-failure -- wait" { # regression test for #18572 to make sure Podman waits less than 20 seconds ctr=c_$(safename) - run_podman run -d --restart=always --name=$ctr $IMAGE false - PODMAN_TIMEOUT=20 run_podman wait $ctr - is "$output" "1" "container should exit 1" - run_podman rm -f -t0 $ctr + for policy in always on-failure; do + run_podman run -d --restart=$policy --name=$ctr $IMAGE false + PODMAN_TIMEOUT=20 run_podman wait $ctr + is "$output" "1" "container should exit 1 (policy: $policy)" + run_podman rm -f -t0 $ctr + done } @test "podman run - custom static_dir" {