From 495d3acc7b7337131b94aaa28bdbd68d34e7f599 Mon Sep 17 00:00:00 2001 From: Fernando Serboncini Date: Thu, 7 May 2026 16:14:27 -0400 Subject: [PATCH] tstest/natlab/vmtest: kill QEMU when test process dies (#19676) Re-exec the test binary as a thin wrapper that holds a pipe inherited from the test. When the test goes away (any reason, including SIGKILL, panic, or OOM), the kernel closes the pipe write end; the wrapper sees EOF and SIGKILLs itself, taking QEMU and its children with it. Updates #13038 Change-Id: Ib2151098193551396c1d7bb51b07da3bd6b2cfb4 Signed-off-by: Fernando Serboncini --- tstest/natlab/vmtest/qemu.go | 10 ++- tstest/natlab/vmtest/qemu_wrapper.go | 90 ++++++++++++++++++++ tstest/natlab/vmtest/qemu_wrapper_windows.go | 20 +++++ 3 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 tstest/natlab/vmtest/qemu_wrapper.go create mode 100644 tstest/natlab/vmtest/qemu_wrapper_windows.go diff --git a/tstest/natlab/vmtest/qemu.go b/tstest/natlab/vmtest/qemu.go index 757657e51..1486a9ef8 100644 --- a/tstest/natlab/vmtest/qemu.go +++ b/tstest/natlab/vmtest/qemu.go @@ -211,7 +211,14 @@ func (e *Env) launchQEMU(name, logPath string, args []string) error { } cmd.Stdout = qemuLog cmd.Stderr = qemuLog + parentPipe, err := killWithParent(cmd) + if err != nil { + devNull.Close() + qemuLog.Close() + return fmt.Errorf("killWithParent: %w", err) + } if err := cmd.Start(); err != nil { + parentPipe.Close() devNull.Close() qemuLog.Close() return fmt.Errorf("qemu for %s: %w", name, err) @@ -224,8 +231,9 @@ func (e *Env) launchQEMU(name, logPath string, args []string) error { go e.tailLogFile(e.ctx, name, logPath) } e.t.Cleanup(func() { - cmd.Process.Kill() + killProcessTree(cmd) cmd.Wait() + parentPipe.Close() devNull.Close() qemuLog.Close() // Dump tail of VM log on failure for debugging. 
diff --git a/tstest/natlab/vmtest/qemu_wrapper.go b/tstest/natlab/vmtest/qemu_wrapper.go
new file mode 100644
index 000000000..5b5843bed
--- /dev/null
+++ b/tstest/natlab/vmtest/qemu_wrapper.go
@@ -0,0 +1,135 @@
+// Copyright (c) Tailscale Inc & contributors
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build unix
+
+package vmtest
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"os/exec"
+	"strconv"
+	"syscall"
+)
+
+// Re-exec'd as a wrapper around QEMU: when the test process dies (any
+// reason, including SIGKILL), the kernel closes the pipe write end, the
+// wrapper sees EOF, and kills QEMU's process group.
+
+// wrapperEnv names the environment variable that switches a re-exec'd test
+// binary into wrapper mode. Its value is the decimal number of the inherited
+// file descriptor holding the read end of the parent's pipe.
+const wrapperEnv = "TS_VMTEST_QEMU_WRAPPER"
+
+// init diverts the process into runQEMUWrapper when wrapperEnv is set, and
+// is a no-op for ordinary test runs.
+func init() {
+	if os.Getenv(wrapperEnv) == "" {
+		return
+	}
+	runQEMUWrapper()
+}
+
+// runQEMUWrapper runs os.Args[1:] as a child process, forwarding stdio, and
+// SIGKILLs the whole process group once the pipe named by wrapperEnv reaches
+// EOF. It never returns: the process exits with the child's exit status so
+// that control cannot fall through init into the test binary's own main.
+func runQEMUWrapper() {
+	fd, err := strconv.Atoi(os.Getenv(wrapperEnv))
+	if err != nil {
+		log.Fatalf("vmtest qemu wrapper: bad %s: %v", wrapperEnv, err)
+	}
+	// Unset so the child does not inherit wrapper mode.
+	os.Unsetenv(wrapperEnv)
+	if len(os.Args) < 2 {
+		log.Fatalf("vmtest qemu wrapper: missing command")
+	}
+	// os.NewFile returns nil for an invalid descriptor; reading from a nil
+	// *os.File fails at once, which would kill the group immediately.
+	pipeFd := os.NewFile(uintptr(fd), "parent-pipe")
+	if pipeFd == nil {
+		log.Fatalf("vmtest qemu wrapper: invalid pipe fd %d", fd)
+	}
+
+	// QEMU inherits our pgid (the test set Setpgid on us), so a group kill
+	// from the test reaches QEMU too. Don't set Setpgid here.
+	cmd := exec.Command(os.Args[1], os.Args[2:]...)
+	cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
+	if err := cmd.Start(); err != nil {
+		log.Fatalf("vmtest qemu wrapper: %v", err)
+	}
+
+	go func() {
+		// Block until the parent's pipe write end closes (EOF), then kill
+		// our process group (which includes QEMU and any of its children).
+		io.Copy(io.Discard, pipeFd)
+		syscall.Kill(0, syscall.SIGKILL)
+	}()
+
+	// Exit explicitly once the child is gone. Returning would let init
+	// finish and the test binary's main start up inside the wrapper.
+	if err := cmd.Wait(); err != nil {
+		// ExitCode is -1 when the child was killed by a signal or never
+		// produced a status; report a generic failure in that case.
+		if code := cmd.ProcessState.ExitCode(); code > 0 {
+			os.Exit(code)
+		}
+		os.Exit(1)
+	}
+	os.Exit(0)
+}
+
+// killWithParent rewrites cmd to run via a wrapper that kills it if the
+// test process dies. The returned *os.File must be kept alive until the
+// command is no longer needed; closing it makes the wrapper exit.
+//
+// It must be called before cmd.Start. The wrapper is this same executable
+// re-exec'd with wrapperEnv set, placed in its own process group so that
+// killProcessTree can signal it together with its descendants. Any
+// SysProcAttr already configured on cmd is kept; only Setpgid is forced on.
+func killWithParent(cmd *exec.Cmd) (*os.File, error) {
+	self, err := os.Executable()
+	if err != nil {
+		return nil, fmt.Errorf("os.Executable: %w", err)
+	}
+	r, w, err := os.Pipe()
+	if err != nil {
+		return nil, fmt.Errorf("pipe: %w", err)
+	}
+
+	cmd.ExtraFiles = append(cmd.ExtraFiles, r)
+	pipeFd := 3 + len(cmd.ExtraFiles) - 1 // stdin/stdout/stderr + ExtraFiles index
+
+	// New argv: wrapper, real program path, then the original arguments.
+	args := []string{self, cmd.Path}
+	if len(cmd.Args) > 1 {
+		args = append(args, cmd.Args[1:]...)
+	}
+	cmd.Args = args
+	cmd.Path = self
+	if cmd.Env == nil {
+		cmd.Env = os.Environ()
+	}
+	cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%d", wrapperEnv, pipeFd))
+
+	// Don't clobber attributes the caller may have set.
+	if cmd.SysProcAttr == nil {
+		cmd.SysProcAttr = &syscall.SysProcAttr{}
+	}
+	cmd.SysProcAttr.Setpgid = true
+
+	return w, nil
+}
+
+// killProcessTree SIGKILLs cmd's process group (cmd plus any descendants
+// that didn't escape it). It returns an error if cmd was never started.
+func killProcessTree(cmd *exec.Cmd) error {
+	if cmd.Process == nil {
+		return errors.New("killProcessTree: process not started")
+	}
+	return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
+}
diff --git a/tstest/natlab/vmtest/qemu_wrapper_windows.go b/tstest/natlab/vmtest/qemu_wrapper_windows.go
new file mode 100644
index 000000000..59d96fad9
--- /dev/null
+++ b/tstest/natlab/vmtest/qemu_wrapper_windows.go
@@ -0,0 +1,28 @@
+// Copyright (c) Tailscale Inc & contributors
+// SPDX-License-Identifier: BSD-3-Clause
+
+package vmtest
+
+import (
+	"errors"
+	"os"
+	"os/exec"
+)
+
+// Stubs for Windows: no parent-death watcher, no process-group kill.
+// The test still launches QEMU; cleanup just kills the single process.
+
+// killWithParent leaves cmd untouched and returns an open handle on
+// os.DevNull as a placeholder for the caller to Close.
+func killWithParent(cmd *exec.Cmd) (*os.File, error) {
+	return os.Open(os.DevNull)
+}
+
+// killProcessTree kills only cmd's own process. It returns an error if cmd
+// was never started.
+func killProcessTree(cmd *exec.Cmd) error {
+	if cmd.Process == nil {
+		return errors.New("killProcessTree: process not started")
+	}
+	return cmd.Process.Kill()
+}