From bdf28c51cd31e24b8ec26fb283c6cb570ee0a90a Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 20 May 2026 20:43:55 +0200 Subject: [PATCH] network: terminate live CDP connections on shutdown The CDP server ignored a single SIGTERM while a connection was live: the process only exited if the socket was closed before the signal, or after a third signal. A conventional one-shot graceful stop (SIGTERM then waitpid) hung. On shutdown the sighandler runs Network.stop (which sets `shutdown` and lets the run loop exit) before Server.shutdown. A live CDP worker parks in curl_multi_poll and is woken ONLY by the Network thread via dropCdp -> handles.wakeup(). Once the run loop exits with links still live, nothing can wake those workers, so Server.deinit()'s `while (active_threads > 0)` loop spins forever. Drop every still-live CDP link from the run loop when `shutdown` is set, reusing the existing peer-EOF path: dropCdp(notify=true) pushes a .disconnect into the worker's inbox and wakes it, so cdp.tick() returns false and the worker exits before the loop breaks. --- src/network/Network.zig | 54 +++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/src/network/Network.zig b/src/network/Network.zig index 13243d08..54b19d5d 100644 --- a/src/network/Network.zig +++ b/src/network/Network.zig @@ -683,6 +683,35 @@ fn processCdpEvents(self: *Network) void { } } +// On shutdown, force-disconnect every still-live CDP link. Each link's +// worker thread blocks in curl_multi_poll and is woken ONLY by this +// (Network) thread via dropCdp -> handles.wakeup(). If the run loop +// exits with links still live, those workers never wake and +// Server.deinit() spins on active_threads forever (issue #2510). +// Mirrors the peer-EOF path in processCdpEvents: dropCdp(notify=true) +// pushes a .disconnect into the worker's inbox and wakes it, so +// cdp.tick() returns false and the worker exits. +fn shutdownCdpLinks(self: *Network) void { + self.cdp_mutex.lock(); + defer self.cdp_mutex.unlock(); + + var any_removed = false; + var it = self.cdp_links.first; + while (it) |node| { + const next = node.next; + const link: *CdpLink = @fieldParentPtr("node", node); + if (link.state == .live) { + self.dropCdp(link, null, true); + any_removed = true; + } + it = next; + } + + if (any_removed) { + self.cdp_unregister.broadcast(); + } +} + pub fn run(self: *Network) void { var drain_buf: [64]u8 = undefined; var running_handles: c_int = 0; @@ -760,13 +789,24 @@ pub fn run(self: *Network) void { self.fireTicks(); - if (self.shutdown.load(.acquire) and running_handles == 0) { - // Check if fireTicks submitted new requests (e.g. telemetry flush). - // If so, continue the loop to drain and send them before exiting. - self.submission_mutex.lock(); - const has_pending = self.submission_queue.first != null; - self.submission_mutex.unlock(); - if (!has_pending) break; + if (self.shutdown.load(.acquire)) { + // Force-disconnect any still-live CDP clients so their worker + // threads wake from curl_multi_poll and exit. A worker is woken + // only by this (Network) thread; if the loop exits with links + // still live, those workers block forever and Server.deinit() + // spins on active_threads (issue #2510). Idempotent — a no-op + // once the links are drained. + self.shutdownCdpLinks(); + + if (running_handles == 0) { + // Check if fireTicks submitted new requests (e.g. telemetry + // flush). If so, continue the loop to drain and send them + // before exiting. + self.submission_mutex.lock(); + const has_pending = self.submission_queue.first != null; + self.submission_mutex.unlock(); + if (!has_pending) break; + } } }