network: terminate live CDP connections on shutdown

The CDP server ignored a single SIGTERM while a connection was live: the
process only exited if the socket was closed before the signal, or after a
third signal. A conventional one-shot graceful stop (SIGTERM then waitpid)
hung.

On shutdown the sighandler runs Network.stop (which sets `shutdown` and lets
the run loop exit) before Server.shutdown. A live CDP worker parks in
curl_multi_poll and is woken ONLY by the Network thread via
dropCdp -> handles.wakeup(). Once the run loop exits with links still live,
nothing can wake those workers, so Server.deinit()'s
`while (active_threads > 0)` loop spins forever.

Drop every still-live CDP link from the run loop when `shutdown` is set,
reusing the existing peer-EOF path: dropCdp(notify=true) pushes a .disconnect
into the worker's inbox and wakes it, so cdp.tick() returns false and the
worker exits before the loop breaks.
This commit is contained in:
Navid EMAD
2026-05-20 20:43:55 +02:00
parent 5905319e78
commit bdf28c51cd

View File

@@ -683,6 +683,35 @@ fn processCdpEvents(self: *Network) void {
}
}
// On shutdown, force-disconnect every still-live CDP link. Each link's
// worker thread blocks in curl_multi_poll and is woken ONLY by this
// (Network) thread via dropCdp -> handles.wakeup(). If the run loop
// exits with links still live, those workers never wake and
// Server.deinit() spins on active_threads forever (issue #2510).
// Mirrors the peer-EOF path in processCdpEvents: dropCdp(notify=true)
// pushes a .disconnect into the worker's inbox and wakes it, so
// cdp.tick() returns false and the worker exits.
fn shutdownCdpLinks(self: *Network) void {
self.cdp_mutex.lock();
defer self.cdp_mutex.unlock();
var any_removed = false;
var it = self.cdp_links.first;
while (it) |node| {
const next = node.next;
const link: *CdpLink = @fieldParentPtr("node", node);
if (link.state == .live) {
self.dropCdp(link, null, true);
any_removed = true;
}
it = next;
}
if (any_removed) {
self.cdp_unregister.broadcast();
}
}
pub fn run(self: *Network) void {
var drain_buf: [64]u8 = undefined;
var running_handles: c_int = 0;
@@ -760,13 +789,24 @@ pub fn run(self: *Network) void {
self.fireTicks();
if (self.shutdown.load(.acquire) and running_handles == 0) {
// Check if fireTicks submitted new requests (e.g. telemetry flush).
// If so, continue the loop to drain and send them before exiting.
self.submission_mutex.lock();
const has_pending = self.submission_queue.first != null;
self.submission_mutex.unlock();
if (!has_pending) break;
if (self.shutdown.load(.acquire)) {
// Force-disconnect any still-live CDP clients so their worker
// threads wake from curl_multi_poll and exit. A worker is woken
// only by this (Network) thread; if the loop exits with links
// still live, those workers block forever and Server.deinit()
// spins on active_threads (issue #2510). Idempotent — a no-op
// once the links are drained.
self.shutdownCdpLinks();
if (running_handles == 0) {
// Check if fireTicks submitted new requests (e.g. telemetry
// flush). If so, continue the loop to drain and send them
// before exiting.
self.submission_mutex.lock();
const has_pending = self.submission_queue.first != null;
self.submission_mutex.unlock();
if (!has_pending) break;
}
}
}