From bdf28c51cd31e24b8ec26fb283c6cb570ee0a90a Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 20 May 2026 20:43:55 +0200 Subject: [PATCH 1/6] network: terminate live CDP connections on shutdown The CDP server ignored a single SIGTERM while a connection was live: the process only exited if the socket was closed before the signal, or after a third signal. A conventional one-shot graceful stop (SIGTERM then waitpid) hung. On shutdown the sighandler runs Network.stop (which sets `shutdown` and lets the run loop exit) before Server.shutdown. A live CDP worker parks in curl_multi_poll and is woken ONLY by the Network thread via dropCdp -> handles.wakeup(). Once the run loop exits with links still live, nothing can wake those workers, so Server.deinit()'s `while (active_threads > 0)` loop spins forever. Drop every still-live CDP link from the run loop when `shutdown` is set, reusing the existing peer-EOF path: dropCdp(notify=true) pushes a .disconnect into the worker's inbox and wakes it, so cdp.tick() returns false and the worker exits before the loop breaks. --- src/network/Network.zig | 54 +++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/src/network/Network.zig b/src/network/Network.zig index 13243d08..54b19d5d 100644 --- a/src/network/Network.zig +++ b/src/network/Network.zig @@ -683,6 +683,35 @@ fn processCdpEvents(self: *Network) void { } } +// On shutdown, force-disconnect every still-live CDP link. Each link's +// worker thread blocks in curl_multi_poll and is woken ONLY by this +// (Network) thread via dropCdp -> handles.wakeup(). If the run loop +// exits with links still live, those workers never wake and +// Server.deinit() spins on active_threads forever (issue #2510). +// Mirrors the peer-EOF path in processCdpEvents: dropCdp(notify=true) +// pushes a .disconnect into the worker's inbox and wakes it, so +// cdp.tick() returns false and the worker exits. +fn shutdownCdpLinks(self: *Network) void { + self.cdp_mutex.lock(); + defer self.cdp_mutex.unlock(); + + var any_removed = false; + var it = self.cdp_links.first; + while (it) |node| { + const next = node.next; + const link: *CdpLink = @fieldParentPtr("node", node); + if (link.state == .live) { + self.dropCdp(link, null, true); + any_removed = true; + } + it = next; + } + + if (any_removed) { + self.cdp_unregister.broadcast(); + } +} + pub fn run(self: *Network) void { var drain_buf: [64]u8 = undefined; var running_handles: c_int = 0; @@ -760,13 +789,24 @@ pub fn run(self: *Network) void { self.fireTicks(); - if (self.shutdown.load(.acquire) and running_handles == 0) { - // Check if fireTicks submitted new requests (e.g. telemetry flush). - // If so, continue the loop to drain and send them before exiting. - self.submission_mutex.lock(); - const has_pending = self.submission_queue.first != null; - self.submission_mutex.unlock(); - if (!has_pending) break; + if (self.shutdown.load(.acquire)) { + // Force-disconnect any still-live CDP clients so their worker + // threads wake from curl_multi_poll and exit. A worker is woken + // only by this (Network) thread; if the loop exits with links + // still live, those workers block forever and Server.deinit() + // spins on active_threads (issue #2510). Idempotent — a no-op + // once the links are drained. + self.shutdownCdpLinks(); + + if (running_handles == 0) { + // Check if fireTicks submitted new requests (e.g. telemetry + // flush). If so, continue the loop to drain and send them + // before exiting. + self.submission_mutex.lock(); + const has_pending = self.submission_queue.first != null; + self.submission_mutex.unlock(); + if (!has_pending) break; + } } } From 88b98e705ff3e7e1d1d0e38aca1442615825377b Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Thu, 21 May 2026 10:38:52 +0800 Subject: [PATCH 2/6] Capture disconnect/close in Worker Currently, if a disconnect/close is captured in a worker during a syncRequest, that specific request is terminated, but the error doesn't bubble up. The worker remains alive and will subsequently block in a perform, with no connection alive to wake it up. In this commit, when disconnect/close is received, inbox.terminate is set to true. This flag is checked (in syncRequest and http_client.tick) and error.ClientDisconnected is returned. (Also, on network shutdown, always broadcast the cdp_unregister since there's no harm in sending an extra signal even if nothing was removed). --- src/Inbox.zig | 7 +++++++ src/browser/HttpClient.zig | 10 ++++++++++ src/network/Network.zig | 19 +++++++------------ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/Inbox.zig b/src/Inbox.zig index ce9c7d78..1828f0f6 100644 --- a/src/Inbox.zig +++ b/src/Inbox.zig @@ -39,6 +39,13 @@ const Inbox = @This(); mutex: std.Thread.Mutex = .{}, queue: DoublyLinkedList = .{}, +// One-way latch, set by the worker's drainInbox the first time it +// observes a .disconnect (or .close) and never cleared. Ensures that, on +// multiple drains, the terminated state is preserved / communicated. This is +// specifically meant to handle the case where a disconnect is captured during +// a syncRequest and we want the following non-nested tick to pick it up again. +terminated: bool = false, + pub fn deinit(self: *Inbox, arena_pool: *ArenaPool) void { self.mutex.lock(); defer self.mutex.unlock(); diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index df91ac42..d49fa484 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -398,6 +398,10 @@ pub fn abortRequests(_: *Client, owner: *Owner) void { const DrainMode = enum { all, sync_wait }; pub fn tick(self: *Client, timeout_ms: u32, mode: DrainMode) !void { + if (self.inbox.terminated) { + return error.ClientDisconnected; + } + try self.drainQueue(); try self.perform(@intCast(timeout_ms)); // perform/processMessages just released a batch of connections back to @@ -536,6 +540,10 @@ const SyncContext = struct { }; pub fn syncRequest(self: *Client, allocator: Allocator, req: Request) !SyncResponse { + if (self.inbox.terminated) { + return error.ClientDisconnected; + } + var sync_ctx = SyncContext{ .allocator = allocator, .body = .empty }; errdefer sync_ctx.body.deinit(allocator); @@ -719,10 +727,12 @@ fn drainInbox(self: *Client, mode: DrainMode) !void { .close => { cdp.onClose(); cdp.onDisconnect(null); + self.inbox.terminated = true; return error.ClientDisconnected; }, .disconnect => |err| { cdp.onDisconnect(err); + self.inbox.terminated = true; return error.ClientDisconnected; }, } diff --git a/src/network/Network.zig b/src/network/Network.zig index 54b19d5d..addb1796 100644 --- a/src/network/Network.zig +++ b/src/network/Network.zig @@ -695,21 +695,17 @@ fn shutdownCdpLinks(self: *Network) void { self.cdp_mutex.lock(); defer self.cdp_mutex.unlock(); - var any_removed = false; var it = self.cdp_links.first; while (it) |node| { const next = node.next; const link: *CdpLink = @fieldParentPtr("node", node); if (link.state == .live) { self.dropCdp(link, null, true); - any_removed = true; } it = next; } - if (any_removed) { - self.cdp_unregister.broadcast(); - } + self.cdp_unregister.broadcast(); } pub fn run(self: *Network) void { @@ -790,12 +786,8 @@ pub fn run(self: *Network) void { self.fireTicks(); if (self.shutdown.load(.acquire)) { - // Force-disconnect any still-live CDP clients so their worker - // threads wake from curl_multi_poll and exit. A worker is woken - // only by this (Network) thread; if the loop exits with links - // still live, those workers block forever and Server.deinit() - // spins on active_threads (issue #2510). Idempotent — a no-op - // once the links are drained. + // Drain any live CDP links so their workers can exit (issue #2510). + // Idempo tent — no-op once drained, safe to call every iteration self.shutdownCdpLinks(); if (running_handles == 0) { @@ -805,7 +797,10 @@ pub fn run(self: *Network) void { self.submission_mutex.lock(); const has_pending = self.submission_queue.first != null; self.submission_mutex.unlock(); - if (!has_pending) break; + + if (!has_pending) { + break; + } } } } From 901eece85312de26c4124c92d831f7b686054c67 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Thu, 21 May 2026 12:18:28 +0200 Subject: [PATCH 3/6] network: add regression test for the CDP disconnect latch (#2510) Drives a .disconnect through the worker's inbox and asserts a second tick still returns error.ClientDisconnected once the inbox is empty. Fails against the pre-latch HttpClient (the worker would re-park in perform with no producer to wake it); passes with the latch. --- src/cdp/CDP.zig | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 01219e70..477f5de1 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -1331,3 +1331,27 @@ test "cdp: STARTUP sessionId" { try ctx.expectSentResult(null, .{ .id = 4, .index = 2, .session_id = "STARTUP" }); } } + +test "cdp: disconnect latches so the worker keeps exiting" { + var ctx = try testing.context(); + defer ctx.deinit(); + + const client = &ctx.cdp().browser.http_client; + + // Simulate the Network thread delivering a peer disconnect into the + // worker's inbox — the dropCdp(notify=true) path used on peer EOF and, + // since #2510, on shutdown via shutdownCdpLinks. + { + const arena = try client.arena_pool.acquire(.tiny, "test disconnect"); + client.inbox.push(arena, .{ .disconnect = null }); + } + + // First tick drains the .disconnect and tears the link down. + try testing.expectError(error.ClientDisconnected, client.tick(0, .all)); + + // The inbox is now empty. Without the latch this second tick would fall + // through to perform/poll with no producer left to wake it, so the worker + // would never exit and Server.deinit() would spin on active_threads + // (#2510). The latch keeps the terminal state sticky so the worker exits. + try testing.expectError(error.ClientDisconnected, client.tick(0, .all)); +} From baf1b20532616399c4dc0efd4a409d065a791497 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Thu, 21 May 2026 12:36:45 +0200 Subject: [PATCH 4/6] network: free request headers when syncRequest short-circuits on disconnect syncRequest's `terminated` early return (added in 88b98e70) exits before request() takes ownership of req.headers, so the curl_slist leaked after a latched disconnect (any nested fetch / importScripts / external stylesheet). Free it in the early return, mirroring request()'s own failure paths. Also adds a behavioral syncRequest-after-disconnect test. The leak itself isn't assertable here (curl_slist is C-allocated through the tracking allocator, outside the per-test leak check), so it's verified by the ownership contract rather than the suite. --- src/browser/HttpClient.zig | 3 +++ src/cdp/CDP.zig | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index d49fa484..6c0059ee 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -541,6 +541,9 @@ const SyncContext = struct { pub fn syncRequest(self: *Client, allocator: Allocator, req: Request) !SyncResponse { if (self.inbox.terminated) { + // request() takes ownership of req.headers on every path; we return + // before calling it, so free the curl_slist here to avoid leaking it. + req.headers.deinit(); return error.ClientDisconnected; } diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 477f5de1..204c8395 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -1355,3 +1355,36 @@ test "cdp: disconnect latches so the worker keeps exiting" { // (#2510). The latch keeps the terminal state sticky so the worker exits. try testing.expectError(error.ClientDisconnected, client.tick(0, .all)); } + +test "cdp: syncRequest short-circuits after disconnect" { + var ctx = try testing.context(); + defer ctx.deinit(); + + const client = &ctx.cdp().browser.http_client; + + // Latch terminated via a drained disconnect (as above). + { + const arena = try client.arena_pool.acquire(.tiny, "test disconnect"); + client.inbox.push(arena, .{ .disconnect = null }); + } + try testing.expectError(error.ClientDisconnected, client.tick(0, .all)); + + // A synchronous fetch attempted after the latch returns ClientDisconnected + // without starting the request. syncRequest also frees req.headers on this + // early-return path (it returns before request() takes ownership); that + // free isn't asserted here because curl_slist is C-allocated and escapes the + // per-test leak check, so it's verified by review. The latch check returns + // before any other req field is read, so the rest are placeholders. + const headers = try client.newHeaders(); + try testing.expectError(error.ClientDisconnected, client.syncRequest(testing.allocator, .{ + .frame_id = 0, + .loader_id = 0, + .method = .GET, + .url = "http://127.0.0.1:9582/", + .headers = headers, + .cookie_jar = null, + .cookie_origin = "", + .resource_type = .fetch, + .notification = undefined, + })); +} From 3bfc434b3b2526431fe5bf02b6d02a55be3f6fb5 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Thu, 21 May 2026 12:40:48 +0200 Subject: [PATCH 5/6] network: fix typo in shutdown comment (Idempotent) --- src/network/Network.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/network/Network.zig b/src/network/Network.zig index addb1796..b947b420 100644 --- a/src/network/Network.zig +++ b/src/network/Network.zig @@ -787,7 +787,7 @@ pub fn run(self: *Network) void { if (self.shutdown.load(.acquire)) { // Drain any live CDP links so their workers can exit (issue #2510). - // Idempo tent — no-op once drained, safe to call every iteration + // Idempotent — no-op once drained, safe to call every iteration self.shutdownCdpLinks(); if (running_handles == 0) { From 64a6e4012188f35d94ee73e3263a0f6824789d7b Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Thu, 21 May 2026 15:56:22 +0200 Subject: [PATCH 6/6] network: advance shutdownCdpLinks iterator before dropCdp --- src/network/Network.zig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/network/Network.zig b/src/network/Network.zig index b947b420..fb03e690 100644 --- a/src/network/Network.zig +++ b/src/network/Network.zig @@ -697,12 +697,11 @@ fn shutdownCdpLinks(self: *Network) void { var it = self.cdp_links.first; while (it) |node| { - const next = node.next; + it = node.next; const link: *CdpLink = @fieldParentPtr("node", node); if (link.state == .live) { self.dropCdp(link, null, true); } - it = next; } self.cdp_unregister.broadcast();