From aac0a6e6b652c51b52856507043f1e07fb0726a1 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Fri, 17 Apr 2026 11:20:27 +0800 Subject: [PATCH 1/6] Websocket fixes. This commit fixes a few serious issues with the Websocket implementation. 1 - libcurl recursive api calls Creating a Websocket instance from within a libcurl callback results in libcurl failing with a RecursiveApiCall error. I fixed this more generally by adding a `ready_queue` which connections can use when the `HttpClient` is performing actions. Once `perform` ends, this new `ready_queue` is processed. There might be a more holistic solution to this (we seem to run into RecursiveApiCall everywhere), but since HttpClient is going through heavy changes, this seemed like the smallest possible change to fix it. 2 - "load" blocking Load and IdleNetwork notifications should not block on Websocket connections. To solve this, `HttpClient` now ha `http_active` and `ws_active` to replace `active`. Only `http_active` is used for things like "load" triggering. 3 - The above change made the Runner's job more complicated. It used to be binary: you either have active connections or not. Now there are different types of active connections. To keep it simple, and I think probably more correct, the "done-ness" (based on the `wait` parameter) is now independent of active (or not) network activity. If the page's `load_state == .complete`, then the `wait == .done` is considered successful, whether or not we have active connections. 4 - As a consequence of the above, and seemingly unrelated to all of these changes, a number of html tests now use the "new" robust async framework. Most of these tests were using the `testing.onload` (aka `testing.eventually`) which had somewhat...unclear semantics. These tests passed more of a consequence of how we processed a page and being very simple (e.g. just needing 1 micro or macrotask tick). But `eventually` never worked for more complicated cases, and the previous `testing.async` didn't work well. Now, the test runner waits for .load (which, as per #3, can fire more aggressively), which caused many `eventually` tests to fail. Moving these tests to the new `async` is more robust and works with the new aggressive "load". --- src/browser/HttpClient.zig | 106 ++++-- src/browser/Runner.zig | 36 +-- src/browser/tests/animation/animation.html | 11 +- src/browser/tests/document/write.html | 23 +- src/browser/tests/event/abort_controller.html | 23 +- src/browser/tests/frames/post_message.html | 6 +- src/browser/tests/frames/target.html | 8 +- src/browser/tests/net/fetch.html | 305 ++++++++++-------- src/browser/tests/net/websocket.html | 18 ++ src/browser/tests/page/encoding.html | 39 ++- src/browser/tests/testing.js | 4 +- src/browser/tests/window/timers.html | 48 +-- src/browser/tests/worker/worker.html | 262 +++++++-------- src/cdp/domains/page.zig | 2 +- src/network/http.zig | 3 +- src/testing.zig | 2 +- 16 files changed, 529 insertions(+), 367 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index f60605c9..c937569f 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -57,8 +57,11 @@ pub const HeaderIterator = http.HeaderIterator; // those other http requests. pub const Client = @This(); -// Count of active requests -active: usize = 0, +// Count of active ws requests +ws_active: usize = 0, + +// Count of active http requests +http_active: usize = 0, // Count of intercepted requests. This is to help deal with intercepted requests. // The client doesn't track intercepted transfers. If a request is intercepted, @@ -87,6 +90,13 @@ next_request_id: u32 = 0, // When handles has no more available easys, requests get queued. queue: std.DoublyLinkedList = .{}, +// Queue is for Transfers that have no connection. ready_queue is for connections +// that were initiated when performing == true and thus need to wait until +// performing == false before being added. I'm hoping this is temporary and that +// we can unify the two queues. But HTTP is being changed a lot right now, and +// I'm trying to minimize the surface area. +ready_queue: std.DoublyLinkedList = .{}, + // The main app allocator allocator: Allocator, @@ -220,6 +230,13 @@ pub fn setTlsVerify(self: *Client, verify: bool) !void { const conn: *http.Connection = @fieldParentPtr("node", node); try conn.setTlsVerify(verify, self.use_proxy); } + + it = self.ready_queue.first; + while (it) |node| : (it = node.next) { + const conn: *http.Connection = @fieldParentPtr("node", node); + try conn.setTlsVerify(verify, self.use_proxy); + } + self.tls_verify = verify; } @@ -258,26 +275,8 @@ pub fn abortFrame(self: *Client, frame_id: u32) void { // Written this way so that both abort and abortFrame can share the same code // but abort can avoid the frame_id check at comptime. fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32) void { - { - var n = self.in_use.first; - while (n) |node| { - n = node.next; - const conn: *http.Connection = @fieldParentPtr("node", node); - switch (conn.transport) { - .http => |transfer| { - if ((comptime abort_all) or transfer.req.frame_id == frame_id) { - transfer.kill(); - } - }, - .websocket => |ws| { - if ((comptime abort_all) or ws._page._frame_id == frame_id) { - ws.kill(); - } - }, - .none => unreachable, - } - } - } + abortConnections(self.in_use, abort_all, frame_id); + abortConnections(self.ready_queue, abort_all, frame_id); { var q = &self.queue; @@ -296,6 +295,7 @@ fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32) void { if (comptime abort_all) { self.queue = .{}; + self.ready_queue = .{}; } if (comptime IS_DEBUG and abort_all) { @@ -312,7 +312,28 @@ fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32) void { } leftover += 1; } - std.debug.assert(self.active == leftover); + std.debug.assert(self.http_active == leftover); + } +} + +fn abortConnections(list: std.DoublyLinkedList, comptime abort_all: bool, frame_id: u32) void { + var n = list.first; + while (n) |node| { + n = node.next; + const conn: *http.Connection = @fieldParentPtr("node", node); + switch (conn.transport) { + .http => |transfer| { + if ((comptime abort_all) or transfer.req.frame_id == frame_id) { + transfer.kill(); + } + }, + .websocket => |ws| { + if ((comptime abort_all) or ws._page._frame_id == frame_id) { + ws.kill(); + } + }, + .none => unreachable, + } } } @@ -848,6 +869,11 @@ fn perform(self: *Client, timeout_ms: c_int) anyerror!PerformStatus { self.releaseConn(conn); } + while (self.ready_queue.popFirst()) |node| { + const conn: *http.Connection = @fieldParentPtr("node", node); + try self.trackConn(conn); + } + // We're potentially going to block for a while until we get data. Process // whatever messages we have waiting ahead of time. if (try self.processMessages()) { @@ -1034,7 +1060,7 @@ fn processMessages(self: *Client) !bool { // Conn was removed from handles during redirect reconfiguration // but not re-added. Release it directly to avoid double-remove. self.in_use.remove(&c.node); - self.active -= 1; + self.http_active -= 1; self.releaseConn(c); transfer._detached_conn = null; } @@ -1046,6 +1072,7 @@ fn processMessages(self: *Client) !bool { } }, .websocket => |ws| { + // ws_active will be decremented through the call to disconnected if (msg.err) |err| switch (err) { error.GotNothing => ws.disconnected(null), else => ws.disconnected(err), @@ -1063,26 +1090,51 @@ fn processMessages(self: *Client) !bool { } pub fn trackConn(self: *Client, conn: *http.Connection) !void { + if (self.performing) { + conn.in_use = false; + self.ready_queue.append(&conn.node); + return; + } + self.in_use.append(&conn.node); + conn.in_use = true; // Set private pointer so readMessage can find the Connection. // Must be done each time since curl_easy_reset clears it when // connections are returned to pool. conn.setPrivate(conn) catch |err| { self.in_use.remove(&conn.node); + conn.in_use = false; self.releaseConn(conn); return err; }; self.handles.add(conn) catch |err| { self.in_use.remove(&conn.node); + conn.in_use = false; self.releaseConn(conn); return err; }; - self.active += 1; + + switch (conn.transport) { + .http => self.http_active += 1, + .websocket => self.ws_active += 1, + else => unreachable, + } } pub fn removeConn(self: *Client, conn: *http.Connection) void { + if (conn.in_use == false) { + self.ready_queue.remove(&conn.node); + self.releaseConn(conn); + return; + } + self.in_use.remove(&conn.node); - self.active -= 1; + conn.in_use = false; + switch (conn.transport) { + .http => self.http_active -= 1, + .websocket => self.ws_active -= 1, + else => unreachable, + } if (self.handles.remove(conn)) { self.releaseConn(conn); } else |_| { @@ -1097,7 +1149,7 @@ fn releaseConn(self: *Client, conn: *http.Connection) void { } fn ensureNoActiveConnection(self: *const Client) !void { - if (self.active > 0) { + if (self.http_active > 0 or self.ws_active > 0) { return error.InflightConnection; } } diff --git a/src/browser/Runner.zig b/src/browser/Runner.zig index fd3889e6..3a228c09 100644 --- a/src/browser/Runner.zig +++ b/src/browser/Runner.zig @@ -135,7 +135,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { .pre, .raw, .text, .image => { // The main page hasn't started/finished navigating. // There's no JS to run, and no reason to run the scheduler. - if (http_client.active == 0 and (comptime is_cdp) == false) { + if (http_client.http_active == 0 and (comptime is_cdp) == false) { // haven't started navigating, I guess. return .done; } @@ -162,14 +162,14 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { // download, or scheduled tasks to execute, or both. // scheduler.run could trigger new http transfers, so do not - // store http_client.active BEFORE this call and then use + // store http_client.http_active BEFORE this call and then use // it AFTER. try browser.runMacrotasks(); // Each call to this runs scheduled load events. try page.dispatchLoad(); - const http_active = http_client.active; + const http_active = http_client.http_active; const total_network_activity = http_active + http_client.intercepted; if (page._notified_network_almost_idle.check(total_network_activity <= 2)) { page.notifyNetworkAlmostIdle(); @@ -178,9 +178,22 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { page.notifyNetworkIdle(); } - if (http_active == 0 and (comptime is_cdp == false)) { + switch (opts.until) { + .done => {}, + .domcontentloaded => if (page._load_state == .load or page._load_state == .complete) { + return .done; + }, + .load => if (page._load_state == .complete) { + return .done; + }, + .networkidle => if (page._notified_network_idle == .done) { + return .done; + }, + } + + if (http_active == 0 and http_client.ws_active == 0 and (comptime is_cdp == false)) { // we don't need to consider http_client.intercepted here - // because is_cdp is true, and that can only be + // because is_cdp is false, and that can only be // the case when interception isn't possible. if (comptime IS_DEBUG) { std.debug.assert(http_client.intercepted == 0); @@ -192,19 +205,6 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { browser.waitForBackgroundTasks(); } - switch (opts.until) { - .done => {}, - .domcontentloaded => if (page._load_state == .load or page._load_state == .complete) { - return .done; - }, - .load => if (page._load_state == .complete) { - return .done; - }, - .networkidle => if (page._notified_network_idle == .done) { - return .done; - }, - } - // We never advertise a wait time of more than 20, there can // always be new background tasks to run. if (browser.msToNextMacrotask()) |ms_to_next_task| { diff --git a/src/browser/tests/animation/animation.html b/src/browser/tests/animation/animation.html index 1cfb768a..a4ba75a1 100644 --- a/src/browser/tests/animation/animation.html +++ b/src/browser/tests/animation/animation.html @@ -1,7 +1,9 @@ -