From 7237c377d326311fc59e65cdc45c13273d1cf361 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Tue, 28 Apr 2026 03:21:12 +0200 Subject: [PATCH 01/36] browser: send Referer on cross-page navigation requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anchor click, form submit, and `location.href = ...` assignments queue a navigation through `Frame.scheduleNavigation`, which then tears down the originating page and rebuilds the frame in `Session.processRootQueuedNavigation` before `Frame.navigate` issues the HTTP request. The originator's URL was discarded with the old arena, so the request went out without a Referer header — even though the HTML "navigate" algorithm and Fetch §4.5 require one. `Frame.headersForRequest` (#1449) handled subresource fetches but was never called from the navigation path. Capture the originating frame's URL into a new `referer` field on `NavigateOpts` at scheduling time, dup'd into the `QueuedNavigation` arena so it survives the page tear-down. `Frame.navigate` adds it as a `Referer:` header alongside the existing per-request headers. Iframe initial navigation (`Frame.zig:1282`) also sets `referer = parent.url` since the parent frame outlives that direct `navigate` call. CDP `Page.navigate` (`.reason = .address_bar`) and `Page.reload` continue to omit Referer — matches Chrome. 
Closes #2281 --- src/browser/Frame.zig | 26 ++++++++++++- src/cdp/domains/page.zig | 83 ++++++++++++++++++++++++++++++++++++++++ src/testing.zig | 33 ++++++++++++++++ 3 files changed, 140 insertions(+), 2 deletions(-) diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig index 8b0c462b..55b4d489 100644 --- a/src/browser/Frame.zig +++ b/src/browser/Frame.zig @@ -622,6 +622,10 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo if (opts.header) |hdr| { try headers.add(hdr); } + if (opts.referer) |ref| { + const ref_header = try std.mem.concatWithSentinel(self.arena, u8, &.{ "Referer: ", ref }, 0); + try headers.add(ref_header); + } // We dispatch frame_navigate event before sending the request. // It ensures the event frame_navigated is not dispatched before this one. session.notification.dispatch(.frame_navigate, &.{ @@ -754,9 +758,18 @@ fn scheduleNavigationWithArena(originator: *Frame, arena: Allocator, request_url session.browser.http_client.abortFrame(target._frame_id); } + // Capture the originating frame's URL as the Referer for this + // navigation. The originator's frame may be torn down before navigate() + // runs (processRootQueuedNavigation rebuilds the Page in-place), so dup + // into the QueuedNavigation arena which outlives that tear-down. + var nav_opts = opts; + if (nav_opts.referer == null and std.mem.startsWith(u8, originator.url, "http")) { + nav_opts.referer = try arena.dupe(u8, originator.url); + } + const qn = try arena.create(QueuedNavigation); qn.* = .{ - .opts = opts, + .opts = nav_opts, .arena = arena, .url = resolved_url, .is_about_blank = is_about_blank, @@ -1279,7 +1292,12 @@ pub fn iframeAddedCallback(self: *Frame, iframe: *IFrame) !void { ); }; - new_frame.navigate(url, .{ .reason = .initialFrameNavigation }) catch |err| { + new_frame.navigate(url, .{ + .reason = .initialFrameNavigation, + // Iframe's initial src request carries the parent's URL as Referer. 
+ // Parent frame outlives this navigate() call, so the slice is safe. + .referer = if (std.mem.startsWith(u8, self.url, "http")) self.url else null, + }) catch |err| { log.warn(.frame, "iframe navigate failure", .{ .url = url, .err = err }); self._pending_loads -= 1; iframe._window = null; @@ -3452,6 +3470,10 @@ pub const NavigateOpts = struct { method: HttpClient.Method = .GET, body: ?[]const u8 = null, header: ?[:0]const u8 = null, + // Set by scheduleNavigationWithArena from the originating frame's URL so + // anchor click / form submit / location.href navigations carry a Referer. + // null on CDP Page.navigate (address-bar) and Page.reload — matches Chrome. + referer: ?[]const u8 = null, force: bool = false, kind: NavigationKind = .{ .push = null }, }; diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index ee6f0990..c77319ae 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -1182,6 +1182,89 @@ test "cdp.frame: navigate inherits original fragment across redirect" { } } +test "cdp.frame: anchor click sends Referer matching the originating page" { + // HTML Living Standard "navigate" algorithm + Fetch §4.5 "request's referrer": + // when a navigation is initiated by a hyperlink click (or form submit, or + // location.href assignment), the resulting request carries a Referer + // header equal to the originating document's URL. + var ctx = try testing.context(); + defer ctx.deinit(); + + const cdp_inst = ctx.cdp(); + _ = try cdp_inst.createBrowserContext(); + var bc = &cdp_inst.browser_context.?; + bc.id = "BID-A18"; + bc.session_id = "SID-A18"; + bc.target_id = "TID-A18-000000".*; + + // Initial navigation to the page hosting the anchor — driven directly via + // Frame.navigate(.address_bar), so this request itself has no Referer. 
+ { + const f = try bc.session.createPage(); + try f.navigate("http://127.0.0.1:9582/referer_link.html", .{}); + var runner = try bc.session.runner(.{}); + try runner.wait(.{ .ms = 2000 }); + } + + // Click the anchor via JS. The click goes through Frame.scheduleNavigation + // (.reason = .script), which must capture the originating frame's URL as + // the Referer for the queued navigation. + { + const f = bc.session.currentFrame() orelse unreachable; + var ls: js.Local.Scope = undefined; + f.js.localScope(&ls); + defer ls.deinit(); + _ = try ls.local.exec("document.getElementById('link').click()", null); + var runner = try bc.session.runner(.{}); + try runner.wait(.{ .ms = 2000 }); + } + + // After the click navigation completes, the loaded page is /echo_referer + // and its body echoes the Referer header the server actually saw. + { + const f = bc.session.currentFrame() orelse unreachable; + var ls: js.Local.Scope = undefined; + f.js.localScope(&ls); + defer ls.deinit(); + const v = try ls.local.exec( + "document.body.innerText.includes('referer=http://127.0.0.1:9582/referer_link.html')", + null, + ); + try testing.expect(v.toBool()); + } +} + +test "cdp.frame: address-bar Page.navigate sends no Referer" { + // Regression guard: navigations initiated by the user agent itself (CDP + // Page.navigate, address-bar typed URLs, Page.reload) must not leak the + // previous page's URL as Referer. Matches Chrome. 
+ var ctx = try testing.context(); + defer ctx.deinit(); + + const cdp_inst = ctx.cdp(); + _ = try cdp_inst.createBrowserContext(); + var bc = &cdp_inst.browser_context.?; + bc.id = "BID-A18B"; + bc.session_id = "SID-A18B"; + bc.target_id = "TID-A18B-00000".*; + + { + const f = try bc.session.createPage(); + try f.navigate("http://127.0.0.1:9582/echo_referer", .{}); + var runner = try bc.session.runner(.{}); + try runner.wait(.{ .ms = 2000 }); + } + + { + const f = bc.session.currentFrame() orelse unreachable; + var ls: js.Local.Scope = undefined; + f.js.localScope(&ls); + defer ls.deinit(); + const v = try ls.local.exec("document.body.innerText.includes('referer=NONE')", null); + try testing.expect(v.toBool()); + } +} + test "cdp.frame: addScriptToEvaluateOnNewDocument" { var ctx = try testing.context(); defer ctx.deinit(); diff --git a/src/testing.zig b/src/testing.zig index 2fb04668..1ea704b0 100644 --- a/src/testing.zig +++ b/src/testing.zig @@ -662,6 +662,39 @@ fn testHTTPHandler(req: *std.http.Server.Request) !void { }); } + if (std.mem.eql(u8, path, "/echo_referer")) { + // Echo the request's Referer header back as HTML so tests can assert + // what Referer the navigation sent. Used by the cross-page Referer test. + var it = req.iterateHeaders(); + var referer: []const u8 = "NONE"; + while (it.next()) |h| { + if (std.ascii.eqlIgnoreCase(h.name, "Referer")) { + referer = h.value; + break; + } + } + var html_buf: [512]u8 = undefined; + const html = try std.fmt.bufPrint(&html_buf, "referer={s}", .{referer}); + return req.respond(html, .{ + .extra_headers = &.{ + .{ .name = "Content-Type", .value = "text/html; charset=utf-8" }, + }, + }); + } + + if (std.mem.eql(u8, path, "/referer_link.html")) { + // Page with an anchor link to /echo_referer. The test clicks the link + // via JS and asserts the resulting page reports Referer = this page. 
+ return req.respond( + "go", + .{ + .extra_headers = &.{ + .{ .name = "Content-Type", .value = "text/html; charset=utf-8" }, + }, + }, + ); + } + if (std.mem.eql(u8, path, "/echo_method")) { // Echo the request method back as HTML so tests can assert on what // method the navigation used. Used by the Page.reload-replays-POST test. From 7feae5a958ab62082576b8391c7b118ebe51cd66 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Tue, 28 Apr 2026 05:25:47 +0200 Subject: [PATCH 02/36] events: implement KeyboardEvent.keyCode and charCode legacy attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both getters were stubs returning 0. Per W3C UI Events § Annex C, keyCode should report the legacy fixed virtual key code for the key being pressed (e.g. 84 for 't'/'T', 16 for Shift, 13 for Enter), and charCode should report the Unicode code point of the character produced on keypress events. Adds a keyCode() method to the Key union that maps each named variant to its spec-defined value and computes the uppercase-ASCII fallback for .standard printable characters. getKeyCode delegates to it. getCharCode checks the event type and returns the first byte of _key.standard for keypress, 0 elsewhere. For shift-modified symbol keys (e.g. shift+1='!'), keyCode falls back to the modified char's ASCII rather than the unmodified key's value, since KeyboardEvent doesn't currently store the unmodified key. Spec-strict behavior would need plumbing unmodifiedText through KeyboardEventOptions — left as a follow-up. 
Closes #2291 --- src/browser/tests/event/keyboard.html | 82 +++++++++++++++- src/browser/webapi/event/KeyboardEvent.zig | 105 ++++++++++++++++++++- 2 files changed, 178 insertions(+), 9 deletions(-) diff --git a/src/browser/tests/event/keyboard.html b/src/browser/tests/event/keyboard.html index 223a9e0b..35ed4426 100644 --- a/src/browser/tests/event/keyboard.html +++ b/src/browser/tests/event/keyboard.html @@ -132,11 +132,85 @@ } - + + + + + + + + + + diff --git a/src/browser/webapi/event/KeyboardEvent.zig b/src/browser/webapi/event/KeyboardEvent.zig index 979b5c29..6b837f6e 100644 --- a/src/browser/webapi/event/KeyboardEvent.zig +++ b/src/browser/webapi/event/KeyboardEvent.zig @@ -160,6 +160,94 @@ pub const Key = union(enum) { else => |k| @tagName(k), }; } + + /// Legacy `KeyboardEvent.keyCode` value per UI Events spec § Annex C + /// (https://www.w3.org/TR/uievents/#legacy-key-attributes). Returns 0 for + /// keys without a defined fixed virtual key code. + /// + /// Spec note: for printable characters, `keyCode` is calculated from the + /// **unmodified** key value's uppercase ASCII. We don't track the + /// unmodified key, so we uppercase `_key` instead. This is exact for + /// letters (uppercase('t') == uppercase('T')) and digits, but for + /// shift-modified symbols (e.g., shift+1='!') it returns the modified + /// char's ASCII rather than the digit's keyCode. Callers needing + /// spec-strict behavior should pass `unmodifiedText` through CDP + /// `Input.dispatchKeyEvent` and use that instead. 
+ pub fn keyCode(self: Key) u32 { + return switch (self) { + // Modifier keys + .Alt, .AltGraph => 18, + .CapsLock => 20, + .Control => 17, + .Meta, .Hyper, .Super => 91, + .NumLock => 144, + .ScrollLock => 145, + .Shift => 16, + + // Whitespace keys (Space hits the .standard path below) + .Enter => 13, + .Tab => 9, + + // Navigation keys + .ArrowDown => 40, + .ArrowLeft => 37, + .ArrowRight => 39, + .ArrowUp => 38, + .End => 35, + .Home => 36, + .PageDown => 34, + .PageUp => 33, + + // Editing keys + .Backspace => 8, + .Clear => 12, + .Delete => 46, + .Insert => 45, + + // UI keys + .Cancel => 3, + .ContextMenu => 93, + .Escape => 27, + .Execute => 43, + .Help => 47, + .Pause => 19, + .Select => 41, + + // Function keys + .F1 => 112, + .F2 => 113, + .F3 => 114, + .F4 => 115, + .F5 => 116, + .F6 => 117, + .F7 => 118, + .F8 => 119, + .F9 => 120, + .F10 => 121, + .F11 => 122, + .F12 => 123, + + .standard => |s| { + if (s.len == 0) return 0; + const c = s[0]; + // Letters: uppercase ASCII + if (c >= 'a' and c <= 'z') return c - 'a' + 'A'; + if (c >= 'A' and c <= 'Z') return c; + // Digits: ASCII + if (c >= '0' and c <= '9') return c; + // Space: 32 — also returned via the ASCII fall-through below, + // but called out for clarity since it's the most common case. + if (c == ' ') return 32; + // Other ASCII chars (best-effort: legacy keyCode for symbols + // is platform-specific and depends on the unmodified key, + // which we don't track). + return c; + }, + + // Keys without a defined legacy keyCode + else => 0, + }; + } }; pub const Location = enum(i32) { @@ -270,15 +358,22 @@ pub fn getShiftKey(self: *const KeyboardEvent) bool { return self._shift_key; } -// Deprecated: tracked as 0 since we don't synthesise legacy character codes. +// https://www.w3.org/TR/uievents/#dom-keyboardevent-charcode +// charCode is the Unicode code point of the character produced by the key, +// and is only meaningful on `keypress` events. For `keydown` and `keyup` it +// is 0. 
(Deprecated, but read by legacy event handlers.) pub fn getCharCode(self: *const KeyboardEvent) u32 { - _ = self; - return 0; + const event = self._proto._proto; + if (!std.mem.eql(u8, event._type_string.str(), "keypress")) return 0; + return switch (self._key) { + .standard => |s| if (s.len > 0) s[0] else 0, + else => 0, + }; } +// https://www.w3.org/TR/uievents/#dom-keyboardevent-keycode pub fn getKeyCode(self: *const KeyboardEvent) u32 { - _ = self; - return 0; + return self._key.keyCode(); } pub fn initKeyboardEvent( From 73a007f88e9174ae276f7ed90a99863dff8c44b5 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Tue, 28 Apr 2026 07:12:29 +0200 Subject: [PATCH 03/36] events: gate KeyboardEvent.keyCode/charCode on isTrusted, add Enter charCode Address review feedback on the legacy KeyboardEvent.keyCode and KeyboardEvent.charCode getters: * `getKeyCode` and `getCharCode` now early-return 0 when the event is not trusted, matching Chrome's behavior. Synthetic events created via `new KeyboardEvent(...)` from script have `isTrusted === false` and therefore expose 0 for both legacy attributes; only events dispatched by the user agent itself surface the legacy mapping. * `getCharCode` now uses `_type_string.eql(comptime .wrap("keypress"))` to match the idiom used elsewhere in the project. * The charCode mapping is moved into a pure `Key.charCode()` helper that mirrors `Key.keyCode()`, including a `.Enter => 13` arm so a trusted `keypress` for Enter exposes `\r` (U+000D) per spec. The JS test fixture is consolidated into a single block asserting the Chrome-correct behavior for synthetic events. The full per-key mapping table is now exercised via two pure-function Zig unit tests on `Key.keyCode()` and `Key.charCode()`. 
--- src/browser/tests/event/keyboard.html | 94 +++++-------------- src/browser/webapi/event/KeyboardEvent.zig | 101 ++++++++++++++++++++- 2 files changed, 117 insertions(+), 78 deletions(-) diff --git a/src/browser/tests/event/keyboard.html b/src/browser/tests/event/keyboard.html index 35ed4426..6f81a046 100644 --- a/src/browser/tests/event/keyboard.html +++ b/src/browser/tests/event/keyboard.html @@ -132,85 +132,33 @@ } - + // Per Chrome behavior, both legacy `keyCode` and `charCode` return 0 for + // events constructed via `new KeyboardEvent(...)` because such events + // have `isTrusted === false`. The legacy mapping (a -> 65, Enter -> 13, + // etc.) is only exposed for events dispatched by the user agent itself. + // The full mapping table is exercised via Zig unit tests in + // `webapi/event/KeyboardEvent.zig`. - - - - - - - - diff --git a/src/browser/webapi/event/KeyboardEvent.zig b/src/browser/webapi/event/KeyboardEvent.zig index 6b837f6e..387a5d23 100644 --- a/src/browser/webapi/event/KeyboardEvent.zig +++ b/src/browser/webapi/event/KeyboardEvent.zig @@ -248,6 +248,18 @@ pub const Key = union(enum) { else => 0, }; } + + /// Legacy `KeyboardEvent.charCode` value per UI Events spec § Annex C + /// (https://www.w3.org/TR/uievents/#legacy-key-attributes). Returns the + /// first byte of the key string (the code point for ASCII keys). Only + /// meaningful inside a `keypress` event — callers must gate accordingly. + pub fn charCode(self: Key) u32 { + return switch (self) { + .Enter => 13, + .standard => |s| if (s.len > 0) s[0] else 0, + else => 0, + }; + } }; pub const Location = enum(i32) { @@ -362,17 +374,22 @@ pub fn getShiftKey(self: *const KeyboardEvent) bool { // charCode is the Unicode code point of the character produced by the key, // and is only meaningful on `keypress` events. For `keydown` and `keyup` it // is 0. (Deprecated, but read by legacy event handlers.)
+// +// Chrome returns 0 for synthetic events (those created via +// `new KeyboardEvent(...)` rather than dispatched by the user agent), so we +// gate on `_is_trusted` to match. pub fn getCharCode(self: *const KeyboardEvent) u32 { const event = self._proto._proto; - if (!std.mem.eql(u8, event._type_string.str(), "keypress")) return 0; - return switch (self._key) { - .standard => |s| if (s.len > 0) s[0] else 0, - else => 0, - }; + if (event._is_trusted == false) return 0; + if (event._type_string.eql(comptime .wrap("keypress")) == false) return 0; + return self._key.charCode(); } // https://www.w3.org/TR/uievents/#dom-keyboardevent-keycode +// +// As with `charCode`, Chrome returns 0 for synthetic events. pub fn getKeyCode(self: *const KeyboardEvent) u32 { + if (self._proto._proto._is_trusted == false) return 0; return self._key.keyCode(); } @@ -462,3 +479,77 @@ const testing = @import("../../../testing.zig"); test "WebApi: KeyboardEvent" { try testing.htmlRunner("event/keyboard.html", .{}); } + +test "KeyboardEvent: Key.keyCode mapping" { + // Letters: uppercase ASCII regardless of case. + try testing.expectEqual(@as(u32, 65), Key.keyCode(.{ .standard = "a" })); + try testing.expectEqual(@as(u32, 65), Key.keyCode(.{ .standard = "A" })); + try testing.expectEqual(@as(u32, 84), Key.keyCode(.{ .standard = "T" })); + try testing.expectEqual(@as(u32, 90), Key.keyCode(.{ .standard = "z" })); + + // Digits. + try testing.expectEqual(@as(u32, 48), Key.keyCode(.{ .standard = "0" })); + try testing.expectEqual(@as(u32, 53), Key.keyCode(.{ .standard = "5" })); + try testing.expectEqual(@as(u32, 57), Key.keyCode(.{ .standard = "9" })); + + // Space. + try testing.expectEqual(@as(u32, 32), Key.keyCode(.{ .standard = " " })); + + // Modifier keys. 
+ try testing.expectEqual(@as(u32, 16), Key.keyCode(.Shift)); + try testing.expectEqual(@as(u32, 17), Key.keyCode(.Control)); + try testing.expectEqual(@as(u32, 18), Key.keyCode(.Alt)); + try testing.expectEqual(@as(u32, 91), Key.keyCode(.Meta)); + try testing.expectEqual(@as(u32, 20), Key.keyCode(.CapsLock)); + + // Whitespace keys. + try testing.expectEqual(@as(u32, 13), Key.keyCode(.Enter)); + try testing.expectEqual(@as(u32, 9), Key.keyCode(.Tab)); + + // Navigation keys. + try testing.expectEqual(@as(u32, 37), Key.keyCode(.ArrowLeft)); + try testing.expectEqual(@as(u32, 38), Key.keyCode(.ArrowUp)); + try testing.expectEqual(@as(u32, 39), Key.keyCode(.ArrowRight)); + try testing.expectEqual(@as(u32, 40), Key.keyCode(.ArrowDown)); + try testing.expectEqual(@as(u32, 33), Key.keyCode(.PageUp)); + try testing.expectEqual(@as(u32, 34), Key.keyCode(.PageDown)); + try testing.expectEqual(@as(u32, 35), Key.keyCode(.End)); + try testing.expectEqual(@as(u32, 36), Key.keyCode(.Home)); + + // Editing keys. + try testing.expectEqual(@as(u32, 8), Key.keyCode(.Backspace)); + try testing.expectEqual(@as(u32, 46), Key.keyCode(.Delete)); + try testing.expectEqual(@as(u32, 45), Key.keyCode(.Insert)); + + // UI keys. + try testing.expectEqual(@as(u32, 27), Key.keyCode(.Escape)); + try testing.expectEqual(@as(u32, 19), Key.keyCode(.Pause)); + try testing.expectEqual(@as(u32, 93), Key.keyCode(.ContextMenu)); + + // Function keys. + try testing.expectEqual(@as(u32, 112), Key.keyCode(.F1)); + try testing.expectEqual(@as(u32, 123), Key.keyCode(.F12)); + + // Keys without a defined fixed virtual key code. + try testing.expectEqual(@as(u32, 0), Key.keyCode(.Dead)); + try testing.expectEqual(@as(u32, 0), Key.keyCode(.Unidentified)); + try testing.expectEqual(@as(u32, 0), Key.keyCode(.{ .standard = "" })); +} + +test "KeyboardEvent: Key.charCode mapping" { + // Printable characters: charCode is the first byte of the key string (the code point for ASCII keys).
+ try testing.expectEqual(@as(u32, 97), Key.charCode(.{ .standard = "a" })); + try testing.expectEqual(@as(u32, 65), Key.charCode(.{ .standard = "A" })); + try testing.expectEqual(@as(u32, 48), Key.charCode(.{ .standard = "0" })); + try testing.expectEqual(@as(u32, 32), Key.charCode(.{ .standard = " " })); + + // Enter is the one named key that produces a charCode (\r = 13). + try testing.expectEqual(@as(u32, 13), Key.charCode(.Enter)); + + // Other named keys and the empty standard key produce no character. + try testing.expectEqual(@as(u32, 0), Key.charCode(.Tab)); + try testing.expectEqual(@as(u32, 0), Key.charCode(.Escape)); + try testing.expectEqual(@as(u32, 0), Key.charCode(.ArrowLeft)); + try testing.expectEqual(@as(u32, 0), Key.charCode(.Shift)); + try testing.expectEqual(@as(u32, 0), Key.charCode(.{ .standard = "" })); +} From ece2ec37539a8606ee27805ccbf62055e3ee87c1 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 28 Apr 2026 08:34:58 +0200 Subject: [PATCH 04/36] ci: create a matrix for e2e tests --- .github/workflows/e2e-test.yml | 127 ++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 51 deletions(-) diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 35584881..9dd32da0 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -63,8 +63,16 @@ jobs: zig-out/bin/lightpanda retention-days: 1 - demo-scripts: - name: demo-scripts + demo-runner: + strategy: + fail-fast: false + matrix: + proxy: [true, false] + wba: [true, false] + cache: [true, false] + robotstxt: [true, false] + + name: demo-runner needs: zig-build-release runs-on: ubuntu-latest @@ -85,36 +93,54 @@ jobs: - run: chmod a+x ./lightpanda - - name: run end to end tests - run: | - ./lightpanda serve & echo $! > LPD.pid - go run runner/main.go - kill `cat LPD.pid` - - - name: build proxy + - if: matrix.proxy == true + name: build and start proxy run: | cd proxy go build + ./proxy & echo $! 
> PROXY.id - - name: run end to end tests through proxy + - if: matrix.cache == true + run: mkdir /tmp/lp-cache + + - if: matrix.wba == true + run: echo "${{ secrets.WBA_PRIVATE_KEY_PEM }}" > private_key.pem + + - id: args + name: build LP args run: | - ./proxy/proxy & echo $! > PROXY.id - ./lightpanda serve --http-proxy 'http://127.0.0.1:3000' & echo $! > LPD.pid + args="" + [ "${{ matrix.proxy }}" = "true" ] && args="$args --http-proxy http://127.0.0.1:3000" + [ "${{ matrix.cache }}" = "true" ] && args="$args --http-cache-dir /tmp/lp-cache" + [ "${{ matrix.robotstxt }}" = "true" ] && args="$args --obey-robots" + [ "${{ matrix.wba }}" = "true" ] && args="$args --web-bot-auth-key-file private_key.pem" + [ "${{ matrix.wba }}" = "true" ] && args="$args --web-bot-auth-domain ${{ vars.WBA_DOMAIN }}" + [ "${{ matrix.wba }}" = "true" ] && args="$args --web-bot-auth-keyid ${{ vars.WBA_KEY_ID }}" + echo $args + echo "value=$args" >> "$GITHUB_OUTPUT" + + - run: | + ./lightpanda serve ${{ steps.args.outputs.value }} & echo $! > LPD.pid + + - run: | go run runner/main.go - URL=https://demo-browser.lightpanda.io/campfire-commerce/ node puppeteer/proxy_auth.js - kill `cat LPD.pid` `cat PROXY.id` - - name: run request interception through proxy and playwright + - run: | + kill `cat LPD.pid` + + - if: matrix.proxy == true run: | - export PROXY_USERNAME=username PROXY_PASSWORD=password - ./proxy/proxy & echo $! > PROXY.id - ./lightpanda serve & echo $! > LPD.pid - BASE_URL=https://demo-browser.lightpanda.io/ node playwright/proxy_auth.js - kill `cat LPD.pid` `cat PROXY.id` + pkill proxy - # e2e tests w/ web-bot-auth configuration on. 
+ - wba-demo-scripts: - name: wba-demo-scripts + proxy-auth: + strategy: + fail-fast: false + matrix: + wba: [true, false] + cache: [true, false] + robotstxt: [true, false] + + name: proxy-auth needs: zig-build-release runs-on: ubuntu-latest @@ -135,47 +161,46 @@ jobs: - run: chmod a+x ./lightpanda - - run: echo "${{ secrets.WBA_PRIVATE_KEY_PEM }}" > private_key.pem - - - name: run end to end tests - run: | - ./lightpanda serve \ - --web-bot-auth-key-file private_key.pem \ - --web-bot-auth-keyid ${{ vars.WBA_KEY_ID }} \ - --web-bot-auth-domain ${{ vars.WBA_DOMAIN }} \ - & echo $! > LPD.pid - go run runner/main.go - kill `cat LPD.pid` - - - name: build proxy + - name: build and start proxy run: | cd proxy go build + ./proxy & echo $! > PROXY.id + + - if: matrix.cache == true + run: mkdir /tmp/lp-cache + + - if: matrix.wba == true + run: echo "${{ secrets.WBA_PRIVATE_KEY_PEM }}" > private_key.pem + + - id: args + name: build LP args + run: | + args="" + [ "${{ matrix.cache }}" = "true" ] && args="$args --http-cache-dir /tmp/lp-cache" + [ "${{ matrix.robotstxt }}" = "true" ] && args="$args --obey-robots" + [ "${{ matrix.wba }}" = "true" ] && args="$args --web-bot-auth-key-file private_key.pem" + [ "${{ matrix.wba }}" = "true" ] && args="$args --web-bot-auth-domain ${{ vars.WBA_DOMAIN }}" + [ "${{ matrix.wba }}" = "true" ] && args="$args --web-bot-auth-keyid ${{ vars.WBA_KEY_ID }}" + echo $args + echo "value=$args" >> "$GITHUB_OUTPUT" - name: run end to end tests through proxy run: | - ./proxy/proxy & echo $! > PROXY.id - ./lightpanda serve \ - --web-bot-auth-key-file private_key.pem \ - --web-bot-auth-keyid ${{ vars.WBA_KEY_ID }} \ - --web-bot-auth-domain ${{ vars.WBA_DOMAIN }} \ - --http-proxy 'http://127.0.0.1:3000' \ - & echo $! > LPD.pid + export PROXY_USERNAME=username PROXY_PASSWORD=password + ./lightpanda serve --http-proxy http://127.0.0.1:3000 ${{ steps.args.outputs.value }} & echo $!
> LPD.pid go run runner/main.go URL=https://demo-browser.lightpanda.io/campfire-commerce/ node puppeteer/proxy_auth.js - kill `cat LPD.pid` `cat PROXY.id` + kill `cat LPD.pid` - name: run request interception through proxy and playwright run: | export PROXY_USERNAME=username PROXY_PASSWORD=password - ./proxy/proxy & echo $! > PROXY.id - ./lightpanda serve \ - --web-bot-auth-key-file private_key.pem \ - --web-bot-auth-keyid ${{ vars.WBA_KEY_ID }} \ - --web-bot-auth-domain ${{ vars.WBA_DOMAIN }} \ - & echo $! > LPD.pid + ./lightpanda serve ${{ steps.args.outputs.value }} & echo $! > LPD.pid BASE_URL=https://demo-browser.lightpanda.io/ node playwright/proxy_auth.js - kill `cat LPD.pid` `cat PROXY.id` + kill `cat LPD.pid` + + - run: pkill proxy wba-test: name: wba-test From c5b16cb18e8911601a4bdd0876d9d5d8c347602f Mon Sep 17 00:00:00 2001 From: Halil Durak Date: Tue, 28 Apr 2026 12:37:16 +0300 Subject: [PATCH 05/36] `Config`: add a custom validator for `--log-level` --- src/Config.zig | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Config.zig b/src/Config.zig index 85db1920..8ce27f1d 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -59,6 +59,18 @@ fn logFilterScopesValidator(allocator: Allocator, args: *std.process.ArgIterator } } +fn logLevelValidator(_: Allocator, args: *std.process.ArgIterator) !?log.Level { + const str = args.next() orelse return error.MissingArgument; + if (std.mem.eql(u8, str, "error")) { + return .err; + } + + return std.meta.stringToEnum(log.Level, str) orelse { + log.fatal(.app, "invalid option choice", .{ .arg = "--log-level", .value = str }); + return error.InvalidArgument; + }; +} + /// Common CLI args. 
const CommonOptions = .{ .{ .name = "obey_robots", .type = bool }, @@ -71,7 +83,7 @@ const CommonOptions = .{ .{ .name = "http_max_response_size", .type = ?usize }, .{ .name = "ws_max_concurrent", .type = ?u8 }, .{ .name = "insecure_disable_tls_host_verification", .type = bool }, - .{ .name = "log_level", .type = ?log.Level }, + .{ .name = "log_level", .type = ?log.Level, .validator = logLevelValidator }, .{ .name = "log_format", .type = ?log.Format }, .{ .name = "log_filter_scopes", .type = log.Scope, .multiple = true, .validator = logFilterScopesValidator }, .{ .name = "user_agent_suffix", .type = ?[]const u8 }, From 5dd15aa2cf13148dffc33ba455367266bfd0233b Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 16:17:02 -0700 Subject: [PATCH 06/36] use layers for Cache, Robots and WebBotAuth --- src/browser/HttpClient.zig | 436 ++++++-------------------- src/network/layer/CacheLayer.zig | 239 ++++++++++++++ src/network/layer/Forward.zig | 134 ++++++++ src/network/layer/RobotsLayer.zig | 264 ++++++++++++++++ src/network/layer/WebBotAuthLayer.zig | 52 +++ 5 files changed, 777 insertions(+), 348 deletions(-) create mode 100644 src/network/layer/CacheLayer.zig create mode 100644 src/network/layer/Forward.zig create mode 100644 src/network/layer/RobotsLayer.zig create mode 100644 src/network/layer/WebBotAuthLayer.zig diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 0832db1a..f22ab4d1 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -27,7 +27,6 @@ const CookieJar = @import("webapi/storage/Cookie.zig").Jar; const http = @import("../network/http.zig"); const Network = @import("../network/Network.zig"); const Robots = @import("../network/Robots.zig"); -const Cache = @import("../network/cache/Cache.zig"); const timestamp = @import("../datetime.zig").timestamp; const log = lp.log; @@ -40,8 +39,11 @@ pub const Method = http.Method; pub const Headers = http.Headers; pub const ResponseHead = http.ResponseHead; 
pub const HeaderIterator = http.HeaderIterator; -const CacheMetadata = Cache.CachedMetadata; -const CachedResponse = Cache.CachedResponse; +const CachedResponse = @import("../network/cache/Cache.zig").CachedResponse; + +pub const CacheLayer = @import("../network/layer/CacheLayer.zig"); +pub const RobotsLayer = @import("../network/layer/RobotsLayer.zig"); +pub const WebBotAuthLayer = @import("../network/layer/WebBotAuthLayer.zig"); // This is loosely tied to a browser Page. Loading all the , doing // XHR requests, and loading imports all happens through here. Sine the app @@ -101,10 +103,6 @@ allocator: Allocator, network: *Network, -// Queue of requests that depend on a robots.txt. -// Allows us to fetch the robots.txt just once. -pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty, - // Once we have a handle/easy to process a request with, we create a Transfer // which contains the Request as well as any state we need to process the // request. These will come and go with each request. @@ -134,6 +132,37 @@ cdp_client: ?CDPClient = null, max_response_size: usize, +cache_layer: CacheLayer, +robots_layer: RobotsLayer, +web_bot_auth_layer: WebBotAuthLayer, +entry_layer: Layer, + +pub const Context = struct { + network: *Network, + + pub fn newHeaders(self: Context) !http.Headers { + return http.Headers.init(self.network.config.http_headers.user_agent_header); + } +}; + +pub const Layer = struct { + ptr: *anyopaque, + vtable: *const VTable, + + pub const VTable = struct { + request: *const fn (*anyopaque, Context, Request) anyerror!void, + }; + + pub fn request(self: Layer, ctx: Context, req: Request) !void { + return self.vtable.request(self.ptr, ctx, req); + } +}; + +fn layerWith(self: anytype, next: Layer) Layer { + self.next = next; + return self.layer(); +} + // libcurl can monitor arbitrary sockets, this lets us use libcurl to poll // both HTTP data as well as messages from an CDP connection. 
// Furthermore, we have some tension between blocking scripts and request @@ -175,8 +204,29 @@ pub fn init(allocator: Allocator, network: *Network) !*Client { .tls_verify = network.config.tlsVerifyHost(), .obey_robots = network.config.obeyRobots(), .max_response_size = network.config.httpMaxResponseSize() orelse std.math.maxInt(u32), + + .cache_layer = .{}, + .robots_layer = .{ .allocator = allocator }, + .web_bot_auth_layer = .{}, + .entry_layer = undefined, }; + var next = client.layer(); + + if (network.config.webBotAuth() != null) { + next = layerWith(&client.web_bot_auth_layer, next); + } + + if (network.config.obeyRobots()) { + next = layerWith(&client.robots_layer, next); + } + + if (network.config.httpCacheDir() != null) { + next = layerWith(&client.cache_layer, next); + } + + client.entry_layer = next; + return client; } @@ -185,17 +235,20 @@ pub fn deinit(self: *Client) void { self.handles.deinit(); self.transfer_pool.deinit(); - - var robots_iter = self.pending_robots_queue.iterator(); - while (robots_iter.next()) |entry| { - entry.value_ptr.deinit(self.allocator); - } - self.pending_robots_queue.deinit(self.allocator); - self.clearUserAgentOverride(); + + self.robots_layer.deinit(self.allocator); + self.allocator.destroy(self); } +pub fn layer(self: *Client) Layer { + return .{ + .ptr = self, + .vtable = &.{ .request = _request }, + }; +} + // Set a user agent override. Both the raw UA string and the pre-formatted // "User-Agent: " header string are allocated from self.allocator. pub fn setUserAgentOverride(self: *Client, ua: []const u8) !void { @@ -350,102 +403,12 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { } pub fn request(self: *Client, req: Request) !void { - if (self.obey_robots == false) { - return self.processRequest(req); - } - - const robots_url = try URL.getRobotsUrl(self.allocator, req.url); - errdefer self.allocator.free(robots_url); - - // If we have this robots cached, we can take a fast path. 
- if (self.network.robot_store.get(robots_url)) |robot_entry| { - defer self.allocator.free(robots_url); - - switch (robot_entry) { - // If we have a found robots entry, we check it. - .present => |robots| { - const path = URL.getPathname(req.url); - if (!robots.isAllowed(path)) { - req.error_callback(req.ctx, error.RobotsBlocked); - return; - } - }, - // Otherwise, we assume we won't find it again. - .absent => {}, - } - - return self.processRequest(req); - } - return self.fetchRobotsThenProcessRequest(robots_url, req); + const ctx = Context{ .network = self.network }; + return self.entry_layer.request(ctx, req); } -fn serveFromCache(req: Request, cached: *const CachedResponse) !void { - const response = Response.fromCached(req.ctx, cached); - defer switch (cached.data) { - .buffer => |_| {}, - .file => |f| f.file.close(), - }; - - if (req.start_callback) |cb| { - try cb(response); - } - - const proceed = try req.header_callback(response); - if (!proceed) { - req.error_callback(req.ctx, error.Abort); - return; - } - - switch (cached.data) { - .buffer => |data| { - if (data.len > 0) { - try req.data_callback(response, data); - } - }, - .file => |f| { - const file = f.file; - - var buf: [1024]u8 = undefined; - var file_reader = file.reader(&buf); - try file_reader.seekTo(f.offset); - const reader = &file_reader.interface; - - var read_buf: [1024]u8 = undefined; - var remaining = f.len; - - while (remaining > 0) { - const read_len = @min(read_buf.len, remaining); - const n = try reader.readSliceShort(read_buf[0..read_len]); - if (n == 0) break; - remaining -= n; - try req.data_callback(response, read_buf[0..n]); - } - }, - } - - try req.done_callback(req.ctx); -} - -fn processRequest(self: *Client, req: Request) !void { - if (self.network.cache) |*cache| { - if (req.method == .GET) { - // cache is only used to read the meta data - const arena = try self.network.app.arena_pool.acquire(.small, "HttpClient.cache"); - defer self.network.app.arena_pool.release(arena); - - 
var iter = req.headers.iterator(); - const req_header_list = try iter.collect(arena); - - if (cache.get(arena, .{ - .url = req.url, - .timestamp = std.time.timestamp(), - .request_headers = req_header_list.items, - })) |cached| { - defer req.headers.deinit(); - return serveFromCache(req, &cached); - } - } - } +pub fn _request(ptr: *anyopaque, _: Context, req: Request) !void { + const self: *Client = @ptrCast(@alignCast(ptr)); const transfer = try self.makeTransfer(req); @@ -479,176 +442,6 @@ fn processRequest(self: *Client, req: Request) !void { } } -const RobotsRequestContext = struct { - client: *Client, - req: Request, - robots_url: [:0]const u8, - buffer: std.ArrayList(u8), - status: u16 = 0, - - pub fn deinit(self: *RobotsRequestContext) void { - self.client.allocator.free(self.robots_url); - self.buffer.deinit(self.client.allocator); - self.client.allocator.destroy(self); - } -}; - -fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: Request) !void { - const entry = try self.pending_robots_queue.getOrPut(self.allocator, robots_url); - - if (!entry.found_existing) { - errdefer self.allocator.free(robots_url); - - // If we aren't already fetching this robots, - // we want to create a new queue for it and add this request into it. 
- entry.value_ptr.* = .empty; - - const ctx = try self.allocator.create(RobotsRequestContext); - errdefer self.allocator.destroy(ctx); - ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty }; - const headers = try self.newHeaders(); - - log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); - try self.processRequest(.{ - .ctx = ctx, - .url = robots_url, - .method = .GET, - .headers = headers, - .blocking = false, - .frame_id = req.frame_id, - .loader_id = req.loader_id, - .cookie_jar = req.cookie_jar, - .cookie_origin = req.cookie_origin, - .notification = req.notification, - .resource_type = .fetch, - .header_callback = robotsHeaderCallback, - .data_callback = robotsDataCallback, - .done_callback = robotsDoneCallback, - .error_callback = robotsErrorCallback, - .shutdown_callback = robotsShutdownCallback, - }); - } else { - // Not using our own robots URL, only using the one from the first request. - self.allocator.free(robots_url); - } - - try entry.value_ptr.append(self.allocator, req); -} - -fn robotsHeaderCallback(response: Response) !bool { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx)); - // Robots callbacks only happen on real live requests. 
- const transfer = response.inner.transfer; - - if (transfer.response_header) |hdr| { - log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = ctx.robots_url }); - ctx.status = hdr.status; - } - - if (transfer.getContentLength()) |cl| { - try ctx.buffer.ensureTotalCapacity(ctx.client.allocator, cl); - } - - return true; -} - -fn robotsDataCallback(response: Response, data: []const u8) !void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx)); - try ctx.buffer.appendSlice(ctx.client.allocator, data); -} - -fn robotsDoneCallback(ctx_ptr: *anyopaque) !void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.deinit(); - - var allowed = true; - - switch (ctx.status) { - 200 => { - if (ctx.buffer.items.len > 0) { - const robots: ?Robots = ctx.client.network.robot_store.robotsFromBytes( - ctx.client.getUserAgent(), - ctx.buffer.items, - ) catch blk: { - log.warn(.browser, "failed to parse robots", .{ .robots_url = ctx.robots_url }); - // If we fail to parse, we just insert it as absent and ignore. - try ctx.client.network.robot_store.putAbsent(ctx.robots_url); - break :blk null; - }; - - if (robots) |r| { - try ctx.client.network.robot_store.put(ctx.robots_url, r); - const path = URL.getPathname(ctx.req.url); - allowed = r.isAllowed(path); - } - } - }, - 404 => { - log.debug(.http, "robots not found", .{ .url = ctx.robots_url }); - // If we get a 404, we just insert it as absent. - try ctx.client.network.robot_store.putAbsent(ctx.robots_url); - }, - else => { - log.debug(.http, "unexpected status on robots", .{ .url = ctx.robots_url, .status = ctx.status }); - // If we get an unexpected status, we just insert as absent. 
- try ctx.client.network.robot_store.putAbsent(ctx.robots_url); - }, - } - - var queued = ctx.client.pending_robots_queue.fetchRemove( - ctx.robots_url, - ) orelse @panic("Client.robotsDoneCallbacke empty queue"); - defer queued.value.deinit(ctx.client.allocator); - - for (queued.value.items) |queued_req| { - if (!allowed) { - log.warn(.http, "blocked by robots", .{ .url = queued_req.url }); - queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); - } else { - ctx.client.processRequest(queued_req) catch |e| { - queued_req.error_callback(queued_req.ctx, e); - }; - } - } -} - -fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.deinit(); - - log.warn(.http, "robots fetch failed", .{ .err = err }); - - var queued = ctx.client.pending_robots_queue.fetchRemove( - ctx.robots_url, - ) orelse @panic("Client.robotsErrorCallback empty queue"); - defer queued.value.deinit(ctx.client.allocator); - - // On error, allow all queued requests to proceed - for (queued.value.items) |queued_req| { - ctx.client.processRequest(queued_req) catch |e| { - queued_req.error_callback(queued_req.ctx, e); - }; - } -} - -fn robotsShutdownCallback(ctx_ptr: *anyopaque) void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.deinit(); - - log.debug(.http, "robots fetch shutdown", .{}); - - var queued = ctx.client.pending_robots_queue.fetchRemove( - ctx.robots_url, - ) orelse @panic("Client.robotsErrorCallback empty queue"); - defer queued.value.deinit(ctx.client.allocator); - - for (queued.value.items) |queued_req| { - if (queued_req.shutdown_callback) |shutdown_cb| { - shutdown_cb(queued_req.ctx); - } - } -} - fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { // The request was intercepted and is blocking. 
This is messy, but our // callers, the ScriptManager -> Page, don't have a great way to stop the @@ -1028,13 +821,6 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T } } - if (transfer._pending_cache_metadata) |metadata| { - const cache = &self.network.cache.?; - cache.put(metadata.*, body) catch |err| { - log.warn(.cache, "cache put failed", .{ .err = err }); - }; - } - // release conn ASAP so that it's available; some done_callbacks // will load more resources. transfer.releaseConn(); @@ -1155,6 +941,13 @@ fn ensureNoActiveConnection(self: *const Client) !void { } pub const Request = struct { + pub const StartCallback = *const fn (response: Response) anyerror!void; + pub const HeaderCallback = *const fn (response: Response) anyerror!bool; + pub const DataCallback = *const fn (response: Response, data: []const u8) anyerror!void; + pub const DoneCallback = *const fn (ctx: *anyopaque) anyerror!void; + pub const ErrorCallback = *const fn (ctx: *anyopaque, err: anyerror) void; + pub const ShutdownCallback = *const fn (ctx: *anyopaque) void; + frame_id: u32, loader_id: u32, method: Method, @@ -1178,12 +971,12 @@ pub const Request = struct { // arbitrary data that can be associated with this request ctx: *anyopaque = undefined, - start_callback: ?*const fn (response: Response) anyerror!void = null, - header_callback: *const fn (response: Response) anyerror!bool, - data_callback: *const fn (response: Response, data: []const u8) anyerror!void, - done_callback: *const fn (ctx: *anyopaque) anyerror!void, - error_callback: *const fn (ctx: *anyopaque, err: anyerror) void, - shutdown_callback: ?*const fn (ctx: *anyopaque) void = null, + start_callback: ?StartCallback = null, + header_callback: HeaderCallback, + data_callback: DataCallback, + done_callback: DoneCallback, + error_callback: ErrorCallback, + shutdown_callback: ?ShutdownCallback = null, const ResourceType = enum { document, @@ -1204,6 +997,10 @@ pub const Request = struct { }; } }; 
+ + pub fn deinit(self: *const Request) void { + self.headers.deinit(); + } }; pub const Response = struct { @@ -1290,7 +1087,6 @@ pub const Transfer = struct { // total bytes received in the response, including the response status line, // the headers, and the [encoded] body. bytes_received: usize = 0, - _pending_cache_metadata: ?*CacheMetadata = null, start_time: u64, aborted: bool = false, @@ -1442,12 +1238,6 @@ pub const Transfer = struct { try conn.secretHeaders(&header_list, &client.network.config.http_headers); try conn.setHeaders(&header_list); - // If we have WebBotAuth, sign our request. - if (client.network.web_bot_auth) |*wba| { - const authority = URL.getHost(req.url); - try wba.signRequest(self.arena.allocator(), &header_list, authority); - } - // Add cookies from cookie jar. if (try self.getCookieString()) |cookies| { try conn.setCookies(@ptrCast(cookies.ptr)); @@ -1693,56 +1483,6 @@ pub const Transfer = struct { return err; }; - if (transfer.client.network.cache != null and transfer.req.method == .GET) { - const rh = &transfer.response_header.?; - const allocator = transfer.arena.allocator(); - - const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null; - - const maybe_cm = try Cache.tryCache( - allocator, - std.time.timestamp(), - transfer.url, - rh.status, - rh.contentType(), - if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null, - vary, - if (conn.getResponseHeader("age", 0)) |h| h.value else null, - conn.getResponseHeader("set-cookie", 0) != null, - conn.getResponseHeader("authorization", 0) != null, - ); - - if (maybe_cm) |cm| { - var iter = transfer.responseHeaderIterator(); - var header_list = try iter.collect(allocator); - const end_of_response = header_list.items.len; - - if (vary) |vary_str| { - var req_it = transfer.req.headers.iterator(); - - while (req_it.next()) |hdr| { - var vary_iter = std.mem.splitScalar(u8, vary_str, ','); - - while (vary_iter.next()) |part| { - const name = std.mem.trim(u8, part, 
&std.ascii.whitespace); - if (std.ascii.eqlIgnoreCase(hdr.name, name)) { - try header_list.append(allocator, .{ - .name = try allocator.dupe(u8, hdr.name), - .value = try allocator.dupe(u8, hdr.value), - }); - } - } - } - } - - const metadata = try transfer.arena.allocator().create(CacheMetadata); - metadata.* = cm; - metadata.headers = header_list.items[0..end_of_response]; - metadata.vary_headers = header_list.items[end_of_response..]; - transfer._pending_cache_metadata = metadata; - } - } - return proceed and transfer.aborted == false; } diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig new file mode 100644 index 00000000..60dd1f2d --- /dev/null +++ b/src/network/layer/CacheLayer.zig @@ -0,0 +1,239 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +const std = @import("std"); +const log = @import("../../log.zig"); + +const http = @import("../http.zig"); +const Transfer = @import("../../browser/HttpClient.zig").Transfer; +const Context = @import("../../browser/HttpClient.zig").Context; +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; +const Layer = @import("../../browser/HttpClient.zig").Layer; + +const Cache = @import("../cache/Cache.zig"); +const CachedMetadata = @import("../cache/Cache.zig").CachedMetadata; +const CachedResponse = @import("../cache/Cache.zig").CachedResponse; +const Forward = @import("Forward.zig"); + +const CacheLayer = @This(); + +next: Layer = undefined, + +pub fn layer(self: *CacheLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ + .request = request, + }, + }; +} + +fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { + const self: *CacheLayer = @ptrCast(@alignCast(ptr)); + const network = ctx.network; + + if (req.method != .GET) { + return self.next.request(ctx, req); + } + + const arena = try network.app.arena_pool.acquire(.small, "CacheLayer"); + errdefer network.app.arena_pool.release(arena); + + var iter = req.headers.iterator(); + const req_header_list = try iter.collect(arena); + + if (network.cache.?.get(arena, .{ + .url = req.url, + .timestamp = std.time.timestamp(), + .request_headers = req_header_list.items, + })) |cached| { + defer req.deinit(); + defer network.app.arena_pool.release(arena); + return serveFromCache(req, &cached); + } + + const cache_ctx = try arena.create(CacheContext); + cache_ctx.* = .{ + .arena = arena, + .context = ctx, + .forward = Forward.fromRequest(req), + .req_url = req.url, + .req_headers = req.headers, + }; + + const wrapped = cache_ctx.forward.wrapRequest( + req, + cache_ctx, + .{ + .start = CacheContext.startCallback, + .header = CacheContext.headerCallback, + .done = CacheContext.doneCallback, + .shutdown = 
CacheContext.shutdownCallback, + .err = CacheContext.errorCallback, + }, + ); + + return self.next.request(ctx, wrapped); +} + +fn serveFromCache(req: Request, cached: *const CachedResponse) !void { + const response = Response.fromCached(req.ctx, cached); + defer switch (cached.data) { + .buffer => |_| {}, + .file => |f| f.file.close(), + }; + + if (req.start_callback) |cb| { + try cb(response); + } + + const proceed = try req.header_callback(response); + if (!proceed) { + req.error_callback(req.ctx, error.Abort); + return; + } + + switch (cached.data) { + .buffer => |data| { + if (data.len > 0) { + try req.data_callback(response, data); + } + }, + .file => |f| { + const file = f.file; + var buf: [1024]u8 = undefined; + var file_reader = file.reader(&buf); + try file_reader.seekTo(f.offset); + const reader = &file_reader.interface; + var read_buf: [1024]u8 = undefined; + var remaining = f.len; + while (remaining > 0) { + const read_len = @min(read_buf.len, remaining); + const n = try reader.readSliceShort(read_buf[0..read_len]); + if (n == 0) break; + remaining -= n; + try req.data_callback(response, read_buf[0..n]); + } + }, + } + + try req.done_callback(req.ctx); +} + +const CacheContext = struct { + arena: std.mem.Allocator, + context: Context, + transfer: ?*Transfer = null, + forward: Forward, + req_url: [:0]const u8, + req_headers: http.Headers, + pending_metadata: ?*CachedMetadata = null, + + fn startCallback(response: Response) anyerror!void { + const self: *CacheContext = @ptrCast(@alignCast(response.ctx)); + self.transfer = response.inner.transfer; + return self.forward.forwardStart(response); + } + + fn headerCallback(response: Response) anyerror!bool { + const self: *CacheContext = @ptrCast(@alignCast(response.ctx)); + const allocator = self.arena; + + const transfer = response.inner.transfer; + var rh = &transfer.response_header.?; + + const conn = transfer._conn.?; + + const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null; + + const 
maybe_cm = try Cache.tryCache( + allocator, + std.time.timestamp(), + transfer.url, + rh.status, + rh.contentType(), + if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null, + vary, + if (conn.getResponseHeader("age", 0)) |h| h.value else null, + conn.getResponseHeader("set-cookie", 0) != null, + conn.getResponseHeader("authorization", 0) != null, + ); + + if (maybe_cm) |cm| { + var iter = transfer.responseHeaderIterator(); + var header_list = try iter.collect(allocator); + const end_of_response = header_list.items.len; + + if (vary) |vary_str| { + var req_it = self.req_headers.iterator(); + while (req_it.next()) |hdr| { + var vary_iter = std.mem.splitScalar(u8, vary_str, ','); + while (vary_iter.next()) |part| { + const name = std.mem.trim(u8, part, &std.ascii.whitespace); + if (std.ascii.eqlIgnoreCase(hdr.name, name)) { + try header_list.append(allocator, .{ + .name = try allocator.dupe(u8, hdr.name), + .value = try allocator.dupe(u8, hdr.value), + }); + } + } + } + } + + const metadata = try allocator.create(CachedMetadata); + metadata.* = cm; + metadata.headers = header_list.items[0..end_of_response]; + metadata.vary_headers = header_list.items[end_of_response..]; + self.pending_metadata = metadata; + } + + return self.forward.forwardHeader(response); + } + + fn doneCallback(ctx: *anyopaque) anyerror!void { + const self: *CacheContext = @ptrCast(@alignCast(ctx)); + defer self.context.network.app.arena_pool.release(self.arena); + + const transfer = self.transfer orelse @panic("Start Callback didn't set CacheLayer.transfer"); + + if (self.pending_metadata) |metadata| { + const cache = &self.context.network.cache.?; + + log.debug(.browser, "http cache", .{ .key = self.req_url, .metadata = metadata }); + cache.put(metadata.*, transfer._stream_buffer.items) catch |err| { + log.warn(.http, "cache put failed", .{ .err = err }); + }; + log.debug(.browser, "http.cache.put", .{ .url = self.req_url }); + } + + return self.forward.forwardDone(); + } + + fn 
shutdownCallback(ctx: *anyopaque) void { + const self: *CacheContext = @ptrCast(@alignCast(ctx)); + defer self.context.network.app.arena_pool.release(self.arena); + self.forward.forwardShutdown(); + } + + fn errorCallback(ctx: *anyopaque, e: anyerror) void { + const self: *CacheContext = @ptrCast(@alignCast(ctx)); + defer self.context.network.app.arena_pool.release(self.arena); + self.forward.forwardErr(e); + } +}; diff --git a/src/network/layer/Forward.zig b/src/network/layer/Forward.zig new file mode 100644 index 00000000..b11ff23f --- /dev/null +++ b/src/network/layer/Forward.zig @@ -0,0 +1,134 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; + +const Forward = @This(); + +ctx: *anyopaque, +start: ?Request.StartCallback, +header: Request.HeaderCallback, +data: Request.DataCallback, +done: Request.DoneCallback, +err: Request.ErrorCallback, +shutdown: ?Request.ShutdownCallback, + +pub fn fromRequest(req: Request) Forward { + return .{ + .ctx = req.ctx, + .start = req.start_callback, + .header = req.header_callback, + .data = req.data_callback, + .done = req.done_callback, + .err = req.error_callback, + .shutdown = req.shutdown_callback, + }; +} + +pub const Overrides = struct { + start: ?Request.StartCallback = null, + header: ?Request.HeaderCallback = null, + data: ?Request.DataCallback = null, + done: ?Request.DoneCallback = null, + err: ?Request.ErrorCallback = null, + shutdown: ?Request.ShutdownCallback = null, +}; + +pub fn wrapRequest( + self: *Forward, + req: Request, + new_ctx: anytype, + overrides: Overrides, +) Request { + const T = @TypeOf(new_ctx.*); + const PassthroughT = makePassthrough(T, "forward"); + var wrapped = req; + wrapped.ctx = new_ctx; + wrapped.start_callback = overrides.start orelse if (self.start != null) PassthroughT.start else null; + wrapped.header_callback = overrides.header orelse PassthroughT.header; + wrapped.data_callback = overrides.data orelse PassthroughT.data; + wrapped.done_callback = overrides.done orelse PassthroughT.done; + wrapped.error_callback = overrides.err orelse PassthroughT.err; + wrapped.shutdown_callback = overrides.shutdown orelse if (self.shutdown != null) PassthroughT.shutdown else null; + return wrapped; +} + +fn makePassthrough(comptime T: type, comptime field: []const u8) type { + return struct { + pub fn start(response: Response) anyerror!void { + const self: *T = @ptrCast(@alignCast(response.ctx)); + return @field(self, field).forwardStart(response); + } + + pub fn header(response: Response) anyerror!bool { + 
const self: *T = @ptrCast(@alignCast(response.ctx)); + return @field(self, field).forwardHeader(response); + } + + pub fn data(response: Response, chunk: []const u8) anyerror!void { + const self: *T = @ptrCast(@alignCast(response.ctx)); + return @field(self, field).forwardData(response, chunk); + } + + pub fn done(ctx_ptr: *anyopaque) anyerror!void { + const self: *T = @ptrCast(@alignCast(ctx_ptr)); + return @field(self, field).forwardDone(); + } + + pub fn err(ctx_ptr: *anyopaque, e: anyerror) void { + const self: *T = @ptrCast(@alignCast(ctx_ptr)); + @field(self, field).forwardErr(e); + } + + pub fn shutdown(ctx_ptr: *anyopaque) void { + const self: *T = @ptrCast(@alignCast(ctx_ptr)); + @field(self, field).forwardShutdown(); + } + }; +} + +pub fn forwardStart(self: Forward, response: Response) anyerror!void { + var fwd = response; + fwd.ctx = self.ctx; + if (self.start) |cb| try cb(fwd); +} + +pub fn forwardHeader(self: Forward, response: Response) anyerror!bool { + var fwd = response; + fwd.ctx = self.ctx; + return self.header(fwd); +} + +pub fn forwardData(self: Forward, response: Response, chunk: []const u8) anyerror!void { + var fwd = response; + fwd.ctx = self.ctx; + return self.data(fwd, chunk); +} + +pub fn forwardDone(self: Forward) anyerror!void { + return self.done(self.ctx); +} + +pub fn forwardErr(self: Forward, e: anyerror) void { + self.err(self.ctx, e); +} + +pub fn forwardShutdown(self: Forward) void { + if (self.shutdown) |cb| cb(self.ctx); +} diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig new file mode 100644 index 00000000..0119d747 --- /dev/null +++ b/src/network/layer/RobotsLayer.zig @@ -0,0 +1,264 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the 
+// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const log = @import("../../log.zig"); + +const URL = @import("../../browser/URL.zig"); +const Robots = @import("../Robots.zig"); +const Context = @import("../../browser/HttpClient.zig").Context; +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; +const Layer = @import("../../browser/HttpClient.zig").Layer; +const Forward = @import("Forward.zig"); + +const RobotsLayer = @This(); + +next: Layer = undefined, +allocator: std.mem.Allocator, +pending: std.StringHashMapUnmanaged(std.ArrayListUnmanaged(Request)) = .empty, + +pub fn layer(self: *RobotsLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ + .request = request, + }, + }; +} + +pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void { + var it = self.pending.iterator(); + while (it.next()) |entry| { + entry.value_ptr.deinit(allocator); + } + self.pending.deinit(allocator); +} + +fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { + const self: *RobotsLayer = @ptrCast(@alignCast(ptr)); + + const arena = try ctx.network.app.arena_pool.acquire(.small, "RobotsLayer"); + errdefer ctx.network.app.arena_pool.release(arena); + + const robots_url = try URL.getRobotsUrl(arena, req.url); + + if (ctx.network.robot_store.get(robots_url)) |robot_entry| { + defer ctx.network.app.arena_pool.release(arena); + + switch (robot_entry) { + .present => |robots| { + const path = URL.getPathname(req.url); + + if (!robots.isAllowed(path)) { + log.warn(.http, 
"blocked by robots", .{ .url = req.url }); + req.error_callback(req.ctx, error.RobotsBlocked); + return; + } + }, + .absent => {}, + } + return self.next.request(ctx, req); + } + + return self.fetchRobotsThenRequest(ctx, arena, robots_url, req); +} + +fn fetchRobotsThenRequest( + self: *RobotsLayer, + ctx: Context, + arena: std.mem.Allocator, + robots_url: [:0]const u8, + req: Request, +) !void { + errdefer ctx.network.app.arena_pool.release(arena); + + const entry = try self.pending.getOrPut(self.allocator, robots_url); + + if (!entry.found_existing) { + errdefer std.debug.assert(self.pending.remove(robots_url)); + entry.value_ptr.* = .empty; + + const robots_ctx = try arena.create(RobotsContext); + robots_ctx.* = .{ + .layer = self, + .ctx = ctx, + .arena = arena, + .robots_url = robots_url, + .buffer = .empty, + }; + + const headers = try ctx.newHeaders(); + log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); + + try self.next.request(ctx, .{ + .ctx = robots_ctx, + .url = robots_url, + .method = .GET, + .headers = headers, + .blocking = false, + .frame_id = req.frame_id, + .loader_id = req.loader_id, + .cookie_jar = req.cookie_jar, + .cookie_origin = req.cookie_origin, + .notification = req.notification, + .resource_type = .fetch, + .header_callback = RobotsContext.headerCallback, + .data_callback = RobotsContext.dataCallback, + .done_callback = RobotsContext.doneCallback, + .error_callback = RobotsContext.errorCallback, + .shutdown_callback = RobotsContext.shutdownCallback, + }); + } else { + ctx.network.app.arena_pool.release(arena); + } + + try entry.value_ptr.append(self.allocator, req); +} + +fn flushPending(self: *RobotsLayer, ctx: Context, robots_url: [:0]const u8, allowed: bool) void { + var queued = self.pending.fetchRemove(robots_url) orelse + @panic("RobotsLayer.flushPending: missing queue"); + defer queued.value.deinit(self.allocator); + + for (queued.value.items) |queued_req| { + if (!allowed) { + log.warn(.http, "blocked by 
robots", .{ .url = queued_req.url }); + defer queued_req.deinit(); + queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); + } else { + self.next.request(ctx, queued_req) catch |e| { + defer queued_req.deinit(); + queued_req.error_callback(queued_req.ctx, e); + }; + } + } +} + +fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8) void { + var queued = self.pending.fetchRemove(robots_url) orelse + @panic("RobotsLayer.flushPendingShutdown: missing queue"); + defer queued.value.deinit(self.allocator); + + for (queued.value.items) |queued_req| { + defer queued_req.deinit(); + if (queued_req.shutdown_callback) |cb| cb(queued_req.ctx); + } +} + +const RobotsContext = struct { + layer: *RobotsLayer, + arena: std.mem.Allocator, + ctx: Context, + robots_url: [:0]const u8, + buffer: std.ArrayListUnmanaged(u8), + status: u16 = 0, + + fn deinit(self: *RobotsContext) void { + self.buffer.deinit(self.layer.allocator); + self.layer.allocator.destroy(self); + } + + fn headerCallback(response: Response) anyerror!bool { + const self: *RobotsContext = @ptrCast(@alignCast(response.ctx)); + switch (response.inner) { + .transfer => |t| { + if (t.response_header) |hdr| { + log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = self.robots_url }); + self.status = hdr.status; + } + if (t.getContentLength()) |cl| { + try self.buffer.ensureTotalCapacity(self.arena, cl); + } + }, + .cached => {}, + } + return true; + } + + fn dataCallback(response: Response, data: []const u8) anyerror!void { + const self: *RobotsContext = @ptrCast(@alignCast(response.ctx)); + try self.buffer.appendSlice(self.arena, data); + } + + fn doneCallback(ctx_ptr: *anyopaque) anyerror!void { + const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); + const l = self.layer; + const ctx = self.ctx; + const robots_url = self.robots_url; + defer ctx.network.app.arena_pool.release(self.arena); + + var allowed = true; + const network = ctx.network; + + switch (self.status) { 
+ 200 => { + if (self.buffer.items.len > 0) { + const robots: ?Robots = network.robot_store.robotsFromBytes( + network.config.http_headers.user_agent, + self.buffer.items, + ) catch blk: { + log.warn(.browser, "failed to parse robots", .{ .robots_url = robots_url }); + try network.robot_store.putAbsent(robots_url); + break :blk null; + }; + if (robots) |r| { + try network.robot_store.put(robots_url, r); + const path = URL.getPathname(l.pending.get(robots_url).?.items[0].url); + allowed = r.isAllowed(path); + } + } + }, + 404 => { + log.debug(.http, "robots not found", .{ .url = robots_url }); + try network.robot_store.putAbsent(robots_url); + }, + else => { + log.debug(.http, "unexpected status on robots", .{ + .url = robots_url, + .status = self.status, + }); + try network.robot_store.putAbsent(robots_url); + }, + } + + l.flushPending(ctx, robots_url, allowed); + } + + fn errorCallback(ctx_ptr: *anyopaque, err: anyerror) void { + const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); + const l = self.layer; + const ctx = self.ctx; + const robots_url = self.robots_url; + defer ctx.network.app.arena_pool.release(self.arena); + + log.warn(.http, "robots fetch failed", .{ .err = err }); + l.flushPending(ctx, robots_url, true); + } + + fn shutdownCallback(ctx_ptr: *anyopaque) void { + const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); + const l = self.layer; + const ctx = self.ctx; + const robots_url = self.robots_url; + defer ctx.network.app.arena_pool.release(self.arena); + + log.debug(.http, "robots fetch shutdown", .{}); + l.flushPendingShutdown(robots_url); + } +}; diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig new file mode 100644 index 00000000..42bf99a2 --- /dev/null +++ b/src/network/layer/WebBotAuthLayer.zig @@ -0,0 +1,52 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it 
under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +const std = @import("std"); +const log = @import("../../log.zig"); + +const URL = @import("../../browser/URL.zig"); +const WebBotAuth = @import("../WebBotAuth.zig"); +const Context = @import("../../browser/HttpClient.zig").Context; +const Request = @import("../../browser/HttpClient.zig").Request; +const Layer = @import("../../browser/HttpClient.zig").Layer; + +const WebBotAuthLayer = @This(); + +next: Layer = undefined, + +pub fn layer(self: *WebBotAuthLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ .request = request }, + }; +} + +fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { + const self: *WebBotAuthLayer = @ptrCast(@alignCast(ptr)); + var our_req = req; + + const wba = ctx.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth"); + + const arena = try ctx.network.app.arena_pool.acquire(.small, "WebBotAuthLayer"); + defer ctx.network.app.arena_pool.release(arena); + + const authority = URL.getHost(req.url); + try wba.signRequest(arena, &our_req.headers, authority); + + return self.next.request(ctx, our_req); +} From 46d0b34c54a748b5f82e34b70bb871852325b68b Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 16:36:24 -0700 Subject: [PATCH 07/36] add RequestParams and SyncRequest --- src/browser/Frame.zig | 22 ++- src/browser/HttpClient.zig | 214 +++++++++++++++------- src/browser/ScriptManager.zig | 62 ++++---
src/browser/webapi/Worker.zig | 20 +- src/browser/webapi/net/Fetch.zig | 22 ++- src/browser/webapi/net/XMLHttpRequest.zig | 24 +-- src/cdp/domains/fetch.zig | 12 +- src/cdp/domains/network.zig | 18 +- src/cdp/id.zig | 4 +- src/network/layer/CacheLayer.zig | 10 +- src/network/layer/RobotsLayer.zig | 32 ++-- src/network/layer/WebBotAuthLayer.zig | 4 +- 12 files changed, 273 insertions(+), 171 deletions(-) diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig index e407ab12..3e8ff90f 100644 --- a/src/browser/Frame.zig +++ b/src/browser/Frame.zig @@ -647,16 +647,18 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo http_client.request(.{ .ctx = self, - .url = self.url, - .frame_id = self._frame_id, - .loader_id = self._loader_id, - .method = opts.method, - .headers = headers, - .body = opts.body, - .cookie_jar = &session.cookie_jar, - .cookie_origin = self.url, - .resource_type = .document, - .notification = self._session.notification, + .params = .{ + .url = self.url, + .frame_id = self._frame_id, + .loader_id = self._loader_id, + .method = opts.method, + .headers = headers, + .body = opts.body, + .cookie_jar = &session.cookie_jar, + .cookie_origin = self.url, + .resource_type = .document, + .notification = self._session.notification, + }, .header_callback = frameHeaderDoneCallback, .data_callback = frameDataCallback, .done_callback = frameDoneCallback, diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index f22ab4d1..0dbee3bd 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -338,7 +338,7 @@ fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32) void { const transfer: *Transfer = @fieldParentPtr("_node", node); if (comptime abort_all) { transfer.kill(); - } else if (transfer.req.frame_id == frame_id) { + } else if (transfer.req.params.frame_id == frame_id) { q.remove(node); transfer.kill(); } @@ -375,7 +375,7 @@ fn abortConnections(list: std.DoublyLinkedList, comptime 
abort_all: bool, frame_ const conn: *http.Connection = @fieldParentPtr("node", node); switch (conn.transport) { .http => |transfer| { - if ((comptime abort_all) or transfer.req.frame_id == frame_id) { + if ((comptime abort_all) or transfer.req.params.frame_id == frame_id) { transfer.kill(); } }, @@ -402,20 +402,15 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { return self.perform(@intCast(timeout_ms)); } -pub fn request(self: *Client, req: Request) !void { - const ctx = Context{ .network = self.network }; - return self.entry_layer.request(ctx, req); -} - pub fn _request(ptr: *anyopaque, _: Context, req: Request) !void { const self: *Client = @ptrCast(@alignCast(ptr)); const transfer = try self.makeTransfer(req); - transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); + transfer.req.params.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); var wait_for_interception = false; - transfer.req.notification.dispatch(.http_request_intercept, &.{ + transfer.req.params.notification.dispatch(.http_request_intercept, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception, }); @@ -430,7 +425,7 @@ pub fn _request(ptr: *anyopaque, _: Context, req: Request) !void { } transfer._intercept_state = .pending; - if (req.blocking == false) { + if (req.params.blocking == false) { // The request was interecepted, but it isn't a blocking request, so we // dont' need to block this call. 
The request will be unblocked // asynchronously via either continueTransfer or abortTransfer @@ -442,6 +437,81 @@ pub fn _request(ptr: *anyopaque, _: Context, req: Request) !void { } } +pub fn request(self: *Client, req: Request) !void { + const ctx = Context{ .network = self.network }; + return self.entry_layer.request(ctx, req); +} + +const SyncContext = struct { + allocator: Allocator, + completion: union(enum) { + in_progress: void, + done: void, + err: anyerror, + shutdown: void, + } = .in_progress, + + status: u16 = 0, + body: std.ArrayList(u8), + + fn headerCallback(response: Response) anyerror!bool { + const self: *SyncContext = @ptrCast(@alignCast(response.ctx)); + self.status = response.status().?; + if (response.contentLength()) |cl| { + try self.body.ensureTotalCapacity(self.allocator, cl); + } + return true; + } + + fn dataCallback(response: Response, data: []const u8) anyerror!void { + const self: *SyncContext = @ptrCast(@alignCast(response.ctx)); + try self.body.appendSlice(self.allocator, data); + } + + fn doneCallback(ctx: *anyopaque) anyerror!void { + const self: *SyncContext = @ptrCast(@alignCast(ctx)); + self.completion = .done; + } + + fn errorCallback(ctx: *anyopaque, err: anyerror) void { + const self: *SyncContext = @ptrCast(@alignCast(ctx)); + self.completion = .{ .err = err }; + } + + fn shutdownCallback(ctx: *anyopaque) void { + const self: *SyncContext = @ptrCast(@alignCast(ctx)); + self.completion = .shutdown; + } +}; + +pub fn syncRequest(self: *Client, allocator: Allocator, params: RequestParams) !SyncResponse { + var sync_ctx = SyncContext{ .allocator = allocator, .body = .empty }; + errdefer sync_ctx.body.deinit(allocator); + + try self.request(.{ + .params = params, + .ctx = &sync_ctx, + .header_callback = SyncContext.headerCallback, + .data_callback = SyncContext.dataCallback, + .done_callback = SyncContext.doneCallback, + .error_callback = SyncContext.errorCallback, + .shutdown_callback = SyncContext.shutdownCallback, + }); + + 
while (sync_ctx.completion == .in_progress) { + _ = try self.tick(200); + } + + switch (sync_ctx.completion) { + .in_progress => @panic("Impossible to be in progress here."), + .done, .shutdown => return .{ + .status = sync_ctx.status, + .body = sync_ctx.body, + }, + .err => |e| return e, + } +} + fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { // The request was intercepted and is blocking. This is messy, but our // callers, the ScriptManager -> Page, don't have a great way to stop the @@ -507,7 +577,7 @@ pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { } self.intercepted -= 1; - if (!transfer.req.blocking) { + if (!transfer.req.params.blocking) { return self.process(transfer); } transfer._intercept_state = .@"continue"; @@ -521,7 +591,7 @@ pub fn abortTransfer(self: *Client, transfer: *Transfer) void { } self.intercepted -= 1; - if (!transfer.req.blocking) { + if (!transfer.req.params.blocking) { transfer.abort(error.Abort); } transfer._intercept_state = .{ .abort = error.Abort }; @@ -536,7 +606,7 @@ pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: self.intercepted -= 1; try transfer.fulfill(status, headers, body); - if (!transfer.req.blocking) { + if (!transfer.req.params.blocking) { transfer.deinit(); return; } @@ -554,7 +624,7 @@ pub fn incrReqId(self: *Client) u32 { } fn makeTransfer(self: *Client, req: Request) !*Transfer { - errdefer req.headers.deinit(); + errdefer req.params.headers.deinit(); const transfer = try self.transfer_pool.create(); errdefer self.transfer_pool.destroy(transfer); @@ -564,7 +634,7 @@ fn makeTransfer(self: *Client, req: Request) !*Transfer { .start_time = timestamp(.monotonic), .arena = ArenaAllocator.init(self.allocator), .id = id, - .url = req.url, + .url = req.params.url, .req = req, .client = self, }; @@ -581,7 +651,7 @@ fn requestFailed(transfer: *Transfer, err: anyerror, comptime execute_callback: transfer._notified_fail = true; - 
transfer.req.notification.dispatch(.http_request_fail, &.{ + transfer.req.params.notification.dispatch(.http_request_fail, &.{ .transfer = transfer, .err = err, }); @@ -704,7 +774,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T // TODO give a way to configure the number of auth retries. if (transfer._auth_challenge != null and transfer._tries < 10) { var wait_for_interception = false; - transfer.req.notification.dispatch( + transfer.req.params.notification.dispatch( .http_request_auth_required, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }, ); @@ -720,7 +790,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T // release the easy handle back into the pool. The transfer // is still valid/alive (just has no handle). transfer.releaseConn(); - if (!transfer.req.blocking) { + if (!transfer.req.params.blocking) { // In the case of an async request, we can just "forget" // about this transfer until it gets updated asynchronously // from some CDP command. 
@@ -810,7 +880,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T if (transfer._stream_buffer.items.len > 0) { try transfer.req.data_callback(Response.fromTransfer(transfer), body); - transfer.req.notification.dispatch(.http_response_data, &.{ + transfer.req.params.notification.dispatch(.http_response_data, &.{ .data = body, .transfer = transfer, }); @@ -827,7 +897,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T try transfer.req.done_callback(transfer.req.ctx); - transfer.req.notification.dispatch(.http_request_done, &.{ + transfer.req.params.notification.dispatch(.http_request_done, &.{ .transfer = transfer, }); @@ -940,14 +1010,7 @@ fn ensureNoActiveConnection(self: *const Client) !void { } } -pub const Request = struct { - pub const StartCallback = *const fn (response: Response) anyerror!void; - pub const HeaderCallback = *const fn (response: Response) anyerror!bool; - pub const DataCallback = *const fn (response: Response, data: []const u8) anyerror!void; - pub const DoneCallback = *const fn (ctx: *anyopaque) anyerror!void; - pub const ErrorCallback = *const fn (ctx: *anyopaque, err: anyerror) void; - pub const ShutdownCallback = *const fn (ctx: *anyopaque) void; - +pub const RequestParams = struct { frame_id: u32, loader_id: u32, method: Method, @@ -968,16 +1031,6 @@ pub const Request = struct { // reason for that is the Http Client is already a bit CDP-aware. 
blocking: bool = false, - // arbitrary data that can be associated with this request - ctx: *anyopaque = undefined, - - start_callback: ?StartCallback = null, - header_callback: HeaderCallback, - data_callback: DataCallback, - done_callback: DoneCallback, - error_callback: ErrorCallback, - shutdown_callback: ?ShutdownCallback = null, - const ResourceType = enum { document, xhr, @@ -998,11 +1051,35 @@ pub const Request = struct { } }; - pub fn deinit(self: *const Request) void { + pub fn deinit(self: *const RequestParams) void { self.headers.deinit(); } }; +pub const Request = struct { + pub const StartCallback = *const fn (response: Response) anyerror!void; + pub const HeaderCallback = *const fn (response: Response) anyerror!bool; + pub const DataCallback = *const fn (response: Response, data: []const u8) anyerror!void; + pub const DoneCallback = *const fn (ctx: *anyopaque) anyerror!void; + pub const ErrorCallback = *const fn (ctx: *anyopaque, err: anyerror) void; + pub const ShutdownCallback = *const fn (ctx: *anyopaque) void; + + params: RequestParams, + // arbitrary data that can be associated with this request + ctx: *anyopaque = undefined, + + start_callback: ?StartCallback = null, + header_callback: HeaderCallback, + data_callback: DataCallback, + done_callback: DoneCallback, + error_callback: ErrorCallback, + shutdown_callback: ?ShutdownCallback = null, + + pub fn deinit(self: *const Request) void { + self.params.deinit(); + } +}; + pub const Response = struct { ctx: *anyopaque, inner: union(enum) { @@ -1078,6 +1155,15 @@ pub const Response = struct { } }; +pub const SyncResponse = struct { + status: u16, + body: std.ArrayList(u8), + + pub fn deinit(self: *SyncResponse, allocator: Allocator) void { + self.body.deinit(allocator); + } +}; + pub const Transfer = struct { arena: ArenaAllocator, id: u32 = 0, @@ -1146,7 +1232,7 @@ pub const Transfer = struct { self._conn = null; } - self.req.headers.deinit(); + self.req.deinit(); self.arena.deinit(); 
self.client.transfer_pool.destroy(self); } @@ -1204,7 +1290,7 @@ pub const Transfer = struct { if (self._notified_fail) return; self._notified_fail = true; - self.req.notification.dispatch(.http_request_fail, &.{ + self.req.params.notification.dispatch(.http_request_fail, &.{ .transfer = self, .err = err, }); @@ -1226,15 +1312,15 @@ pub const Transfer = struct { try conn.setProxy(client.http_proxy); try conn.setTlsVerify(client.tls_verify, client.use_proxy); - try conn.setURL(req.url); - try conn.setMethod(req.method); - if (req.body) |b| { + try conn.setURL(req.params.url); + try conn.setMethod(req.params.method); + if (req.params.body) |b| { try conn.setBody(b); } else { try conn.setGetMode(); } - var header_list = req.headers; + var header_list = req.params.headers; try conn.secretHeaders(&header_list, &client.network.config.http_headers); try conn.setHeaders(&header_list); @@ -1246,12 +1332,12 @@ pub const Transfer = struct { conn.transport = .{ .http = self }; // Per-request timeout override (e.g. 
XHR timeout) - if (req.timeout_ms > 0) { - try conn.setTimeout(req.timeout_ms); + if (req.params.timeout_ms > 0) { + try conn.setTimeout(req.params.timeout_ms); } // add credentials - if (req.credentials) |creds| { + if (req.params.credentials) |creds| { if (self._auth_challenge != null and self._auth_challenge.?.source == .proxy) { try conn.setProxyCredentials(creds); } else { @@ -1301,12 +1387,12 @@ pub const Transfer = struct { } pub fn getCookieString(self: *Transfer) !?[:0]const u8 { - const jar = self.req.cookie_jar orelse return null; + const jar = self.req.params.cookie_jar orelse return null; var aw: std.Io.Writer.Allocating = .init(self.arena.allocator()); - try jar.forRequest(self.req.url, &aw.writer, .{ + try jar.forRequest(self.req.params.url, &aw.writer, .{ .is_http = true, - .origin_url = self.req.cookie_origin, - .is_navigation = self.req.resource_type == .document, + .origin_url = self.req.params.cookie_origin, + .is_navigation = self.req.params.resource_type == .document, }); const written = aw.written(); if (written.len == 0) return null; @@ -1316,7 +1402,7 @@ pub const Transfer = struct { pub fn format(self: *Transfer, writer: *std.Io.Writer) !void { const req = self.req; - return writer.print("{s} {s}", .{ @tagName(req.method), req.url }); + return writer.print("{s} {s}", .{ @tagName(req.params.method), req.params.url }); } pub fn updateURL(self: *Transfer, url: [:0]const u8) !void { @@ -1324,7 +1410,7 @@ pub const Transfer = struct { self.url = url; // for the request itself - self.req.url = url; + self.req.params.url = url; } fn handleRedirect(transfer: *Transfer) !void { @@ -1338,7 +1424,7 @@ pub const Transfer = struct { } // retrieve cookies from the redirect's response. 
- if (req.cookie_jar) |jar| { + if (req.params.cookie_jar) |jar| { var i: usize = 0; while (conn.getResponseHeader("set-cookie", i)) |ct| : (i += 1) { try jar.populateFromResponse(transfer.url, ct.value); @@ -1382,8 +1468,8 @@ pub const Transfer = struct { // 307, 308 → keep method and body. const status = try conn.getResponseCode(); if (status == 301 or status == 302 or status == 303) { - req.method = .GET; - req.body = null; + req.params.method = .GET; + req.params.body = null; } } @@ -1410,11 +1496,11 @@ pub const Transfer = struct { } pub fn updateCredentials(self: *Transfer, userpwd: [:0]const u8) void { - self.req.credentials = userpwd; + self.req.params.credentials = userpwd; } pub fn replaceRequestHeaders(self: *Transfer, allocator: Allocator, headers: []const http.Header) !void { - self.req.headers.deinit(); + self.req.params.headers.deinit(); var buf: std.ArrayList(u8) = .empty; var new_headers = try self.client.newHeaders(); @@ -1426,7 +1512,7 @@ pub const Transfer = struct { try buf.append(allocator, 0); // null terminated try new_headers.add(buf.items[0 .. 
buf.items.len - 1 :0]); } - self.req.headers = new_headers; + self.req.params.headers = new_headers; } // abortAuthChallenge is called when an auth challenge interception is @@ -1438,7 +1524,7 @@ pub const Transfer = struct { log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.intercepted }); } self.client.intercepted -= 1; - if (!self.req.blocking) { + if (!self.req.params.blocking) { self.abort(error.AbortAuthChallenge); return; } @@ -1454,7 +1540,7 @@ pub const Transfer = struct { try transfer.buildResponseHeader(conn); - if (transfer.req.cookie_jar) |jar| { + if (transfer.req.params.cookie_jar) |jar| { var i: usize = 0; while (true) { const ct = conn.getResponseHeader("set-cookie", i); @@ -1474,7 +1560,7 @@ pub const Transfer = struct { } } - transfer.req.notification.dispatch(.http_response_header_done, &.{ + transfer.req.params.notification.dispatch(.http_response_header_done, &.{ .transfer = transfer, }); @@ -1574,7 +1660,7 @@ pub const Transfer = struct { transfer.response_header = .{ .status = status, - .url = req.url, + .url = req.params.url, .redirect_count = 0, ._injected_headers = headers, }; diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 5f82b88e..3215eb7b 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -291,17 +291,19 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e defer self.is_evaluating = was_evaluating; try self.client.request(.{ - .url = url, .ctx = script, - .method = .GET, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .headers = try self.getHeaders(), - .blocking = is_blocking, - .cookie_jar = &frame._session.cookie_jar, - .cookie_origin = frame.url, - .resource_type = .script, - .notification = frame._session.notification, + .params = .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = try self.getHeaders(), + .blocking = is_blocking, + 
.cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .resource_type = .script, + .notification = frame._session.notification, + }, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, @@ -407,16 +409,18 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const self.async_scripts.append(&script.node); self.client.request(.{ - .url = url, .ctx = script, - .method = .GET, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .headers = try self.getHeaders(), - .cookie_jar = &frame._session.cookie_jar, - .cookie_origin = frame.url, - .resource_type = .script, - .notification = frame._session.notification, + .params = .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = try self.getHeaders(), + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .resource_type = .script, + .notification = frame._session.notification, + }, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, @@ -513,16 +517,18 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C self.async_scripts.append(&script.node); self.client.request(.{ - .url = url, - .method = .GET, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .headers = try self.getHeaders(), .ctx = script, - .resource_type = .script, - .cookie_jar = &frame._session.cookie_jar, - .cookie_origin = frame.url, - .notification = frame._session.notification, + .params = .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = try self.getHeaders(), + .resource_type = .script, + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .notification = frame._session.notification, + 
}, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, diff --git a/src/browser/webapi/Worker.zig b/src/browser/webapi/Worker.zig index 558d4cce..81d74116 100644 --- a/src/browser/webapi/Worker.zig +++ b/src/browser/webapi/Worker.zig @@ -95,15 +95,17 @@ pub fn init(url: []const u8, exec: *Execution) !*Worker { const http_client = session.browser.http_client; http_client.request(.{ .ctx = self, - .url = resolved_url, - .method = .GET, - .headers = try http_client.newHeaders(), - .frame_id = self._frame_id, - .loader_id = self._loader_id, - .resource_type = .script, - .cookie_jar = &session.cookie_jar, - .cookie_origin = resolved_url, - .notification = session.notification, + .params = .{ + .url = resolved_url, + .method = .GET, + .headers = try http_client.newHeaders(), + .frame_id = self._frame_id, + .loader_id = self._loader_id, + .resource_type = .script, + .cookie_jar = &session.cookie_jar, + .cookie_origin = resolved_url, + .notification = session.notification, + }, .header_callback = httpHeaderCallback, .data_callback = httpDataCallback, .done_callback = httpDoneCallback, diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig index 623037d6..777f8311 100644 --- a/src/browser/webapi/net/Fetch.zig +++ b/src/browser/webapi/net/Fetch.zig @@ -94,16 +94,18 @@ pub fn init(input: Input, options: ?InitOpts, frame: *Frame) !js.Promise { try http_client.request(.{ .ctx = fetch, - .url = request._url, - .method = request._method, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .body = request._body, - .headers = headers, - .resource_type = .fetch, - .cookie_jar = cookie_jar, - .cookie_origin = frame.url, - .notification = frame._session.notification, + .params = .{ + .url = request._url, + .method = request._method, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .body = request._body, + .headers = 
headers, + .resource_type = .fetch, + .cookie_jar = cookie_jar, + .cookie_origin = frame.url, + .notification = frame._session.notification, + }, .start_callback = httpStartCallback, .header_callback = httpHeaderDoneCallback, .data_callback = httpDataCallback, diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 35f7f7fa..19a7676b 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -257,17 +257,19 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void { http_client.request(.{ .ctx = self, - .url = self._url, - .method = self._method, - .headers = headers, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .body = self._request_body, - .cookie_jar = if (cookie_support) &frame._session.cookie_jar else null, - .cookie_origin = frame.url, - .resource_type = .xhr, - .timeout_ms = self._timeout, - .notification = frame._session.notification, + .params = .{ + .url = self._url, + .method = self._method, + .headers = headers, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .body = self._request_body, + .cookie_jar = if (cookie_support) &frame._session.cookie_jar else null, + .cookie_origin = frame.url, + .resource_type = .xhr, + .timeout_ms = self._timeout, + .notification = frame._session.notification, + }, .start_callback = httpStartCallback, .header_callback = httpHeaderDoneCallback, .data_callback = httpDataCallback, diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index d0a1d1db..57d0bd56 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -198,9 +198,9 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification. 
try bc.cdp.sendEvent("Fetch.requestPaused", .{ .requestId = &id.toInterceptId(transfer.id), - .frameId = &id.toFrameId(transfer.req.frame_id), + .frameId = &id.toFrameId(transfer.req.params.frame_id), .request = network.TransferAsRequestWriter.init(transfer), - .resourceType = switch (transfer.req.resource_type) { + .resourceType = switch (transfer.req.params.resource_type) { .script => "Script", .xhr => "XHR", .document => "Document", @@ -251,7 +251,7 @@ fn continueRequest(cmd: *CDP.Command) !void { try transfer.updateURL(try arena.dupeZ(u8, url)); } if (params.method) |method| { - transfer.req.method = std.meta.stringToEnum(http.Method, method) orelse return error.InvalidParams; + transfer.req.params.method = std.meta.stringToEnum(http.Method, method) orelse return error.InvalidParams; } if (params.headers) |headers| { @@ -265,7 +265,7 @@ fn continueRequest(cmd: *CDP.Command) !void { const decoder = std.base64.standard.Decoder; const body = try arena.alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(body, b); - transfer.req.body = body; + transfer.req.params.body = body; } try bc.cdp.browser.http_client.continueTransfer(transfer); @@ -402,9 +402,9 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati try bc.cdp.sendEvent("Fetch.authRequired", .{ .requestId = &id.toInterceptId(transfer.id), - .frameId = &id.toFrameId(transfer.req.frame_id), + .frameId = &id.toFrameId(transfer.req.params.frame_id), .request = network.TransferAsRequestWriter.init(transfer), - .resourceType = switch (transfer.req.resource_type) { + .resourceType = switch (transfer.req.params.resource_type) { .script => "Script", .xhr => "XHR", .document => "Document", diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index ff5778e9..74c80197 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -275,20 +275,20 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques const transfer = 
msg.transfer; const req = &transfer.req; - const frame_id = req.frame_id; + const frame_id = req.params.frame_id; const frame = bc.session.findFrameByFrameId(frame_id) orelse return; // Modify request with extra CDP headers for (bc.extra_headers.items) |extra| { - try req.headers.add(extra); + try req.params.headers.add(extra); } // We're missing a bunch of fields, but, for now, this eems like enough try bc.cdp.sendEvent("Network.requestWillBeSent", .{ .frameId = &id.toFrameId(frame_id), .requestId = &id.toRequestId(transfer), - .loaderId = &id.toLoaderId(req.loader_id), - .type = req.resource_type.string(), + .loaderId = &id.toLoaderId(req.params.loader_id), + .type = req.params.resource_type.string(), .documentURL = frame.url, .request = TransferAsRequestWriter.init(transfer), .initiator = .{ .type = "other" }, @@ -309,9 +309,9 @@ pub fn httpResponseHeaderDone(arena: Allocator, bc: *CDP.BrowserContext, msg: *c // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.responseReceived", .{ - .frameId = &id.toFrameId(req.frame_id), + .frameId = &id.toFrameId(req.params.frame_id), .requestId = &id.toRequestId(transfer), - .loaderId = &id.toLoaderId(req.loader_id), + .loaderId = &id.toLoaderId(req.params.loader_id), .response = TransferAsResponseWriter.init(arena, transfer), .hasExtraInfo = false, // TODO change after adding Network.responseReceivedExtraInfo }, .{ .session_id = session_id }); @@ -359,18 +359,18 @@ pub const TransferAsRequestWriter = struct { { try jws.objectField("method"); - try jws.write(@tagName(transfer.req.method)); + try jws.write(@tagName(transfer.req.params.method)); } { try jws.objectField("hasPostData"); - try jws.write(transfer.req.body != null); + try jws.write(transfer.req.params.body != null); } { try jws.objectField("headers"); try jws.beginObject(); - var it = transfer.req.headers.iterator(); + var it = transfer.req.params.headers.iterator(); while (it.next()) |hdr| { try 
jws.objectField(hdr.name); try jws.write(hdr.value); diff --git a/src/cdp/id.zig b/src/cdp/id.zig index a5d1286f..5dc3a6c2 100644 --- a/src/cdp/id.zig +++ b/src/cdp/id.zig @@ -43,8 +43,8 @@ pub fn toLoaderId(id: u32) [14]u8 { const Transfer = @import("../browser/HttpClient.zig").Transfer; pub fn toRequestId(transfer: *const Transfer) [14]u8 { const req = transfer.req; - if (req.resource_type == .document) { - return toLoaderId(req.loader_id); + if (req.params.resource_type == .document) { + return toLoaderId(req.params.loader_id); } var buf: [14]u8 = undefined; diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 60dd1f2d..7bc36e39 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -48,18 +48,18 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { const self: *CacheLayer = @ptrCast(@alignCast(ptr)); const network = ctx.network; - if (req.method != .GET) { + if (req.params.method != .GET) { return self.next.request(ctx, req); } const arena = try network.app.arena_pool.acquire(.small, "CacheLayer"); errdefer network.app.arena_pool.release(arena); - var iter = req.headers.iterator(); + var iter = req.params.headers.iterator(); const req_header_list = try iter.collect(arena); if (network.cache.?.get(arena, .{ - .url = req.url, + .url = req.params.url, .timestamp = std.time.timestamp(), .request_headers = req_header_list.items, })) |cached| { @@ -73,8 +73,8 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { .arena = arena, .context = ctx, .forward = Forward.fromRequest(req), - .req_url = req.url, - .req_headers = req.headers, + .req_url = req.params.url, + .req_headers = req.params.headers, }; const wrapped = cache_ctx.forward.wrapRequest( diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 0119d747..714d205b 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -56,17 +56,17 @@ fn 
request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { const arena = try ctx.network.app.arena_pool.acquire(.small, "RobotsLayer"); errdefer ctx.network.app.arena_pool.release(arena); - const robots_url = try URL.getRobotsUrl(arena, req.url); + const robots_url = try URL.getRobotsUrl(arena, req.params.url); if (ctx.network.robot_store.get(robots_url)) |robot_entry| { defer ctx.network.app.arena_pool.release(arena); switch (robot_entry) { .present => |robots| { - const path = URL.getPathname(req.url); + const path = URL.getPathname(req.params.url); if (!robots.isAllowed(path)) { - log.warn(.http, "blocked by robots", .{ .url = req.url }); + log.warn(.http, "blocked by robots", .{ .url = req.params.url }); req.error_callback(req.ctx, error.RobotsBlocked); return; } @@ -108,16 +108,18 @@ fn fetchRobotsThenRequest( try self.next.request(ctx, .{ .ctx = robots_ctx, - .url = robots_url, - .method = .GET, - .headers = headers, - .blocking = false, - .frame_id = req.frame_id, - .loader_id = req.loader_id, - .cookie_jar = req.cookie_jar, - .cookie_origin = req.cookie_origin, - .notification = req.notification, - .resource_type = .fetch, + .params = .{ + .url = robots_url, + .method = .GET, + .headers = headers, + .blocking = false, + .frame_id = req.params.frame_id, + .loader_id = req.params.loader_id, + .cookie_jar = req.params.cookie_jar, + .cookie_origin = req.params.cookie_origin, + .notification = req.params.notification, + .resource_type = .fetch, + }, .header_callback = RobotsContext.headerCallback, .data_callback = RobotsContext.dataCallback, .done_callback = RobotsContext.doneCallback, @@ -138,7 +140,7 @@ fn flushPending(self: *RobotsLayer, ctx: Context, robots_url: [:0]const u8, allo for (queued.value.items) |queued_req| { if (!allowed) { - log.warn(.http, "blocked by robots", .{ .url = queued_req.url }); + log.warn(.http, "blocked by robots", .{ .url = queued_req.params.url }); defer queued_req.deinit(); queued_req.error_callback(queued_req.ctx, 
error.RobotsBlocked); } else { @@ -219,7 +221,7 @@ const RobotsContext = struct { }; if (robots) |r| { try network.robot_store.put(robots_url, r); - const path = URL.getPathname(l.pending.get(robots_url).?.items[0].url); + const path = URL.getPathname(l.pending.get(robots_url).?.items[0].params.url); allowed = r.isAllowed(path); } } diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig index 42bf99a2..73d0aff3 100644 --- a/src/network/layer/WebBotAuthLayer.zig +++ b/src/network/layer/WebBotAuthLayer.zig @@ -45,8 +45,8 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { const arena = try ctx.network.app.arena_pool.acquire(.small, "WebBotAuthLayer"); defer ctx.network.app.arena_pool.release(arena); - const authority = URL.getHost(req.url); - try wba.signRequest(arena, &our_req.headers, authority); + const authority = URL.getHost(req.params.url); + try wba.signRequest(arena, &our_req.params.headers, authority); return self.next.request(ctx, our_req); } From b4a9bdd7a367dc3fde0d32b00cbd5e9a5f0dacab Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 16:45:53 -0700 Subject: [PATCH 08/36] use syncRequest in ScriptManager --- src/browser/ScriptManager.zig | 80 ++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 3215eb7b..28a3aefe 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -265,13 +265,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e } if (remote_url) |url| { - errdefer { - if (is_blocking == false) { - self.scriptList(script).remove(&script.node); - } - // Let the outer errdefer handle releasing the arena if client.request fails - } - if (comptime IS_DEBUG) { var ls: js.Local.Scope = undefined; frame.js.localScope(&ls); @@ -285,10 +278,35 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: 
bool, script_e }); } - { - const was_evaluating = self.is_evaluating; - self.is_evaluating = true; - defer self.is_evaluating = was_evaluating; + const was_evaluating = self.is_evaluating; + self.is_evaluating = true; + defer self.is_evaluating = was_evaluating; + + const headers = try self.getHeaders(); + errdefer headers.deinit(); + + if (is_blocking) { + const response = try self.client.syncRequest(arena, .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = headers, + .blocking = true, + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .resource_type = .script, + .notification = frame._session.notification, + }); + + script.source = .{ .remote = response.body }; + script.status = response.status; + script.complete = true; + } else { + errdefer { + self.scriptList(script).remove(&script.node); + // Let the outer errdefer handle releasing the arena if client.request fails + } try self.client.request(.{ .ctx = script, @@ -297,8 +315,8 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e .method = .GET, .frame_id = frame._frame_id, .loader_id = frame._loader_id, - .headers = try self.getHeaders(), - .blocking = is_blocking, + .headers = headers, + .blocking = false, .cookie_jar = &frame._session.cookie_jar, .cookie_origin = frame.url, .resource_type = .script, @@ -319,29 +337,21 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e return; } - // this is , it needs to block the caller - // until it's evaluated - var client = self.client; - while (true) { - if (!script.complete) { - _ = try client.tick(200); - continue; - } - if (script.status == 0) { - // an error (that we already logged) - script.deinit(); - return; - } - - // could have already been evaluating if this is dynamically added - const was_evaluating = self.is_evaluating; - self.is_evaluating = true; - defer { - self.is_evaluating = was_evaluating; - 
script.deinit(); - } - return script.eval(frame); + if (script.status == 0) { + // an error (that we already logged) + script.deinit(); + return; } + + // could have already been evaluating if this is dynamically added + const was_evaluating = self.is_evaluating; + self.is_evaluating = true; + defer { + self.is_evaluating = was_evaluating; + script.deinit(); + } + + script.eval(frame); } fn scriptList(self: *ScriptManager, script: *const Script) *std.DoublyLinkedList { From e988e491361ccff100e8fb4955c10556494e1e22 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 17:20:30 -0700 Subject: [PATCH 09/36] remove Context and thread *Client --- src/browser/HttpClient.zig | 19 +++------- src/network/layer/CacheLayer.zig | 22 ++++++------ src/network/layer/RobotsLayer.zig | 52 +++++++++++++-------------- src/network/layer/WebBotAuthLayer.zig | 12 +++---- 4 files changed, 48 insertions(+), 57 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 0dbee3bd..08c3bed5 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -137,24 +137,16 @@ robots_layer: RobotsLayer, web_bot_auth_layer: WebBotAuthLayer, entry_layer: Layer, -pub const Context = struct { - network: *Network, - - pub fn newHeaders(self: Context) !http.Headers { - return http.Headers.init(self.network.config.http_headers.user_agent_header); - } -}; - pub const Layer = struct { ptr: *anyopaque, vtable: *const VTable, pub const VTable = struct { - request: *const fn (*anyopaque, Context, Request) anyerror!void, + request: *const fn (*anyopaque, *Client, Request) anyerror!void, }; - pub fn request(self: Layer, ctx: Context, req: Request) !void { - return self.vtable.request(self.ptr, ctx, req); + pub fn request(self: Layer, client: *Client, req: Request) !void { + return self.vtable.request(self.ptr, client, req); } }; @@ -402,7 +394,7 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { return self.perform(@intCast(timeout_ms)); } -pub 
fn _request(ptr: *anyopaque, _: Context, req: Request) !void { +pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { const self: *Client = @ptrCast(@alignCast(ptr)); const transfer = try self.makeTransfer(req); @@ -438,8 +430,7 @@ pub fn _request(ptr: *anyopaque, _: Context, req: Request) !void { } pub fn request(self: *Client, req: Request) !void { - const ctx = Context{ .network = self.network }; - return self.entry_layer.request(ctx, req); + return self.entry_layer.request(self, req); } const SyncContext = struct { diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 7bc36e39..317457b5 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -20,8 +20,8 @@ const std = @import("std"); const log = @import("../../log.zig"); const http = @import("../http.zig"); +const Client = @import("../../browser/HttpClient.zig").Client; const Transfer = @import("../../browser/HttpClient.zig").Transfer; -const Context = @import("../../browser/HttpClient.zig").Context; const Request = @import("../../browser/HttpClient.zig").Request; const Response = @import("../../browser/HttpClient.zig").Response; const Layer = @import("../../browser/HttpClient.zig").Layer; @@ -44,12 +44,12 @@ pub fn layer(self: *CacheLayer) Layer { }; } -fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { +fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const self: *CacheLayer = @ptrCast(@alignCast(ptr)); - const network = ctx.network; + const network = client.network; if (req.params.method != .GET) { - return self.next.request(ctx, req); + return self.next.request(client, req); } const arena = try network.app.arena_pool.acquire(.small, "CacheLayer"); @@ -71,7 +71,7 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { const cache_ctx = try arena.create(CacheContext); cache_ctx.* = .{ .arena = arena, - .context = ctx, + .client = client, .forward = 
Forward.fromRequest(req), .req_url = req.params.url, .req_headers = req.params.headers, @@ -89,7 +89,7 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { }, ); - return self.next.request(ctx, wrapped); + return self.next.request(client, wrapped); } fn serveFromCache(req: Request, cached: *const CachedResponse) !void { @@ -138,7 +138,7 @@ fn serveFromCache(req: Request, cached: *const CachedResponse) !void { const CacheContext = struct { arena: std.mem.Allocator, - context: Context, + client: *Client, transfer: ?*Transfer = null, forward: Forward, req_url: [:0]const u8, @@ -208,12 +208,12 @@ const CacheContext = struct { fn doneCallback(ctx: *anyopaque) anyerror!void { const self: *CacheContext = @ptrCast(@alignCast(ctx)); - defer self.context.network.app.arena_pool.release(self.arena); + defer self.client.network.app.arena_pool.release(self.arena); const transfer = self.transfer orelse @panic("Start Callback didn't set CacheLayer.transfer"); if (self.pending_metadata) |metadata| { - const cache = &self.context.network.cache.?; + const cache = &self.client.network.cache.?; log.debug(.browser, "http cache", .{ .key = self.req_url, .metadata = metadata }); cache.put(metadata.*, transfer._stream_buffer.items) catch |err| { @@ -227,13 +227,13 @@ const CacheContext = struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *CacheContext = @ptrCast(@alignCast(ctx)); - defer self.context.network.app.arena_pool.release(self.arena); + defer self.client.network.app.arena_pool.release(self.arena); self.forward.forwardShutdown(); } fn errorCallback(ctx: *anyopaque, e: anyerror) void { const self: *CacheContext = @ptrCast(@alignCast(ctx)); - defer self.context.network.app.arena_pool.release(self.arena); + defer self.client.network.app.arena_pool.release(self.arena); self.forward.forwardErr(e); } }; diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 714d205b..add4ee3d 100644 --- 
a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -21,7 +21,7 @@ const log = @import("../../log.zig"); const URL = @import("../../browser/URL.zig"); const Robots = @import("../Robots.zig"); -const Context = @import("../../browser/HttpClient.zig").Context; +const Client = @import("../../browser/HttpClient.zig").Client; const Request = @import("../../browser/HttpClient.zig").Request; const Response = @import("../../browser/HttpClient.zig").Response; const Layer = @import("../../browser/HttpClient.zig").Layer; @@ -50,16 +50,16 @@ pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void { self.pending.deinit(allocator); } -fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { +fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const self: *RobotsLayer = @ptrCast(@alignCast(ptr)); - const arena = try ctx.network.app.arena_pool.acquire(.small, "RobotsLayer"); - errdefer ctx.network.app.arena_pool.release(arena); + const arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer"); + errdefer client.network.app.arena_pool.release(arena); const robots_url = try URL.getRobotsUrl(arena, req.params.url); - if (ctx.network.robot_store.get(robots_url)) |robot_entry| { - defer ctx.network.app.arena_pool.release(arena); + if (client.network.robot_store.get(robots_url)) |robot_entry| { + defer client.network.app.arena_pool.release(arena); switch (robot_entry) { .present => |robots| { @@ -73,20 +73,20 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { }, .absent => {}, } - return self.next.request(ctx, req); + return self.next.request(client, req); } - return self.fetchRobotsThenRequest(ctx, arena, robots_url, req); + return self.fetchRobotsThenRequest(client, arena, robots_url, req); } fn fetchRobotsThenRequest( self: *RobotsLayer, - ctx: Context, + client: *Client, arena: std.mem.Allocator, robots_url: [:0]const u8, req: Request, ) !void { - errdefer 
ctx.network.app.arena_pool.release(arena); + errdefer client.network.app.arena_pool.release(arena); const entry = try self.pending.getOrPut(self.allocator, robots_url); @@ -97,16 +97,16 @@ fn fetchRobotsThenRequest( const robots_ctx = try arena.create(RobotsContext); robots_ctx.* = .{ .layer = self, - .ctx = ctx, + .client = client, .arena = arena, .robots_url = robots_url, .buffer = .empty, }; - const headers = try ctx.newHeaders(); + const headers = try client.newHeaders(); log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); - try self.next.request(ctx, .{ + try self.next.request(client, .{ .ctx = robots_ctx, .params = .{ .url = robots_url, @@ -127,13 +127,13 @@ fn fetchRobotsThenRequest( .shutdown_callback = RobotsContext.shutdownCallback, }); } else { - ctx.network.app.arena_pool.release(arena); + client.network.app.arena_pool.release(arena); } try entry.value_ptr.append(self.allocator, req); } -fn flushPending(self: *RobotsLayer, ctx: Context, robots_url: [:0]const u8, allowed: bool) void { +fn flushPending(self: *RobotsLayer, client: *Client, robots_url: [:0]const u8, allowed: bool) void { var queued = self.pending.fetchRemove(robots_url) orelse @panic("RobotsLayer.flushPending: missing queue"); defer queued.value.deinit(self.allocator); @@ -144,7 +144,7 @@ fn flushPending(self: *RobotsLayer, ctx: Context, robots_url: [:0]const u8, allo defer queued_req.deinit(); queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); } else { - self.next.request(ctx, queued_req) catch |e| { + self.next.request(client, queued_req) catch |e| { defer queued_req.deinit(); queued_req.error_callback(queued_req.ctx, e); }; @@ -166,7 +166,7 @@ fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8) void { const RobotsContext = struct { layer: *RobotsLayer, arena: std.mem.Allocator, - ctx: Context, + client: *Client, robots_url: [:0]const u8, buffer: std.ArrayListUnmanaged(u8), status: u16 = 0, @@ -201,12 +201,12 @@ const RobotsContext = 
struct { fn doneCallback(ctx_ptr: *anyopaque) anyerror!void { const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); const l = self.layer; - const ctx = self.ctx; + const client = self.client; const robots_url = self.robots_url; - defer ctx.network.app.arena_pool.release(self.arena); + defer client.network.app.arena_pool.release(self.arena); var allowed = true; - const network = ctx.network; + const network = client.network; switch (self.status) { 200 => { @@ -239,26 +239,26 @@ const RobotsContext = struct { }, } - l.flushPending(ctx, robots_url, allowed); + l.flushPending(client, robots_url, allowed); } fn errorCallback(ctx_ptr: *anyopaque, err: anyerror) void { const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); const l = self.layer; - const ctx = self.ctx; + const client = self.client; const robots_url = self.robots_url; - defer ctx.network.app.arena_pool.release(self.arena); + defer client.network.app.arena_pool.release(self.arena); log.warn(.http, "robots fetch failed", .{ .err = err }); - l.flushPending(ctx, robots_url, true); + l.flushPending(client, robots_url, true); } fn shutdownCallback(ctx_ptr: *anyopaque) void { const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); const l = self.layer; - const ctx = self.ctx; + const client = self.client; const robots_url = self.robots_url; - defer ctx.network.app.arena_pool.release(self.arena); + defer client.network.app.arena_pool.release(self.arena); log.debug(.http, "robots fetch shutdown", .{}); l.flushPendingShutdown(robots_url); diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig index 73d0aff3..25260872 100644 --- a/src/network/layer/WebBotAuthLayer.zig +++ b/src/network/layer/WebBotAuthLayer.zig @@ -21,7 +21,7 @@ const log = @import("../../log.zig"); const URL = @import("../../browser/URL.zig"); const WebBotAuth = @import("../WebBotAuth.zig"); -const Context = @import("../../browser/HttpClient.zig").Context; +const Client = 
@import("../../browser/HttpClient.zig").Client; const Request = @import("../../browser/HttpClient.zig").Request; const Layer = @import("../../browser/HttpClient.zig").Layer; @@ -36,17 +36,17 @@ pub fn layer(self: *WebBotAuthLayer) Layer { }; } -fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { +fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const self: *WebBotAuthLayer = @ptrCast(@alignCast(ptr)); var our_req = req; - const wba = ctx.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth"); + const wba = client.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth"); - const arena = try ctx.network.app.arena_pool.acquire(.small, "WebBotAuthLayer"); - defer ctx.network.app.arena_pool.release(arena); + const arena = try client.network.app.arena_pool.acquire(.small, "WebBotAuthLayer"); + defer client.network.app.arena_pool.release(arena); const authority = URL.getHost(req.params.url); try wba.signRequest(arena, &our_req.params.headers, authority); - return self.next.request(ctx, our_req); + return self.next.request(client, our_req); } From e37c14a71447203c869ce18f292f0eab97800ff2 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 17:26:42 -0700 Subject: [PATCH 10/36] Transfer now uses Request's request_id --- src/browser/HttpClient.zig | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 08c3bed5..b9b63a41 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -430,7 +430,10 @@ pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { } pub fn request(self: *Client, req: Request) !void { - return self.entry_layer.request(self, req); + // Assign Request Id. 
+ var our_req = req; + our_req.params.request_id = self.incrReqId(); + return self.entry_layer.request(self, our_req); } const SyncContext = struct { @@ -620,11 +623,10 @@ fn makeTransfer(self: *Client, req: Request) !*Transfer { const transfer = try self.transfer_pool.create(); errdefer self.transfer_pool.destroy(transfer); - const id = self.incrReqId(); transfer.* = .{ .start_time = timestamp(.monotonic), .arena = ArenaAllocator.init(self.allocator), - .id = id, + .id = req.params.request_id, .url = req.params.url, .req = req, .client = self, @@ -1002,6 +1004,8 @@ fn ensureNoActiveConnection(self: *const Client) !void { } pub const RequestParams = struct { + /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. + request_id: u32 = undefined, frame_id: u32, loader_id: u32, method: Method, From 14ad5c9cdc53aa84995418a43e04f05d1f9df7ca Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 17:38:47 -0700 Subject: [PATCH 11/36] move RequestStart to InterceptionLayer --- src/Notification.zig | 3 +- src/browser/HttpClient.zig | 41 ++++++++------- src/cdp/domains/network.zig | 67 ++++++++++++++++++++++-- src/cdp/id.zig | 11 ++++ src/network/layer/InterceptionLayer.zig | 68 +++++++++++++++++++++++++ 5 files changed, 168 insertions(+), 22 deletions(-) create mode 100644 src/network/layer/InterceptionLayer.zig diff --git a/src/Notification.zig b/src/Notification.zig index d01d29ce..20f2f12b 100644 --- a/src/Notification.zig +++ b/src/Notification.zig @@ -21,6 +21,7 @@ const lp = @import("lightpanda"); const Frame = @import("browser/Frame.zig"); const Transfer = @import("browser/HttpClient.zig").Transfer; +const Request = @import("browser/HttpClient.zig").Request; const log = lp.log; const List = std.DoublyLinkedList; @@ -162,7 +163,7 @@ pub const FrameLoaded = struct { }; pub const RequestStart = struct { - transfer: *Transfer, + request: *Request, }; pub const RequestIntercept = struct { diff --git 
a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index b9b63a41..ef924e3f 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -44,6 +44,7 @@ const CachedResponse = @import("../network/cache/Cache.zig").CachedResponse; pub const CacheLayer = @import("../network/layer/CacheLayer.zig"); pub const RobotsLayer = @import("../network/layer/RobotsLayer.zig"); pub const WebBotAuthLayer = @import("../network/layer/WebBotAuthLayer.zig"); +pub const InterceptionLayer = @import("../network/layer/InterceptionLayer.zig"); // This is loosely tied to a browser Page. Loading all the , doing // XHR requests, and loading imports all happens through here. Sine the app @@ -135,6 +136,7 @@ max_response_size: usize, cache_layer: CacheLayer, robots_layer: RobotsLayer, web_bot_auth_layer: WebBotAuthLayer, +interception_layer: InterceptionLayer, entry_layer: Layer, pub const Layer = struct { @@ -200,15 +202,12 @@ pub fn init(allocator: Allocator, network: *Network) !*Client { .cache_layer = .{}, .robots_layer = .{ .allocator = allocator }, .web_bot_auth_layer = .{}, + .interception_layer = .{}, .entry_layer = undefined, }; var next = client.layer(); - if (network.config.webBotAuth() != null) { - next = layerWith(&client.web_bot_auth_layer, next); - } - if (network.config.obeyRobots()) { next = layerWith(&client.robots_layer, next); } @@ -217,6 +216,12 @@ pub fn init(allocator: Allocator, network: *Network) !*Client { next = layerWith(&client.cache_layer, next); } + next = layerWith(&client.interception_layer, next); + + if (network.config.webBotAuth() != null) { + next = layerWith(&client.web_bot_auth_layer, next); + } + client.entry_layer = next; return client; @@ -399,8 +404,6 @@ pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { const transfer = try self.makeTransfer(req); - transfer.req.params.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); - var wait_for_interception = false; 
transfer.req.params.notification.dispatch(.http_request_intercept, &.{ .transfer = transfer, @@ -1070,6 +1073,20 @@ pub const Request = struct { error_callback: ErrorCallback, shutdown_callback: ?ShutdownCallback = null, + pub fn getCookieString(self: *Request, allocator: std.mem.Allocator) !?[:0]const u8 { + const jar = self.params.cookie_jar orelse return null; + var aw: std.Io.Writer.Allocating = .init(allocator); + try jar.forRequest(self.params.url, &aw.writer, .{ + .is_http = true, + .origin_url = self.params.cookie_origin, + .is_navigation = self.params.resource_type == .document, + }); + const written = aw.written(); + if (written.len == 0) return null; + try aw.writer.writeByte(0); + return written.ptr[0..written.len :0]; + } + pub fn deinit(self: *const Request) void { self.params.deinit(); } @@ -1382,17 +1399,7 @@ pub const Transfer = struct { } pub fn getCookieString(self: *Transfer) !?[:0]const u8 { - const jar = self.req.params.cookie_jar orelse return null; - var aw: std.Io.Writer.Allocating = .init(self.arena.allocator()); - try jar.forRequest(self.req.params.url, &aw.writer, .{ - .is_http = true, - .origin_url = self.req.params.cookie_origin, - .is_navigation = self.req.params.resource_type == .document, - }); - const written = aw.written(); - if (written.len == 0) return null; - try aw.writer.writeByte(0); - return written.ptr[0..written.len :0]; + return self.req.getCookieString(self.arena.allocator()); } pub fn format(self: *Transfer, writer: *std.Io.Writer) !void { diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 74c80197..f9e17194 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -27,6 +27,7 @@ const Mime = @import("../../browser/Mime.zig"); const Notification = @import("../../Notification.zig"); const timestamp = @import("../../datetime.zig").timestamp; const Transfer = @import("../../browser/HttpClient.zig").Transfer; +const Request = @import("../../browser/HttpClient.zig").Request; 
const CdpStorage = @import("storage.zig"); @@ -273,8 +274,7 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques // things, but no session. const session_id = bc.session_id orelse return; - const transfer = msg.transfer; - const req = &transfer.req; + const req = msg.request; const frame_id = req.params.frame_id; const frame = bc.session.findFrameByFrameId(frame_id) orelse return; @@ -286,11 +286,11 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques // We're missing a bunch of fields, but, for now, this eems like enough try bc.cdp.sendEvent("Network.requestWillBeSent", .{ .frameId = &id.toFrameId(frame_id), - .requestId = &id.toRequestId(transfer), + .requestId = &id.toRequestId2(req), .loaderId = &id.toLoaderId(req.params.loader_id), .type = req.params.resource_type.string(), .documentURL = frame.url, - .request = TransferAsRequestWriter.init(transfer), + .request = RequestWriter.init(req), .initiator = .{ .type = "other" }, .redirectHasExtraInfo = false, // TODO change after adding Network.requestWillBeSentExtraInfo .hasUserGesture = false, @@ -328,6 +328,65 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request }, .{ .session_id = session_id }); } +pub const RequestWriter = struct { + request: *const Request, + + pub fn init(request: *const Request) RequestWriter { + return .{ + .request = request, + }; + } + + pub fn jsonStringify(self: *const RequestWriter, jws: anytype) !void { + self._jsonStringify(jws) catch return error.WriteFailed; + } + + fn _jsonStringify(self: *const RequestWriter, jws: anytype) !void { + const request = self.request; + + try jws.beginObject(); + { + try jws.objectField("url"); + try jws.write(request.params.url); + } + + { + const frag = URL.getHash(request.params.url); + if (frag.len > 0) { + try jws.objectField("urlFragment"); + try jws.write(frag); + } + } + + { + try jws.objectField("method"); + try 
jws.write(@tagName(request.params.method)); + } + + { + try jws.objectField("hasPostData"); + try jws.write(request.params.body != null); + } + + { + try jws.objectField("headers"); + try jws.beginObject(); + var it = request.params.headers.iterator(); + while (it.next()) |hdr| { + try jws.objectField(hdr.name); + try jws.write(hdr.value); + } + // TODO: Fix. + // if (try request.getCookieString()) |cookies| { + // try jws.objectField("Cookie"); + // try jws.write(cookies[0 .. cookies.len - 1]); + // } + try jws.endObject(); + } + try jws.endObject(); + } +}; + pub const TransferAsRequestWriter = struct { transfer: *Transfer, diff --git a/src/cdp/id.zig b/src/cdp/id.zig index 5dc3a6c2..cb304f6c 100644 --- a/src/cdp/id.zig +++ b/src/cdp/id.zig @@ -52,6 +52,17 @@ pub fn toRequestId(transfer: *const Transfer) [14]u8 { return buf; } +const Request = @import("../browser/HttpClient.zig").Request; +pub fn toRequestId2(req: *const Request) [14]u8 { + if (req.params.resource_type == .document) { + return toLoaderId(req.params.loader_id); + } + + var buf: [14]u8 = undefined; + _ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{req.params.request_id}) catch unreachable; + return buf; +} + pub fn toInterceptId(id: u32) [14]u8 { var buf: [14]u8 = undefined; _ = std.fmt.bufPrint(&buf, "INT-{d:0>10}", .{id}) catch unreachable; diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig new file mode 100644 index 00000000..149e4154 --- /dev/null +++ b/src/network/layer/InterceptionLayer.zig @@ -0,0 +1,68 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +const std = @import("std"); +const builtin = @import("builtin"); +const log = @import("../../log.zig"); + +const IS_DEBUG = builtin.mode == .Debug; + +const URL = @import("../../browser/URL.zig"); +const Client = @import("../../browser/HttpClient.zig").Client; +const Request = @import("../../browser/HttpClient.zig").Request; +const Layer = @import("../../browser/HttpClient.zig").Layer; + +const InterceptionLayer = @This(); + +// Count of intercepted requests. This is to help deal with intercepted requests. +// The client doesn't track intercepted transfers. If a request is intercepted, +// the client forgets about it and requires the interceptor to continue or abort +// it. That works well, except if we only rely on active, we might think there's +// no more network activity when, with intercepted requests, there might be more +// in the future. (We really only need this to properly emit a 'networkIdle' and +// 'networkAlmostIdle' Page.lifecycleEvent in CDP). 
+intercepted: usize = 0, + +next: Layer = undefined, + +pub fn layer(self: *InterceptionLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ .request = request }, + }; +} + +fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { + const self: *InterceptionLayer = @ptrCast(@alignCast(ptr)); + var req = in_req; + + req.params.notification.dispatch(.http_request_start, &.{ .request = &req }); + + const wait_for_interception = false; + // req.params.notification.dispatch(.http_request_intercept, &.{ + // .transfer = transfer, + // .wait_for_interception = &wait_for_interception, + // }); + + if (wait_for_interception == false) { + // request not intercepted, process it normally + return self.next.request(client, req); + } + + @panic("not implemented yet"); +} From 6d41ea6fd047ac2217534ac8cb6abdc6b5163a87 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 18:10:13 -0700 Subject: [PATCH 12/36] move arena up to Request instead of Transfer --- src/browser/HttpClient.zig | 27 +++++++++++---------------- src/cdp/domains/fetch.zig | 6 +++--- src/cdp/domains/network.zig | 15 +++++++-------- src/network/layer/RobotsLayer.zig | 2 ++ 4 files changed, 23 insertions(+), 27 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index ef924e3f..6becf460 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -401,7 +401,6 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { const self: *Client = @ptrCast(@alignCast(ptr)); - const transfer = try self.makeTransfer(req); var wait_for_interception = false; @@ -436,6 +435,8 @@ pub fn request(self: *Client, req: Request) !void { // Assign Request Id. 
var our_req = req; our_req.params.request_id = self.incrReqId(); + our_req.params.arena = ArenaAllocator.init(self.allocator); + return self.entry_layer.request(self, our_req); } @@ -621,14 +622,11 @@ pub fn incrReqId(self: *Client) u32 { } fn makeTransfer(self: *Client, req: Request) !*Transfer { - errdefer req.params.headers.deinit(); - const transfer = try self.transfer_pool.create(); errdefer self.transfer_pool.destroy(transfer); transfer.* = .{ .start_time = timestamp(.monotonic), - .arena = ArenaAllocator.init(self.allocator), .id = req.params.request_id, .url = req.params.url, .req = req, @@ -1007,6 +1005,8 @@ fn ensureNoActiveConnection(self: *const Client) !void { } pub const RequestParams = struct { + /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. + arena: ArenaAllocator = undefined, /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. request_id: u32 = undefined, frame_id: u32, @@ -1051,6 +1051,7 @@ pub const RequestParams = struct { pub fn deinit(self: *const RequestParams) void { self.headers.deinit(); + self.arena.deinit(); } }; @@ -1073,9 +1074,9 @@ pub const Request = struct { error_callback: ErrorCallback, shutdown_callback: ?ShutdownCallback = null, - pub fn getCookieString(self: *Request, allocator: std.mem.Allocator) !?[:0]const u8 { + pub fn getCookieString(self: *Request) !?[:0]const u8 { const jar = self.params.cookie_jar orelse return null; - var aw: std.Io.Writer.Allocating = .init(allocator); + var aw: std.Io.Writer.Allocating = .init(self.params.arena.allocator()); try jar.forRequest(self.params.url, &aw.writer, .{ .is_http = true, .origin_url = self.params.cookie_origin, @@ -1177,7 +1178,6 @@ pub const SyncResponse = struct { }; pub const Transfer = struct { - arena: ArenaAllocator, id: u32 = 0, req: Request, url: [:0]const u8, @@ -1245,7 +1245,6 @@ pub const Transfer = struct { } self.req.deinit(); - self.arena.deinit(); 
self.client.transfer_pool.destroy(self); } @@ -1337,7 +1336,7 @@ pub const Transfer = struct { try conn.setHeaders(&header_list); // Add cookies from cookie jar. - if (try self.getCookieString()) |cookies| { + if (try self.req.getCookieString()) |cookies| { try conn.setCookies(@ptrCast(cookies.ptr)); } @@ -1398,10 +1397,6 @@ pub const Transfer = struct { } } - pub fn getCookieString(self: *Transfer) !?[:0]const u8 { - return self.req.getCookieString(self.arena.allocator()); - } - pub fn format(self: *Transfer, writer: *std.Io.Writer) !void { const req = self.req; return writer.print("{s} {s}", .{ @tagName(req.params.method), req.params.url }); @@ -1418,7 +1413,7 @@ pub const Transfer = struct { fn handleRedirect(transfer: *Transfer) !void { const req = &transfer.req; const conn = transfer._conn.?; - const arena = transfer.arena.allocator(); + const arena = transfer.req.params.arena.allocator(); transfer._redirect_count += 1; if (transfer._redirect_count > transfer.client.network.config.httpMaxRedirects()) { @@ -1602,7 +1597,7 @@ pub const Transfer = struct { transfer._callback_error = error.ResponseTooLarge; return http.writefunc_error; } - transfer._stream_buffer.ensureTotalCapacity(transfer.arena.allocator(), cl) catch {}; + transfer._stream_buffer.ensureTotalCapacity(transfer.req.params.arena.allocator(), cl) catch {}; } } @@ -1615,7 +1610,7 @@ pub const Transfer = struct { } const chunk = buffer[0..chunk_len]; - transfer._stream_buffer.appendSlice(transfer.arena.allocator(), chunk) catch |err| { + transfer._stream_buffer.appendSlice(transfer.req.params.arena.allocator(), chunk) catch |err| { transfer._callback_error = err; return http.writefunc_error; }; diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 57d0bd56..452c4d05 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -245,7 +245,7 @@ fn continueRequest(cmd: *CDP.Command) !void { .new_url = params.url, }); - const arena = transfer.arena.allocator(); + const 
arena = transfer.req.params.arena.allocator(); // Update the request with the new parameters if (params.url) |url| { try transfer.updateURL(try arena.dupeZ(u8, url)); @@ -309,7 +309,7 @@ fn continueWithAuth(cmd: *CDP.Command) !void { errdefer transfer.abortAuthChallenge(); // restart the request with the provided credentials. - const arena = transfer.arena.allocator(); + const arena = transfer.req.params.arena.allocator(); transfer.updateCredentials( try std.fmt.allocPrintSentinel(arena, "{s}:{s}", .{ params.authChallengeResponse.username, @@ -354,7 +354,7 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var body: ?[]const u8 = null; if (params.body) |b| { const decoder = std.base64.standard.Decoder; - const buf = try transfer.arena.allocator().alloc(u8, try decoder.calcSizeForSlice(b)); + const buf = try transfer.req.params.arena.allocator().alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(buf, b); body = buf; } diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index f9e17194..4f34dd36 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -329,9 +329,9 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request } pub const RequestWriter = struct { - request: *const Request, + request: *Request, - pub fn init(request: *const Request) RequestWriter { + pub fn init(request: *Request) RequestWriter { return .{ .request = request, }; @@ -376,11 +376,10 @@ pub const RequestWriter = struct { try jws.objectField(hdr.name); try jws.write(hdr.value); } - // TODO: Fix. - // if (try request.getCookieString()) |cookies| { - // try jws.objectField("Cookie"); - // try jws.write(cookies[0 .. cookies.len - 1]); - // } + if (try request.getCookieString()) |cookies| { + try jws.objectField("Cookie"); + try jws.write(cookies[0 .. 
cookies.len - 1]); + } try jws.endObject(); } try jws.endObject(); @@ -434,7 +433,7 @@ pub const TransferAsRequestWriter = struct { try jws.objectField(hdr.name); try jws.write(hdr.value); } - if (try transfer.getCookieString()) |cookies| { + if (try transfer.req.getCookieString()) |cookies| { try jws.objectField("Cookie"); try jws.write(cookies[0 .. cookies.len - 1]); } diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index add4ee3d..e1de10a0 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -66,6 +66,8 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const path = URL.getPathname(req.params.url); if (!robots.isAllowed(path)) { + defer req.deinit(); + log.warn(.http, "blocked by robots", .{ .url = req.params.url }); req.error_callback(req.ctx, error.RobotsBlocked); return; From 9c826159a05b0b4f7b8189cbb6111f23d21fffbc Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 19:20:57 -0700 Subject: [PATCH 13/36] crude InterceptionLayer --- src/Notification.zig | 15 +- src/browser/HttpClient.zig | 219 ++++-------------------- src/cdp/CDP.zig | 13 +- src/cdp/domains/fetch.zig | 197 +++++++++++---------- src/cdp/domains/network.zig | 106 +++++++++++- src/network/layer/CacheLayer.zig | 1 + src/network/layer/Forward.zig | 2 + src/network/layer/InterceptionLayer.zig | 176 ++++++++++++++++++- src/network/layer/RobotsLayer.zig | 2 +- 9 files changed, 422 insertions(+), 309 deletions(-) diff --git a/src/Notification.zig b/src/Notification.zig index 20f2f12b..e5b7c1c4 100644 --- a/src/Notification.zig +++ b/src/Notification.zig @@ -22,6 +22,7 @@ const lp = @import("lightpanda"); const Frame = @import("browser/Frame.zig"); const Transfer = @import("browser/HttpClient.zig").Transfer; const Request = @import("browser/HttpClient.zig").Request; +const Response = @import("browser/HttpClient.zig").Response; const log = lp.log; const List = std.DoublyLinkedList; @@ -167,30 
+168,32 @@ pub const RequestStart = struct { }; pub const RequestIntercept = struct { - transfer: *Transfer, + request: *Request, wait_for_interception: *bool, }; pub const RequestAuthRequired = struct { - transfer: *Transfer, + request: *Request, wait_for_interception: *bool, }; pub const ResponseData = struct { data: []const u8, - transfer: *Transfer, + request: *Request, }; pub const ResponseHeaderDone = struct { - transfer: *Transfer, + request: *Request, + response: *const Response, }; pub const RequestDone = struct { - transfer: *Transfer, + request: *Request, + content_length: usize, }; pub const RequestFail = struct { - transfer: *Transfer, + request: *Request, err: anyerror, }; diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 6becf460..6dbb77ff 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -402,33 +402,7 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { const self: *Client = @ptrCast(@alignCast(ptr)); const transfer = try self.makeTransfer(req); - - var wait_for_interception = false; - transfer.req.params.notification.dispatch(.http_request_intercept, &.{ - .transfer = transfer, - .wait_for_interception = &wait_for_interception, - }); - if (wait_for_interception == false) { - // request not intercepted, process it normally - return self.process(transfer); - } - - self.intercepted += 1; - if (comptime IS_DEBUG) { - log.debug(.http, "wait for interception", .{ .intercepted = self.intercepted }); - } - transfer._intercept_state = .pending; - - if (req.params.blocking == false) { - // The request was interecepted, but it isn't a blocking request, so we - // dont' need to block this call. 
The request will be unblocked - // asynchronously via either continueTransfer or abortTransfer - return; - } - - if (try self.waitForInterceptedResponse(transfer)) { - return self.process(transfer); - } + return self.process(transfer); } pub fn request(self: *Client, req: Request) !void { @@ -510,48 +484,6 @@ pub fn syncRequest(self: *Client, allocator: Allocator, params: RequestParams) ! } } -fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { - // The request was intercepted and is blocking. This is messy, but our - // callers, the ScriptManager -> Page, don't have a great way to stop the - // parser and return control to the CDP server to wait for the interception - // response. We have some information on the CDPClient, so we'll do the - // blocking here. (This is a bit of a legacy thing. Initially the Client - // had a 'extra_socket' that it could monitor. It was named 'extra_socket' - // to appear generic, but really, that 'extra_socket' was always the CDP - // socket. Because we already had the "extra_socket" here, it was easier to - // make it even more CDP- aware and turn `extra_socket: socket_t` into the - // current CDPClient and do the blocking here). - const cdp_client = self.cdp_client.?; - const ctx = cdp_client.ctx; - - if (cdp_client.blocking_read_start(ctx) == false) { - return error.BlockingInterceptFailure; - } - - defer _ = cdp_client.blocking_read_end(ctx); - - while (true) { - if (cdp_client.blocking_read(ctx) == false) { - return error.BlockingInterceptFailure; - } - - switch (transfer._intercept_state) { - .pending => continue, // keep waiting - .@"continue" => return true, - .abort => |err| { - transfer.abort(err); - return false; - }, - .fulfilled => { - // callbacks already called, just need to cleanups - transfer.deinit(); - return false; - }, - .not_intercepted => unreachable, - } - } -} - // Above, request will not process if there's an interception request. 
In such // cases, the interceptor is expected to call resume to continue the transfer // or transfer.abort() to abort it. @@ -567,50 +499,6 @@ fn process(self: *Client, transfer: *Transfer) !void { self.queue.append(&transfer._node); } -// For an intercepted request -pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { - if (comptime IS_DEBUG) { - std.debug.assert(transfer._intercept_state != .not_intercepted); - log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); - } - self.intercepted -= 1; - - if (!transfer.req.params.blocking) { - return self.process(transfer); - } - transfer._intercept_state = .@"continue"; -} - -// For an intercepted request -pub fn abortTransfer(self: *Client, transfer: *Transfer) void { - if (comptime IS_DEBUG) { - std.debug.assert(transfer._intercept_state != .not_intercepted); - log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); - } - self.intercepted -= 1; - - if (!transfer.req.params.blocking) { - transfer.abort(error.Abort); - } - transfer._intercept_state = .{ .abort = error.Abort }; -} - -// For an intercepted request -pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { - if (comptime IS_DEBUG) { - std.debug.assert(transfer._intercept_state != .not_intercepted); - log.debug(.http, "filfull transfer", .{ .intercepted = self.intercepted }); - } - self.intercepted -= 1; - - try transfer.fulfill(status, headers, body); - if (!transfer.req.params.blocking) { - transfer.deinit(); - return; - } - transfer._intercept_state = .fulfilled; -} - pub fn nextReqId(self: *Client) u32 { return self.next_request_id +% 1; } @@ -645,11 +533,6 @@ fn requestFailed(transfer: *Transfer, err: anyerror, comptime execute_callback: transfer._notified_fail = true; - transfer.req.params.notification.dispatch(.http_request_fail, &.{ - .transfer = transfer, - .err = err, - }); - if (execute_callback) { 
transfer.req.error_callback(transfer.req.ctx, err); } else if (transfer.req.shutdown_callback) |cb| { @@ -757,58 +640,6 @@ fn perform(self: *Client, timeout_ms: c_int) anyerror!PerformStatus { } fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *Transfer) !bool { - // Detect auth challenge from response headers. - // Also check on RecvError: proxy may send 407 with headers before - // closing the connection (CONNECT tunnel not yet established). - if (msg.err == null or msg.err.? == error.RecvError) { - transfer.detectAuthChallenge(msg.conn); - } - - // In case of auth challenge - // TODO give a way to configure the number of auth retries. - if (transfer._auth_challenge != null and transfer._tries < 10) { - var wait_for_interception = false; - transfer.req.params.notification.dispatch( - .http_request_auth_required, - &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }, - ); - if (wait_for_interception) { - self.intercepted += 1; - if (comptime IS_DEBUG) { - log.debug(.http, "wait for auth interception", .{ .intercepted = self.intercepted }); - } - transfer._intercept_state = .pending; - - // Whether or not this is a blocking request, we're not going - // to process it now. We can end the transfer, which will - // release the easy handle back into the pool. The transfer - // is still valid/alive (just has no handle). - transfer.releaseConn(); - if (!transfer.req.params.blocking) { - // In the case of an async request, we can just "forget" - // about this transfer until it gets updated asynchronously - // from some CDP command. - return false; - } - - // In the case of a sync request, we need to block until we - // get the CDP command for handling this case. 
- if (try self.waitForInterceptedResponse(transfer)) { - // we've been asked to continue with the request - // we can't process it here, since we're already inside - // a process, so we need to queue it and wait for the - // next tick (this is why it was safe to releaseConn - // above, because even in the "blocking" path, we still - // only process it on the next tick). - self.queue.append(&transfer._node); - } else { - // aborted, already cleaned up - } - - return false; - } - } - // Handle redirects: reuse the same connection to preserve TCP state. if (msg.err == null) { const status = try msg.conn.getResponseCode(); @@ -874,11 +705,6 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T if (transfer._stream_buffer.items.len > 0) { try transfer.req.data_callback(Response.fromTransfer(transfer), body); - transfer.req.params.notification.dispatch(.http_response_data, &.{ - .data = body, - .transfer = transfer, - }); - if (transfer.aborted) { transfer.requestFailed(error.Abort, true); return true; @@ -891,10 +717,6 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T try transfer.req.done_callback(transfer.req.ctx); - transfer.req.params.notification.dispatch(.http_request_done, &.{ - .transfer = transfer, - }); - return true; } @@ -1093,11 +915,26 @@ pub const Request = struct { } }; +pub const FulfilledResponse = struct { + status: u16, + url: [:0]const u8, + headers: []const http.Header, + body: ?[]const u8, + + pub fn contentType(self: *const FulfilledResponse) ?[]const u8 { + for (self.headers) |hdr| { + if (std.ascii.eqlIgnoreCase(hdr.name, "content-type")) return hdr.value; + } + return null; + } +}; + pub const Response = struct { ctx: *anyopaque, inner: union(enum) { transfer: *Transfer, cached: *const CachedResponse, + fulfilled: *const FulfilledResponse, }, pub fn fromTransfer(transfer: *Transfer) Response { @@ -1108,10 +945,15 @@ pub const Response = struct { return .{ .ctx = ctx, .inner = .{ 
.cached = resp } }; } + pub fn fromFulfilled(ctx: *anyopaque, fulfilled: *const FulfilledResponse) Response { + return .{ .ctx = ctx, .inner = .{ .fulfilled = fulfilled } }; + } + pub fn status(self: Response) ?u16 { return switch (self.inner) { .transfer => |t| if (t.response_header) |rh| rh.status else null, .cached => |c| c.metadata.status, + .fulfilled => |f| f.status, }; } @@ -1119,6 +961,7 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| if (t.response_header) |*rh| rh.contentType() else null, .cached => |c| c.metadata.content_type, + .fulfilled => |f| f.contentType(), }; } @@ -1129,13 +972,14 @@ pub const Response = struct { .buffer => |buf| @intCast(buf.len), .file => |f| @intCast(f.len), }, + .fulfilled => |f| if (f.body) |b| @intCast(b.len) else null, }; } pub fn redirectCount(self: Response) ?u32 { return switch (self.inner) { .transfer => |t| if (t.response_header) |rh| rh.redirect_count else null, - .cached => 0, + .cached, .fulfilled => 0, }; } @@ -1143,6 +987,7 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| t.url, .cached => |c| c.metadata.url, + .fulfilled => |f| f.url, }; } @@ -1150,13 +995,14 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| t.responseHeaderIterator(), .cached => |c| HeaderIterator{ .list = .{ .list = c.metadata.headers } }, + .fulfilled => |f| HeaderIterator{ .list = .{ .list = f.headers } }, }; } pub fn abort(self: Response, err: anyerror) void { switch (self.inner) { .transfer => |t| t.abort(err), - .cached => {}, + .cached, .fulfilled => {}, } } @@ -1164,6 +1010,7 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| try t.format(writer), .cached => |c| try c.format(writer), + .fulfilled => |f| try writer.print("fulfilled {s}", .{f.url}), }; } }; @@ -1301,11 +1148,6 @@ pub const Transfer = struct { if (self._notified_fail) return; self._notified_fail = true; - 
self.req.params.notification.dispatch(.http_request_fail, &.{ - .transfer = self, - .err = err, - }); - if (execute_callback) { self.req.error_callback(self.req.ctx, err); } else if (self.req.shutdown_callback) |cb| { @@ -1520,6 +1362,7 @@ pub const Transfer = struct { std.debug.assert(self._intercept_state != .not_intercepted); log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.intercepted }); } + self.client.intercepted -= 1; if (!self.req.params.blocking) { self.abort(error.AbortAuthChallenge); @@ -1557,10 +1400,6 @@ pub const Transfer = struct { } } - transfer.req.params.notification.dispatch(.http_response_header_done, &.{ - .transfer = transfer, - }); - const proceed = transfer.req.header_callback(Response.fromTransfer(transfer)) catch |err| { log.err(.http, "header_callback", .{ .err = err, .req = transfer }); return err; diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 16a08cc9..023f8bf2 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -450,8 +450,9 @@ pub const BrowserContext = struct { // abort all intercepted requests before closing the session/page // since some of these might callback into the page/scriptmanager - for (self.intercept_state.pendingTransfers()) |transfer| { - transfer.abort(error.ClientDisconnect); + for (self.intercept_state.pendingRequests()) |request| { + defer request.deinit(); + request.error_callback(request.ctx, error.ClientDisconnect); } for (self.isolated_worlds.items) |world| { @@ -668,7 +669,7 @@ pub const BrowserContext = struct { const arena = self.frame_arena; // Prepare the captured response value. - const id = msg.transfer.id; + const id = msg.request.params.request_id; const gop = try self.captured_responses.getOrPut(arena, id); if (!gop.found_existing) { gop.value_ptr.* = .{ @@ -676,8 +677,8 @@ pub const BrowserContext = struct { // Encode the data in base64 by default, but don't encode // for well known content-type. 
.must_encode = blk: { - const transfer = msg.transfer; - if (transfer.response_header.?.contentType()) |ct| { + const response = msg.response; + if (response.contentType()) |ct| { const mime = try Mime.parse(ct); if (!mime.isText()) { @@ -705,7 +706,7 @@ pub const BrowserContext = struct { const self: *BrowserContext = @ptrCast(@alignCast(ctx)); const arena = self.frame_arena; - const id = msg.transfer.id; + const id = msg.request.params.request_id; const resp = self.captured_responses.getPtr(id) orelse lp.assert(false, "onHttpResponseData missinf captured response", .{}); return resp.data.appendSlice(arena, msg.data); diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 452c4d05..a7c5672f 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -54,7 +54,7 @@ pub fn processMessage(cmd: *CDP.Command) !void { // Stored in CDP pub const InterceptState = struct { allocator: Allocator, - waiting: std.AutoArrayHashMapUnmanaged(u32, *HttpClient.Transfer), + waiting: std.AutoArrayHashMapUnmanaged(u32, HttpClient.Request), pub fn init(allocator: Allocator) !InterceptState { return .{ @@ -67,11 +67,11 @@ pub const InterceptState = struct { return self.waiting.count() == 0; } - pub fn put(self: *InterceptState, transfer: *HttpClient.Transfer) !void { - return self.waiting.put(self.allocator, transfer.id, transfer); + pub fn put(self: *InterceptState, request: HttpClient.Request) !void { + return self.waiting.put(self.allocator, request.params.request_id, request); } - pub fn remove(self: *InterceptState, request_id: u32) ?*HttpClient.Transfer { + pub fn remove(self: *InterceptState, request_id: u32) ?HttpClient.Request { const entry = self.waiting.fetchSwapRemove(request_id) orelse return null; return entry.value; } @@ -80,7 +80,7 @@ pub const InterceptState = struct { self.waiting.deinit(self.allocator); } - pub fn pendingTransfers(self: *const InterceptState) []*HttpClient.Transfer { + pub fn pendingRequests(self: *const InterceptState) 
[]HttpClient.Request { return self.waiting.values(); } }; @@ -190,29 +190,28 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification. const session_id = bc.session_id orelse return; // We keep it around to wait for modifications to the request. - // NOTE: we assume whomever created the request created it with a lifetime of the Page. // TODO: What to do when receiving replies for a previous frame's requests? - const transfer = intercept.transfer; - try bc.intercept_state.put(transfer); + const request = intercept.request; + try bc.intercept_state.put(request.*); try bc.cdp.sendEvent("Fetch.requestPaused", .{ - .requestId = &id.toInterceptId(transfer.id), - .frameId = &id.toFrameId(transfer.req.params.frame_id), - .request = network.TransferAsRequestWriter.init(transfer), - .resourceType = switch (transfer.req.params.resource_type) { + .requestId = &id.toInterceptId(request.params.request_id), + .frameId = &id.toFrameId(request.params.frame_id), + .request = network.RequestWriter.init(request), + .resourceType = switch (request.params.resource_type) { .script => "Script", .xhr => "XHR", .document => "Document", .fetch => "Fetch", }, - .networkId = &id.toRequestId(transfer), // matches the Network REQ-ID + .networkId = &id.toRequestId2(request), // matches the Network REQ-ID }, .{ .session_id = session_id }); log.debug(.cdp, "request intercept", .{ .state = "paused", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, }); // Await either continueRequest, failRequest or fulfillRequest @@ -236,39 +235,48 @@ fn continueRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = intercept_state.remove(request_id) orelse return error.RequestNotFound; log.debug(.cdp, "request intercept", .{ .state = "continue", - 
.id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, .new_url = params.url, }); - const arena = transfer.req.params.arena.allocator(); + const arena = request.params.arena.allocator(); // Update the request with the new parameters if (params.url) |url| { - try transfer.updateURL(try arena.dupeZ(u8, url)); + request.params.url = try arena.dupeZ(u8, url); } if (params.method) |method| { - transfer.req.params.method = std.meta.stringToEnum(http.Method, method) orelse return error.InvalidParams; + request.params.method = std.meta.stringToEnum(http.Method, method) orelse return error.InvalidParams; } if (params.headers) |headers| { - // Not obvious, but cmd.arena is safe here, since the headers will get - // duped by libcurl. transfer.arena is more obvious/safe, but cmd.arena - // is more efficient (it's re-used) - try transfer.replaceRequestHeaders(cmd.arena, headers); + request.params.headers.deinit(); + + var buf: std.ArrayList(u8) = .empty; + var new_headers = try bc.cdp.browser.http_client.newHeaders(); + for (headers) |hdr| { + defer buf.clearRetainingCapacity(); + try std.fmt.format(buf.writer(cmd.arena), "{s}: {s}", .{ hdr.name, hdr.value }); + try buf.append(cmd.arena, 0); + try new_headers.add(buf.items[0 .. buf.items.len - 1 :0]); + } + request.params.headers = new_headers; } if (params.postData) |b| { const decoder = std.base64.standard.Decoder; const body = try arena.alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(body, b); - transfer.req.params.body = body; + request.params.body = body; } - try bc.cdp.browser.http_client.continueTransfer(transfer); + // todo: replace. 
+ const client = bc.cdp.browser.http_client; + try client.interception_layer.continueRequest(client, request); return cmd.sendResult(null, .{}); } @@ -292,33 +300,36 @@ fn continueWithAuth(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const request = intercept_state.remove(request_id) orelse return error.RequestNotFound; log.debug(.cdp, "request intercept", .{ .state = "continue with auth", - .id = transfer.id, + .id = request.params.request_id, .response = params.authChallengeResponse.response, }); if (params.authChallengeResponse.response != .ProvideCredentials) { - transfer.abortAuthChallenge(); + // TODO: + // request.abortAuthChallenge(); return cmd.sendResult(null, .{}); } + // TODO: // cancel the request, deinit the transfer on error. - errdefer transfer.abortAuthChallenge(); + // errdefer request.abortAuthChallenge(); + // todo: // restart the request with the provided credentials. 
- const arena = transfer.req.params.arena.allocator(); - transfer.updateCredentials( - try std.fmt.allocPrintSentinel(arena, "{s}:{s}", .{ - params.authChallengeResponse.username, - params.authChallengeResponse.password, - }, 0), - ); + // const arena = request.params.arena.allocator(); + // request.updateCredentials( + // try std.fmt.allocPrintSentinel(arena, "{s}:{s}", .{ + // params.authChallengeResponse.username, + // params.authChallengeResponse.password, + // }, 0), + // ); - transfer.reset(); - try bc.cdp.browser.http_client.continueTransfer(transfer); + const client = bc.cdp.browser.http_client; + try client.interception_layer.continueRequest(client, request); return cmd.sendResult(null, .{}); } @@ -341,12 +352,12 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = intercept_state.remove(request_id) orelse return error.RequestNotFound; log.debug(.cdp, "request intercept", .{ .state = "fulfilled", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, .status = params.responseCode, .body = params.body != null, }); @@ -354,13 +365,13 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var body: ?[]const u8 = null; if (params.body) |b| { const decoder = std.base64.standard.Decoder; - const buf = try transfer.req.params.arena.allocator().alloc(u8, try decoder.calcSizeForSlice(b)); + const buf = try request.params.arena.allocator().alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(buf, b); body = buf; } - try bc.cdp.browser.http_client.fulfillTransfer(transfer, params.responseCode, params.responseHeaders orelse &.{}, body); - + const client = bc.cdp.browser.http_client; + try client.interception_layer.fulfillRequest(client, request, params.responseCode, params.responseHeaders orelse &.{}, body); 
return cmd.sendResult(null, .{}); } @@ -374,61 +385,69 @@ fn failRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; - defer bc.cdp.browser.http_client.abortTransfer(transfer); + const request = intercept_state.remove(request_id) orelse return error.RequestNotFound; + + const client = bc.cdp.browser.http_client; + defer client.interception_layer.abortRequest(client, request); log.info(.cdp, "request intercept", .{ .state = "fail", .id = request_id, - .url = transfer.url, + .url = request.params.url, .reason = params.errorReason, }); return cmd.sendResult(null, .{}); } pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notification.RequestAuthRequired) !void { - // detachTarget could be called, in which case, we still have a frame doing - // things, but no session. - const session_id = bc.session_id orelse return; - - // We keep it around to wait for modifications to the request. - // NOTE: we assume whomever created the request created it with a lifetime of the Page. - // TODO: What to do when receiving replies for a previous frame's requests? - - const transfer = intercept.transfer; - try bc.intercept_state.put(transfer); - - const challenge = transfer._auth_challenge orelse return error.NullAuthChallenge; - - try bc.cdp.sendEvent("Fetch.authRequired", .{ - .requestId = &id.toInterceptId(transfer.id), - .frameId = &id.toFrameId(transfer.req.params.frame_id), - .request = network.TransferAsRequestWriter.init(transfer), - .resourceType = switch (transfer.req.params.resource_type) { - .script => "Script", - .xhr => "XHR", - .document => "Document", - .fetch => "Fetch", - }, - .authChallenge = .{ - .origin = "", // TODO get origin, could be the proxy address for example. 
- .source = if (challenge.source) |s| (if (s == .server) "Server" else "Proxy") else "", - .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", - .realm = challenge.realm orelse "", - }, - .networkId = &id.toRequestId(transfer), - }, .{ .session_id = session_id }); - - log.debug(.cdp, "request auth required", .{ - .state = "paused", - .id = transfer.id, - .url = transfer.url, - }); - // Await continueWithAuth - - intercept.wait_for_interception.* = true; + _ = bc; + _ = intercept; + return error.NullAuthChallenge; } +// pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notification.RequestAuthRequired) !void { +// // detachTarget could be called, in which case, we still have a frame doing +// // things, but no session. +// const session_id = bc.session_id orelse return; + +// // We keep it around to wait for modifications to the request. +// // NOTE: we assume whomever created the request created it with a lifetime of the Page. +// // TODO: What to do when receiving replies for a previous frame's requests? + +// const transfer = intercept.transfer; +// try bc.intercept_state.put(transfer); + +// const challenge = transfer._auth_challenge orelse return error.NullAuthChallenge; + +// try bc.cdp.sendEvent("Fetch.authRequired", .{ +// .requestId = &id.toInterceptId(transfer.id), +// .frameId = &id.toFrameId(transfer.req.params.frame_id), +// .request = network.TransferAsRequestWriter.init(transfer), +// .resourceType = switch (transfer.req.params.resource_type) { +// .script => "Script", +// .xhr => "XHR", +// .document => "Document", +// .fetch => "Fetch", +// }, +// .authChallenge = .{ +// .origin = "", // TODO get origin, could be the proxy address for example. 
+// .source = if (challenge.source) |s| (if (s == .server) "Server" else "Proxy") else "", +// .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", +// .realm = challenge.realm orelse "", +// }, +// .networkId = &id.toRequestId(transfer), +// }, .{ .session_id = session_id }); + +// log.debug(.cdp, "request auth required", .{ +// .state = "paused", +// .id = transfer.id, +// .url = transfer.url, +// }); +// // Await continueWithAuth + +// intercept.wait_for_interception.* = true; +// } + // Get u32 from requestId which is formatted as: "INT-{d}" fn idFromRequestId(request_id: []const u8) !u32 { if (!std.mem.startsWith(u8, request_id, "INT-")) { diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 4f34dd36..06581c15 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -28,6 +28,7 @@ const Notification = @import("../../Notification.zig"); const timestamp = @import("../../datetime.zig").timestamp; const Transfer = @import("../../browser/HttpClient.zig").Transfer; const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; const CdpStorage = @import("storage.zig"); @@ -261,7 +262,7 @@ pub fn httpRequestFail(bc: *CDP.BrowserContext, msg: *const Notification.Request // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.loadingFailed", .{ - .requestId = &id.toRequestId(msg.transfer), + .requestId = &id.toRequestId2(msg.request), // Seems to be what chrome answers with. I assume it depends on the type of error? .type = "Ping", .errorText = msg.err, @@ -304,15 +305,14 @@ pub fn httpResponseHeaderDone(arena: Allocator, bc: *CDP.BrowserContext, msg: *c // things, but no session. 
const session_id = bc.session_id orelse return; - const transfer = msg.transfer; - const req = &transfer.req; + const req = msg.request; // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.responseReceived", .{ .frameId = &id.toFrameId(req.params.frame_id), - .requestId = &id.toRequestId(transfer), + .requestId = &id.toRequestId2(req), .loaderId = &id.toLoaderId(req.params.loader_id), - .response = TransferAsResponseWriter.init(arena, transfer), + .response = ResponseWriter.init(arena, msg.response), .hasExtraInfo = false, // TODO change after adding Network.responseReceivedExtraInfo }, .{ .session_id = session_id }); } @@ -321,10 +321,10 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request // detachTarget could be called, in which case, we still have a frame doing // things, but no session. const session_id = bc.session_id orelse return; - const transfer = msg.transfer; + const req = msg.request; try bc.cdp.sendEvent("Network.loadingFinished", .{ - .requestId = &id.toRequestId(transfer), - .encodedDataLength = transfer.bytes_received, + .requestId = &id.toRequestId2(req), + .encodedDataLength = msg.content_length, }, .{ .session_id = session_id }); } @@ -443,6 +443,96 @@ pub const TransferAsRequestWriter = struct { } }; +const ResponseWriter = struct { + arena: Allocator, + response: *const Response, + + fn init(arena: Allocator, response: *const Response) ResponseWriter { + return .{ + .arena = arena, + .response = response, + }; + } + + pub fn jsonStringify(self: *const ResponseWriter, jws: anytype) !void { + self._jsonStringify(jws) catch return error.WriteFailed; + } + + fn _jsonStringify(self: *const ResponseWriter, jws: anytype) !void { + const response = self.response; + + try jws.beginObject(); + { + try jws.objectField("url"); + try jws.write(response.url()); + } + + if (response.status()) |status| { + try jws.objectField("status"); + try jws.write(status); + + try 
jws.objectField("statusText"); + try jws.write(@as(std.http.Status, @enumFromInt(status)).phrase() orelse "Unknown"); + } + + { + const mime: Mime = blk: { + if (response.contentType()) |ct| { + break :blk try Mime.parse(ct); + } + break :blk .unknown; + }; + + try jws.objectField("mimeType"); + try jws.write(mime.contentTypeString()); + try jws.objectField("charset"); + try jws.write(mime.charsetString()); + } + + { + try jws.objectField("timing"); + try jws.write(.{ + // TODO: fix + .requestTime = -1, + .connectEnd = -1, + .connectStart = -1, + .dnsEnd = -1, + .dnsStart = -1, + .proxyEnd = -1, + .proxyStart = -1, + .receiveHeadersEnd = -1, + .receiveHeadersStart = -1, + .sendEnd = -1, + .sendStart = -1, + .sslEnd = -1, + .sslStart = -1, + }); + } + + { + // chromedp doesn't like having duplicate header names. It's pretty + // common to get these from a server (e.g. for Cache-Control), but + // Chrome joins these. So we have to too. + const arena = self.arena; + var it = response.headerIterator(); + var map: std.StringArrayHashMapUnmanaged([]const u8) = .empty; + while (it.next()) |hdr| { + const gop = try map.getOrPut(arena, hdr.name); + if (gop.found_existing) { + // yes, chrome joins multi-value headers with a \n + gop.value_ptr.* = try std.mem.join(arena, "\n", &.{ gop.value_ptr.*, hdr.value }); + } else { + gop.value_ptr.* = hdr.value; + } + } + + try jws.objectField("headers"); + try jws.write(std.json.ArrayHashMap([]const u8){ .map = map }); + } + try jws.endObject(); + } +}; + const TransferAsResponseWriter = struct { arena: Allocator, transfer: *Transfer, diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 317457b5..6c5c72c6 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -80,6 +80,7 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const wrapped = cache_ctx.forward.wrapRequest( req, cache_ctx, + client.incrReqId(), .{ .start = 
CacheContext.startCallback, .header = CacheContext.headerCallback, diff --git a/src/network/layer/Forward.zig b/src/network/layer/Forward.zig index b11ff23f..2a81ec14 100644 --- a/src/network/layer/Forward.zig +++ b/src/network/layer/Forward.zig @@ -54,12 +54,14 @@ pub fn wrapRequest( self: *Forward, req: Request, new_ctx: anytype, + new_id: u32, overrides: Overrides, ) Request { const T = @TypeOf(new_ctx.*); const PassthroughT = makePassthrough(T, "forward"); var wrapped = req; wrapped.ctx = new_ctx; + wrapped.params.request_id = new_id; wrapped.start_callback = overrides.start orelse if (self.start != null) PassthroughT.start else null; wrapped.header_callback = overrides.header orelse PassthroughT.header; wrapped.data_callback = overrides.data orelse PassthroughT.data; diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 149e4154..d6905614 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -22,10 +22,14 @@ const log = @import("../../log.zig"); const IS_DEBUG = builtin.mode == .Debug; +const http = @import("../http.zig"); const URL = @import("../../browser/URL.zig"); const Client = @import("../../browser/HttpClient.zig").Client; const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; +const FulfilledResponse = @import("../../browser/HttpClient.zig").FulfilledResponse; const Layer = @import("../../browser/HttpClient.zig").Layer; +const Forward = @import("Forward.zig"); const InterceptionLayer = @This(); @@ -49,20 +53,174 @@ pub fn layer(self: *InterceptionLayer) Layer { fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { const self: *InterceptionLayer = @ptrCast(@alignCast(ptr)); - var req = in_req; + var pre_wrap_req = in_req; + + // Wrap callbacks to intercept notifications + const intercept_ctx = try pre_wrap_req.params.arena.allocator().create(InterceptContext); 
+ intercept_ctx.* = .{ + .forward = Forward.fromRequest(pre_wrap_req), + .request = pre_wrap_req, + }; + + var req = intercept_ctx.forward.wrapRequest( + pre_wrap_req, + intercept_ctx, + client.incrReqId(), + .{ + .start = InterceptContext.startCallback, + .header = InterceptContext.headerCallback, + .data = InterceptContext.dataCallback, + .done = InterceptContext.doneCallback, + .err = InterceptContext.errorCallback, + .shutdown = InterceptContext.shutdownCallback, + }, + ); req.params.notification.dispatch(.http_request_start, &.{ .request = &req }); - const wait_for_interception = false; - // req.params.notification.dispatch(.http_request_intercept, &.{ - // .transfer = transfer, - // .wait_for_interception = &wait_for_interception, - // }); + var wait_for_interception = false; + req.params.notification.dispatch(.http_request_intercept, &.{ + .request = &req, + .wait_for_interception = &wait_for_interception, + }); - if (wait_for_interception == false) { - // request not intercepted, process it normally + if (!wait_for_interception) { return self.next.request(client, req); } - @panic("not implemented yet"); + self.intercepted += 1; + if (comptime IS_DEBUG) { + log.debug(.http, "wait for interception", .{ .intercepted = self.intercepted }); + } +} + +pub const InterceptContext = struct { + forward: Forward, + request: Request, + content_length: usize = 0, + + fn startCallback(response: Response) anyerror!void { + const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + return self.forward.forwardStart(response); + } + + fn headerCallback(response: Response) anyerror!bool { + const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + self.content_length = response.contentLength() orelse 0; + + self.request.params.notification.dispatch(.http_response_header_done, &.{ + .request = &self.request, + .response = &response, + }); + return self.forward.forwardHeader(response); + } + + fn dataCallback(response: Response, chunk: []const u8) 
anyerror!void { + const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + + self.request.params.notification.dispatch(.http_response_data, &.{ + .data = chunk, + .request = &self.request, + }); + + return self.forward.forwardData(response, chunk); + } + + fn doneCallback(ctx: *anyopaque) anyerror!void { + const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + self.request.params.notification.dispatch(.http_request_done, &.{ + .request = &self.request, + .content_length = self.content_length, + }); + return self.forward.forwardDone(); + } + + fn errorCallback(ctx: *anyopaque, err: anyerror) void { + const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + self.request.params.notification.dispatch(.http_request_fail, &.{ + .request = &self.request, + .err = err, + }); + self.forward.forwardErr(err); + } + + fn shutdownCallback(ctx: *anyopaque) void { + const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + self.forward.forwardShutdown(); + } +}; + +// CDP Callbacks + +pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) anyerror!void { + if (comptime IS_DEBUG) { + log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); + } + + self.intercepted -= 1; + return self.next.request(client, req); +} + +pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) void { + _ = client; + + if (comptime IS_DEBUG) { + log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); + } + self.intercepted -= 1; + + defer req.deinit(); + req.error_callback(req.ctx, error.Abort); +} + +fn fulfillInner( + req: Request, + status: u16, + headers: []const http.Header, + body: ?[]const u8, +) !void { + const fulfilled = FulfilledResponse{ + .status = status, + .url = req.params.url, + .headers = headers, + .body = body, + }; + + const response = Response.fromFulfilled(req.ctx, &fulfilled); + + if (req.start_callback) |cb| { + try cb(response); + } + + const proceed = try 
req.header_callback(response); + if (!proceed) { + return error.Abort; + } + + if (body) |b| { + try req.data_callback(response, b); + } + + try req.done_callback(req.ctx); +} + +pub fn fulfillRequest( + self: *InterceptionLayer, + _: *Client, + req: Request, + status: u16, + headers: []const http.Header, + body: ?[]const u8, +) !void { + if (comptime IS_DEBUG) { + log.debug(.http, "filfull transfer", .{ .intercepted = self.intercepted }); + } + + self.intercepted -= 1; + defer req.deinit(); + + fulfillInner(req, status, headers, body) catch |err| { + req.error_callback(req.ctx, err); + return err; + }; } diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index e1de10a0..eb38b720 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -190,7 +190,7 @@ const RobotsContext = struct { try self.buffer.ensureTotalCapacity(self.arena, cl); } }, - .cached => {}, + else => {}, } return true; } From 0d50f706dbddc40eae912e1e5d75e4025b1251e5 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 19:56:13 -0700 Subject: [PATCH 14/36] more fixing of hanging in cdp interception --- src/Notification.zig | 2 + src/browser/HttpClient.zig | 39 ++++++----- src/browser/Runner.zig | 4 +- src/cdp/domains/fetch.zig | 87 ++++++++++++------------- src/cdp/domains/page.zig | 2 +- src/network/layer/InterceptionLayer.zig | 56 ++++++++++++++++ 6 files changed, 121 insertions(+), 69 deletions(-) diff --git a/src/Notification.zig b/src/Notification.zig index e5b7c1c4..96b8f822 100644 --- a/src/Notification.zig +++ b/src/Notification.zig @@ -23,6 +23,7 @@ const Frame = @import("browser/Frame.zig"); const Transfer = @import("browser/HttpClient.zig").Transfer; const Request = @import("browser/HttpClient.zig").Request; const Response = @import("browser/HttpClient.zig").Response; +const InterceptContext = @import("network/layer/InterceptionLayer.zig").InterceptContext; const log = lp.log; const List = std.DoublyLinkedList; 
@@ -174,6 +175,7 @@ pub const RequestIntercept = struct { pub const RequestAuthRequired = struct { request: *Request, + intercept_ctx: *InterceptContext, wait_for_interception: *bool, }; diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 6dbb77ff..042b5406 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -65,15 +65,6 @@ ws_active: usize = 0, // Count of active http requests http_active: usize = 0, -// Count of intercepted requests. This is to help deal with intercepted requests. -// The client doesn't track intercepted transfers. If a request is intercepted, -// the client forgets about it and requires the interceptor to continue or abort -// it. That works well, except if we only rely on active, we might think there's -// no more network activity when, with interecepted requests, there might be more -// in the future. (We really only need this to properly emit a 'networkIdle' and -// 'networkAlmostIdle' Page.lifecycleEvent in CDP). -intercepted: usize = 0, - // Our curl multi handle. handles: http.Handles, @@ -471,7 +462,15 @@ pub fn syncRequest(self: *Client, allocator: Allocator, params: RequestParams) ! }); while (sync_ctx.completion == .in_progress) { - _ = try self.tick(200); + const status = try self.tick(200); + log.debug(.http, "sync request tick", .{ .status = status }); + switch (status) { + .cdp_socket => { + const cdp = self.cdp_client.?; + _ = cdp.blocking_read(cdp.ctx); + }, + .normal => continue, + } } switch (sync_ctx.completion) { @@ -831,6 +830,7 @@ pub const RequestParams = struct { arena: ArenaAllocator = undefined, /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. 
request_id: u32 = undefined, + frame_id: u32, loader_id: u32, method: Method, @@ -1312,25 +1312,24 @@ pub const Transfer = struct { } } - fn detectAuthChallenge(transfer: *Transfer, conn: *const http.Connection) void { - const status = conn.getResponseCode() catch return; - const connect_status = conn.getConnectCode() catch return; + pub fn detectAuthChallenge(conn: *const http.Connection) ?http.AuthChallenge { + const status = conn.getResponseCode() catch return null; + const connect_status = conn.getConnectCode() catch return null; if (status != 401 and status != 407 and connect_status != 401 and connect_status != 407) { - transfer._auth_challenge = null; - return; + return null; } if (conn.getResponseHeader("WWW-Authenticate", 0)) |hdr| { - transfer._auth_challenge = http.AuthChallenge.parse(status, .server, hdr.value) catch null; + return http.AuthChallenge.parse(status, .server, hdr.value) catch null; } else if (conn.getConnectHeader("WWW-Authenticate", 0)) |hdr| { - transfer._auth_challenge = http.AuthChallenge.parse(status, .server, hdr.value) catch null; + return http.AuthChallenge.parse(status, .server, hdr.value) catch null; } else if (conn.getResponseHeader("Proxy-Authenticate", 0)) |hdr| { - transfer._auth_challenge = http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; + return http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; } else if (conn.getConnectHeader("Proxy-Authenticate", 0)) |hdr| { - transfer._auth_challenge = http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; + return http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; } else { - transfer._auth_challenge = .{ .status = status, .source = null, .scheme = null, .realm = null }; + return .{ .status = status, .source = null, .scheme = null, .realm = null }; } } diff --git a/src/browser/Runner.zig b/src/browser/Runner.zig index 68a7ad59..2489382e 100644 --- a/src/browser/Runner.zig +++ b/src/browser/Runner.zig @@ -185,7 +185,7 @@ fn _tick(self: 
*Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { try frame.dispatchLoad(); const http_active = http_client.http_active; - const total_network_activity = http_active + http_client.intercepted; + const total_network_activity = http_active + http_client.interception_layer.intercepted; if (frame._notified_network_almost_idle.check(total_network_activity <= 2)) { frame.notifyNetworkAlmostIdle(); } @@ -211,7 +211,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { // because is_cdp is false, and that can only be // the case when interception isn't possible. if (comptime IS_DEBUG) { - std.debug.assert(http_client.intercepted == 0); + std.debug.assert(http_client.interception_layer.intercepted == 0); } if (browser.hasBackgroundTasks()) { diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index a7c5672f..2de7ee3d 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -400,54 +400,49 @@ fn failRequest(cmd: *CDP.Command) !void { } pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notification.RequestAuthRequired) !void { - _ = bc; - _ = intercept; - return error.NullAuthChallenge; + // detachTarget could be called, in which case, we still have a frame doing + // things, but no session. + const session_id = bc.session_id orelse return; + + // We keep it around to wait for modifications to the request. + // NOTE: we assume whomever created the request created it with a lifetime of the Page. + // TODO: What to do when receiving replies for a previous frame's requests? 
+ + const intercept_ctx = intercept.intercept_ctx; + const request = intercept.request; + try bc.intercept_state.put(request.*); + + const challenge = intercept_ctx.auth_challenge orelse return error.NullAuthChallenge; + + try bc.cdp.sendEvent("Fetch.authRequired", .{ + .requestId = &id.toInterceptId(request.params.request_id), + .frameId = &id.toFrameId(request.params.frame_id), + .request = network.RequestWriter.init(request), + .resourceType = switch (request.params.resource_type) { + .script => "Script", + .xhr => "XHR", + .document => "Document", + .fetch => "Fetch", + }, + .authChallenge = .{ + .origin = "", // TODO get origin, could be the proxy address for example. + .source = if (challenge.source) |s| (if (s == .server) "Server" else "Proxy") else "", + .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", + .realm = challenge.realm orelse "", + }, + .networkId = &id.toRequestId2(request), + }, .{ .session_id = session_id }); + + log.debug(.cdp, "request auth required", .{ + .state = "paused", + .id = request.params.request_id, + .url = request.params.url, + }); + // Await continueWithAuth + + intercept.wait_for_interception.* = true; } -// pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notification.RequestAuthRequired) !void { -// // detachTarget could be called, in which case, we still have a frame doing -// // things, but no session. -// const session_id = bc.session_id orelse return; - -// // We keep it around to wait for modifications to the request. -// // NOTE: we assume whomever created the request created it with a lifetime of the Page. -// // TODO: What to do when receiving replies for a previous frame's requests? 
- -// const transfer = intercept.transfer; -// try bc.intercept_state.put(transfer); - -// const challenge = transfer._auth_challenge orelse return error.NullAuthChallenge; - -// try bc.cdp.sendEvent("Fetch.authRequired", .{ -// .requestId = &id.toInterceptId(transfer.id), -// .frameId = &id.toFrameId(transfer.req.params.frame_id), -// .request = network.TransferAsRequestWriter.init(transfer), -// .resourceType = switch (transfer.req.params.resource_type) { -// .script => "Script", -// .xhr => "XHR", -// .document => "Document", -// .fetch => "Fetch", -// }, -// .authChallenge = .{ -// .origin = "", // TODO get origin, could be the proxy address for example. -// .source = if (challenge.source) |s| (if (s == .server) "Server" else "Proxy") else "", -// .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", -// .realm = challenge.realm orelse "", -// }, -// .networkId = &id.toRequestId(transfer), -// }, .{ .session_id = session_id }); - -// log.debug(.cdp, "request auth required", .{ -// .state = "paused", -// .id = transfer.id, -// .url = transfer.url, -// }); -// // Await continueWithAuth - -// intercept.wait_for_interception.* = true; -// } - // Get u32 from requestId which is formatted as: "INT-{d}" fn idFromRequestId(request_id: []const u8) !u32 { if (!std.mem.startsWith(u8, request_id, "INT-")) { diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index c77319ae..239b5122 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -145,7 +145,7 @@ fn setLifecycleEventsEnabled(cmd: *CDP.Command) !void { const http_client = frame._session.browser.http_client; const http_active = http_client.http_active; - const total_network_activity = http_active + http_client.intercepted; + const total_network_activity = http_active + http_client.interception_layer.intercepted; if (frame._notified_network_almost_idle.check(total_network_activity <= 2)) { try sendPageLifecycle(bc, "networkAlmostIdle", now, frame_id, 
loader_id); } diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index d6905614..f5bc6aaa 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -25,6 +25,7 @@ const IS_DEBUG = builtin.mode == .Debug; const http = @import("../http.zig"); const URL = @import("../../browser/URL.zig"); const Client = @import("../../browser/HttpClient.zig").Client; +const Transfer = @import("../../browser/HttpClient.zig").Transfer; const Request = @import("../../browser/HttpClient.zig").Request; const Response = @import("../../browser/HttpClient.zig").Response; const FulfilledResponse = @import("../../browser/HttpClient.zig").FulfilledResponse; @@ -59,6 +60,7 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { const intercept_ctx = try pre_wrap_req.params.arena.allocator().create(InterceptContext); intercept_ctx.* = .{ .forward = Forward.fromRequest(pre_wrap_req), + .layer = self, .request = pre_wrap_req, }; @@ -96,18 +98,59 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { pub const InterceptContext = struct { forward: Forward, + layer: *InterceptionLayer, request: Request, content_length: usize = 0, + auth_challenge: ?http.AuthChallenge = null, + tries: usize = 0, + fn startCallback(response: Response) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + log.debug(.http, "intercept start", .{ .url = self.request.params.url }); return self.forward.forwardStart(response); } fn headerCallback(response: Response) anyerror!bool { const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + log.debug(.http, "intercept header", .{ + .url = self.request.params.url, + .status = response.status(), + .content_length = response.contentLength(), + }); + self.content_length = response.contentLength() orelse 0; + switch (response.inner) { + .transfer => |t| { + const status = t.response_header.?.status; + if 
(status == 401 or status == 407) { + self.auth_challenge = Transfer.detectAuthChallenge(t._conn.?); + + if (self.auth_challenge != null and self.tries < 10) { + var wait_for_interception = false; + + self.request.params.notification.dispatch(.http_request_auth_required, &.{ + .request = &self.request, + .intercept_ctx = self, + .wait_for_interception = &wait_for_interception, + }); + + if (wait_for_interception) { + log.debug(.http, "intercept auth required", .{ + .url = self.request.params.url, + .status = status, + .intercepted = self.layer.intercepted, + }); + self.layer.intercepted += 1; + return false; + } + } + } + }, + else => {}, + } + self.request.params.notification.dispatch(.http_response_header_done, &.{ .request = &self.request, .response = &response, @@ -117,6 +160,10 @@ pub const InterceptContext = struct { fn dataCallback(response: Response, chunk: []const u8) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + log.debug(.http, "intercept data", .{ + .url = self.request.params.url, + .len = chunk.len, + }); self.request.params.notification.dispatch(.http_response_data, &.{ .data = chunk, @@ -128,6 +175,10 @@ pub const InterceptContext = struct { fn doneCallback(ctx: *anyopaque) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + log.debug(.http, "intercept done", .{ + .url = self.request.params.url, + .content_length = self.content_length, + }); self.request.params.notification.dispatch(.http_request_done, &.{ .request = &self.request, .content_length = self.content_length, @@ -137,6 +188,10 @@ pub const InterceptContext = struct { fn errorCallback(ctx: *anyopaque, err: anyerror) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + log.debug(.http, "intercept error", .{ + .url = self.request.params.url, + .err = err, + }); self.request.params.notification.dispatch(.http_request_fail, &.{ .request = &self.request, .err = err, @@ -146,6 +201,7 @@ pub const InterceptContext = 
struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + log.debug(.http, "intercept shutdown", .{ .url = self.request.params.url }); self.forward.forwardShutdown(); } }; From dddd0dfb90db95274f37def99dcafa7e686c631c Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 21:11:13 -0700 Subject: [PATCH 15/36] fix request id mismatch on cdp --- src/network/layer/CacheLayer.zig | 1 - src/network/layer/Forward.zig | 2 - src/network/layer/InterceptionLayer.zig | 65 ++++++++++++++----------- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 6c5c72c6..317457b5 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -80,7 +80,6 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const wrapped = cache_ctx.forward.wrapRequest( req, cache_ctx, - client.incrReqId(), .{ .start = CacheContext.startCallback, .header = CacheContext.headerCallback, diff --git a/src/network/layer/Forward.zig b/src/network/layer/Forward.zig index 2a81ec14..b11ff23f 100644 --- a/src/network/layer/Forward.zig +++ b/src/network/layer/Forward.zig @@ -54,14 +54,12 @@ pub fn wrapRequest( self: *Forward, req: Request, new_ctx: anytype, - new_id: u32, overrides: Overrides, ) Request { const T = @TypeOf(new_ctx.*); const PassthroughT = makePassthrough(T, "forward"); var wrapped = req; wrapped.ctx = new_ctx; - wrapped.params.request_id = new_id; wrapped.start_callback = overrides.start orelse if (self.start != null) PassthroughT.start else null; wrapped.header_callback = overrides.header orelse PassthroughT.header; wrapped.data_callback = overrides.data orelse PassthroughT.data; diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index f5bc6aaa..21225815 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ 
-67,7 +67,6 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { var req = intercept_ctx.forward.wrapRequest( pre_wrap_req, intercept_ctx, - client.incrReqId(), .{ .start = InterceptContext.startCallback, .header = InterceptContext.headerCallback, @@ -86,6 +85,12 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { .wait_for_interception = &wait_for_interception, }); + log.debug(.http, "interception check", .{ + .wait_for_interception = wait_for_interception, + .intercepted = self.intercepted, + .url = req.params.url, + }); + if (!wait_for_interception) { return self.next.request(client, req); } @@ -121,35 +126,35 @@ pub const InterceptContext = struct { self.content_length = response.contentLength() orelse 0; - switch (response.inner) { - .transfer => |t| { - const status = t.response_header.?.status; - if (status == 401 or status == 407) { - self.auth_challenge = Transfer.detectAuthChallenge(t._conn.?); + // switch (response.inner) { + // .transfer => |t| { + // const status = t.response_header.?.status; + // if (status == 401 or status == 407) { + // self.auth_challenge = Transfer.detectAuthChallenge(t._conn.?); - if (self.auth_challenge != null and self.tries < 10) { - var wait_for_interception = false; + // if (self.auth_challenge != null and self.tries < 10) { + // var wait_for_interception = false; - self.request.params.notification.dispatch(.http_request_auth_required, &.{ - .request = &self.request, - .intercept_ctx = self, - .wait_for_interception = &wait_for_interception, - }); + // self.request.params.notification.dispatch(.http_request_auth_required, &.{ + // .request = &self.request, + // .intercept_ctx = self, + // .wait_for_interception = &wait_for_interception, + // }); - if (wait_for_interception) { - log.debug(.http, "intercept auth required", .{ - .url = self.request.params.url, - .status = status, - .intercepted = self.layer.intercepted, - }); - self.layer.intercepted += 1; - return false; 
- } - } - } - }, - else => {}, - } + // if (wait_for_interception) { + // log.debug(.http, "intercept auth required", .{ + // .url = self.request.params.url, + // .status = status, + // .intercepted = self.layer.intercepted, + // }); + // self.layer.intercepted += 1; + // return false; + // } + // } + // } + // }, + // else => {}, + // } self.request.params.notification.dispatch(.http_response_header_done, &.{ .request = &self.request, @@ -202,6 +207,10 @@ pub const InterceptContext = struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); log.debug(.http, "intercept shutdown", .{ .url = self.request.params.url }); + self.request.params.notification.dispatch(.http_request_fail, &.{ + .request = &self.request, + .err = error.Shutdown, + }); self.forward.forwardShutdown(); } }; @@ -269,7 +278,7 @@ pub fn fulfillRequest( body: ?[]const u8, ) !void { if (comptime IS_DEBUG) { - log.debug(.http, "filfull transfer", .{ .intercepted = self.intercepted }); + log.debug(.http, "fulfill transfer", .{ .intercepted = self.intercepted }); } self.intercepted -= 1; From d0b421b085c9f502e455dcd8e63f88ce71869f2b Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 21:41:59 -0700 Subject: [PATCH 16/36] partial auth challenge support --- src/cdp/domains/fetch.zig | 30 +++++------ src/network/layer/InterceptionLayer.zig | 71 +++++++++++++++---------- 2 files changed, 57 insertions(+), 44 deletions(-) diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 2de7ee3d..0f638b81 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -300,7 +300,7 @@ fn continueWithAuth(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const request = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = intercept_state.remove(request_id) orelse return error.RequestNotFound; log.debug(.cdp, 
"request intercept", .{ .state = "continue with auth", @@ -308,27 +308,27 @@ fn continueWithAuth(cmd: *CDP.Command) !void { .response = params.authChallengeResponse.response, }); + const client = bc.cdp.browser.http_client; + if (params.authChallengeResponse.response != .ProvideCredentials) { - // TODO: - // request.abortAuthChallenge(); + client.interception_layer.abortAuthChallenge(request); return cmd.sendResult(null, .{}); } - // TODO: // cancel the request, deinit the transfer on error. - // errdefer request.abortAuthChallenge(); + errdefer client.interception_layer.abortAuthChallenge(request); - // todo: - // restart the request with the provided credentials. - // const arena = request.params.arena.allocator(); - // request.updateCredentials( - // try std.fmt.allocPrintSentinel(arena, "{s}:{s}", .{ - // params.authChallengeResponse.username, - // params.authChallengeResponse.password, - // }, 0), - // ); + const arena = request.params.arena.allocator(); + request.params.credentials = try std.fmt.allocPrintSentinel( + arena, + "{s}:{s}", + .{ + params.authChallengeResponse.username, + params.authChallengeResponse.password, + }, + 0, + ); - const client = bc.cdp.browser.http_client; try client.interception_layer.continueRequest(client, request); return cmd.sendResult(null, .{}); } diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 21225815..823a5314 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -126,40 +126,21 @@ pub const InterceptContext = struct { self.content_length = response.contentLength() orelse 0; - // switch (response.inner) { - // .transfer => |t| { - // const status = t.response_header.?.status; - // if (status == 401 or status == 407) { - // self.auth_challenge = Transfer.detectAuthChallenge(t._conn.?); - - // if (self.auth_challenge != null and self.tries < 10) { - // var wait_for_interception = false; - - // 
self.request.params.notification.dispatch(.http_request_auth_required, &.{ - // .request = &self.request, - // .intercept_ctx = self, - // .wait_for_interception = &wait_for_interception, - // }); - - // if (wait_for_interception) { - // log.debug(.http, "intercept auth required", .{ - // .url = self.request.params.url, - // .status = status, - // .intercepted = self.layer.intercepted, - // }); - // self.layer.intercepted += 1; - // return false; - // } - // } - // } - // }, - // else => {}, - // } + switch (response.inner) { + .transfer => |t| { + const status = t.response_header.?.status; + if (status == 401 or status == 407) { + self.auth_challenge = Transfer.detectAuthChallenge(t._conn.?); + } + }, + else => {}, + } self.request.params.notification.dispatch(.http_response_header_done, &.{ .request = &self.request, .response = &response, }); + return self.forward.forwardHeader(response); } @@ -184,6 +165,27 @@ pub const InterceptContext = struct { .url = self.request.params.url, .content_length = self.content_length, }); + + // if (self.auth_challenge != null and self.tries < 10) { + // var wait_for_interception = false; + // self.request.params.notification.dispatch(.http_request_auth_required, &.{ + // .request = &self.request, + // .intercept_ctx = self, + // .wait_for_interception = &wait_for_interception, + // }); + + // if (wait_for_interception) { + // log.debug(.http, "intercept auth required", .{ + // .url = self.request.params.url, + // .intercepted = self.layer.intercepted, + // }); + // self.layer.intercepted += 1; + // self.tries += 1; + // // Don't forward done — CDP owns this now, will retry via continueWithAuth + // return; + // } + // } + self.request.params.notification.dispatch(.http_request_done, &.{ .request = &self.request, .content_length = self.content_length, @@ -289,3 +291,14 @@ pub fn fulfillRequest( return err; }; } + +pub fn abortAuthChallenge(self: *InterceptionLayer, req: Request) void { + if (comptime IS_DEBUG) { + 
log.debug(.http, "abort auth transfer", .{ .intercepted = self.intercepted }); + } + + self.intercepted -= 1; + defer req.deinit(); + req.error_callback(req.ctx, error.AbortAuthChallenge); + return; +} From 3db3281e8eff0415f9de95ce93b27c7e5396e37d Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 22:04:28 -0700 Subject: [PATCH 17/36] working authentication with InterceptionLayer --- src/Notification.zig | 3 +- src/browser/HttpClient.zig | 68 ++++++++--- src/browser/ScriptManager.zig | 4 - src/cdp/CDP.zig | 13 ++- src/cdp/domains/fetch.zig | 58 +++++---- src/cdp/domains/network.zig | 149 ------------------------ src/network/layer/InterceptionLayer.zig | 33 ------ 7 files changed, 98 insertions(+), 230 deletions(-) diff --git a/src/Notification.zig b/src/Notification.zig index 96b8f822..f429e7b9 100644 --- a/src/Notification.zig +++ b/src/Notification.zig @@ -174,8 +174,7 @@ pub const RequestIntercept = struct { }; pub const RequestAuthRequired = struct { - request: *Request, - intercept_ctx: *InterceptContext, + transfer: *Transfer, wait_for_interception: *bool, }; diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 042b5406..7600ad5a 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -639,6 +639,33 @@ fn perform(self: *Client, timeout_ms: c_int) anyerror!PerformStatus { } fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *Transfer) !bool { + if (msg.err == null or msg.err.? == error.RecvError) { + transfer.detectAuthChallenge(msg.conn); + } + + // In case of auth challenge + // TODO give a way to configure the number of auth retries. 
+ if (transfer._auth_challenge != null and transfer._tries < 10) { + var wait_for_interception = false; + transfer.req.params.notification.dispatch( + .http_request_auth_required, + &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }, + ); + if (wait_for_interception) { + self.interception_layer.intercepted += 1; + if (comptime IS_DEBUG) { + log.debug(.http, "wait for auth interception", .{ .intercepted = self.interception_layer.intercepted }); + } + + // Whether or not this is a blocking request, we're not going + // to process it now. We can end the transfer, which will + // release the easy handle back into the pool. The transfer + // is still valid/alive (just has no handle). + transfer.releaseConn(); + return false; + } + } + // Handle redirects: reuse the same connection to preserve TCP state. if (msg.err == null) { const status = try msg.conn.getResponseCode(); @@ -1068,7 +1095,6 @@ pub const Transfer = struct { // for when a Transfer is queued in the client.queue _node: std.DoublyLinkedList.Node = .{}, - _intercept_state: InterceptState = .not_intercepted, const InterceptState = union(enum) { not_intercepted, @@ -1312,24 +1338,25 @@ pub const Transfer = struct { } } - pub fn detectAuthChallenge(conn: *const http.Connection) ?http.AuthChallenge { - const status = conn.getResponseCode() catch return null; - const connect_status = conn.getConnectCode() catch return null; + fn detectAuthChallenge(transfer: *Transfer, conn: *const http.Connection) void { + const status = conn.getResponseCode() catch return; + const connect_status = conn.getConnectCode() catch return; if (status != 401 and status != 407 and connect_status != 401 and connect_status != 407) { - return null; + transfer._auth_challenge = null; + return; } if (conn.getResponseHeader("WWW-Authenticate", 0)) |hdr| { - return http.AuthChallenge.parse(status, .server, hdr.value) catch null; + transfer._auth_challenge = http.AuthChallenge.parse(status, .server, hdr.value) catch 
null; } else if (conn.getConnectHeader("WWW-Authenticate", 0)) |hdr| { - return http.AuthChallenge.parse(status, .server, hdr.value) catch null; + transfer._auth_challenge = http.AuthChallenge.parse(status, .server, hdr.value) catch null; } else if (conn.getResponseHeader("Proxy-Authenticate", 0)) |hdr| { - return http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; + transfer._auth_challenge = http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; } else if (conn.getConnectHeader("Proxy-Authenticate", 0)) |hdr| { - return http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; + transfer._auth_challenge = http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; } else { - return .{ .status = status, .source = null, .scheme = null, .realm = null }; + transfer._auth_challenge = .{ .status = status, .source = null, .scheme = null, .realm = null }; } } @@ -1358,16 +1385,12 @@ pub const Transfer = struct { // before interception process. pub fn abortAuthChallenge(self: *Transfer) void { if (comptime IS_DEBUG) { - std.debug.assert(self._intercept_state != .not_intercepted); - log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.intercepted }); + log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.interception_layer.intercepted }); } - self.client.intercepted -= 1; - if (!self.req.params.blocking) { - self.abort(error.AbortAuthChallenge); - return; - } - self._intercept_state = .{ .abort = error.AbortAuthChallenge }; + self.client.interception_layer.intercepted -= 1; + self.abort(error.AbortAuthChallenge); + return; } // headerDoneCallback is called once the headers have been read. 
@@ -1551,6 +1574,15 @@ pub const Transfer = struct { } }; +pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { + if (comptime IS_DEBUG) { + log.debug(.http, "continue transfer", .{ .intercepted = self.interception_layer.intercepted }); + } + + self.interception_layer.intercepted -= 1; + return self.process(transfer); +} + const Noop = struct { fn headerCallback(_: Response) !bool { return true; diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 28a3aefe..f707f390 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -680,7 +680,6 @@ pub const Script = struct { debug_transfer_aborted: bool = false, debug_transfer_bytes_received: usize = 0, debug_transfer_notified_fail: bool = false, - debug_transfer_intercept_state: u8 = 0, debug_transfer_auth_challenge: bool = false, debug_transfer_easy_id: usize = 0, @@ -756,7 +755,6 @@ pub const Script = struct { .a3 = self.debug_transfer_aborted, .a4 = self.debug_transfer_bytes_received, .a5 = self.debug_transfer_notified_fail, - .a7 = self.debug_transfer_intercept_state, .a8 = self.debug_transfer_auth_challenge, .a9 = self.debug_transfer_easy_id, .b1 = transfer.id, @@ -764,7 +762,6 @@ pub const Script = struct { .b3 = transfer.aborted, .b4 = transfer.bytes_received, .b5 = transfer._notified_fail, - .b7 = @intFromEnum(transfer._intercept_state), .b8 = transfer._auth_challenge != null, .b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0, }); @@ -774,7 +771,6 @@ pub const Script = struct { self.debug_transfer_aborted = transfer.aborted; self.debug_transfer_bytes_received = transfer.bytes_received; self.debug_transfer_notified_fail = transfer._notified_fail; - self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state); self.debug_transfer_auth_challenge = transfer._auth_challenge != null; self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0; }, diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 
023f8bf2..326e93c5 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -450,9 +450,16 @@ pub const BrowserContext = struct { // abort all intercepted requests before closing the session/page // since some of these might callback into the page/scriptmanager - for (self.intercept_state.pendingRequests()) |request| { - defer request.deinit(); - request.error_callback(request.ctx, error.ClientDisconnect); + for (self.intercept_state.pendingIntercepts()) |intercept| { + switch (intercept) { + .transfer => |t| { + t.abort(error.ClientDisconnect); + }, + .request => |r| { + defer r.deinit(); + r.error_callback(r.ctx, error.ClientDisconnect); + }, + } } for (self.isolated_worlds.items) |world| { diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 0f638b81..7eff321e 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -54,7 +54,12 @@ pub fn processMessage(cmd: *CDP.Command) !void { // Stored in CDP pub const InterceptState = struct { allocator: Allocator, - waiting: std.AutoArrayHashMapUnmanaged(u32, HttpClient.Request), + waiting: std.AutoArrayHashMapUnmanaged(u32, Pending), + + const Pending = union(enum) { + transfer: *HttpClient.Transfer, + request: HttpClient.Request, + }; pub fn init(allocator: Allocator) !InterceptState { return .{ @@ -67,11 +72,15 @@ pub const InterceptState = struct { return self.waiting.count() == 0; } - pub fn put(self: *InterceptState, request: HttpClient.Request) !void { - return self.waiting.put(self.allocator, request.params.request_id, request); + pub fn putRequest(self: *InterceptState, request: HttpClient.Request) !void { + return self.waiting.put(self.allocator, request.params.request_id, .{ .request = request }); } - pub fn remove(self: *InterceptState, request_id: u32) ?HttpClient.Request { + pub fn putTransfer(self: *InterceptState, transfer: *HttpClient.Transfer) !void { + return self.waiting.put(self.allocator, transfer.id, .{ .transfer = transfer }); + } + + pub fn remove(self: 
*InterceptState, request_id: u32) ?Pending { const entry = self.waiting.fetchSwapRemove(request_id) orelse return null; return entry.value; } @@ -80,7 +89,7 @@ pub const InterceptState = struct { self.waiting.deinit(self.allocator); } - pub fn pendingRequests(self: *const InterceptState) []HttpClient.Request { + pub fn pendingIntercepts(self: *const InterceptState) []Pending { return self.waiting.values(); } }; @@ -193,7 +202,7 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification. // TODO: What to do when receiving replies for a previous frame's requests? const request = intercept.request; - try bc.intercept_state.put(request.*); + try bc.intercept_state.putRequest(request.*); try bc.cdp.sendEvent("Fetch.requestPaused", .{ .requestId = &id.toInterceptId(request.params.request_id), @@ -235,7 +244,9 @@ fn continueRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - var request = intercept_state.remove(request_id) orelse return error.RequestNotFound; + + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = pending.request; log.debug(.cdp, "request intercept", .{ .state = "continue", @@ -300,7 +311,9 @@ fn continueWithAuth(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - var request = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const transfer = pending.transfer; + var request = transfer.req; log.debug(.cdp, "request intercept", .{ .state = "continue with auth", @@ -311,15 +324,15 @@ fn continueWithAuth(cmd: *CDP.Command) !void { const client = bc.cdp.browser.http_client; if (params.authChallengeResponse.response != .ProvideCredentials) { - client.interception_layer.abortAuthChallenge(request); + 
transfer.abortAuthChallenge(); return cmd.sendResult(null, .{}); } // cancel the request, deinit the transfer on error. - errdefer client.interception_layer.abortAuthChallenge(request); + errdefer transfer.abortAuthChallenge(); const arena = request.params.arena.allocator(); - request.params.credentials = try std.fmt.allocPrintSentinel( + transfer.updateCredentials(try std.fmt.allocPrintSentinel( arena, "{s}:{s}", .{ @@ -327,9 +340,9 @@ fn continueWithAuth(cmd: *CDP.Command) !void { params.authChallengeResponse.password, }, 0, - ); + )); - try client.interception_layer.continueRequest(client, request); + try client.continueTransfer(transfer); return cmd.sendResult(null, .{}); } @@ -352,7 +365,9 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - var request = intercept_state.remove(request_id) orelse return error.RequestNotFound; + + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = pending.request; log.debug(.cdp, "request intercept", .{ .state = "fulfilled", @@ -385,7 +400,8 @@ fn failRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const request = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const request = pending.request; const client = bc.cdp.browser.http_client; defer client.interception_layer.abortRequest(client, request); @@ -408,16 +424,16 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati // NOTE: we assume whomever created the request created it with a lifetime of the Page. // TODO: What to do when receiving replies for a previous frame's requests? 
- const intercept_ctx = intercept.intercept_ctx; - const request = intercept.request; - try bc.intercept_state.put(request.*); + const transfer = intercept.transfer; + try bc.intercept_state.putTransfer(transfer); + var request = transfer.req; - const challenge = intercept_ctx.auth_challenge orelse return error.NullAuthChallenge; + const challenge = transfer._auth_challenge orelse return error.NullAuthChallenge; try bc.cdp.sendEvent("Fetch.authRequired", .{ .requestId = &id.toInterceptId(request.params.request_id), .frameId = &id.toFrameId(request.params.frame_id), - .request = network.RequestWriter.init(request), + .request = network.RequestWriter.init(&request), .resourceType = switch (request.params.resource_type) { .script => "Script", .xhr => "XHR", @@ -430,7 +446,7 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", .realm = challenge.realm orelse "", }, - .networkId = &id.toRequestId2(request), + .networkId = &id.toRequestId2(&request), }, .{ .session_id = session_id }); log.debug(.cdp, "request auth required", .{ diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 06581c15..1458bb0c 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -386,63 +386,6 @@ pub const RequestWriter = struct { } }; -pub const TransferAsRequestWriter = struct { - transfer: *Transfer, - - pub fn init(transfer: *Transfer) TransferAsRequestWriter { - return .{ - .transfer = transfer, - }; - } - - pub fn jsonStringify(self: *const TransferAsRequestWriter, jws: anytype) !void { - self._jsonStringify(jws) catch return error.WriteFailed; - } - fn _jsonStringify(self: *const TransferAsRequestWriter, jws: anytype) !void { - const transfer = self.transfer; - - try jws.beginObject(); - { - try jws.objectField("url"); - try jws.write(transfer.url); - } - - { - const frag = URL.getHash(transfer.url); - if (frag.len > 0) { - try 
jws.objectField("urlFragment"); - try jws.write(frag); - } - } - - { - try jws.objectField("method"); - try jws.write(@tagName(transfer.req.params.method)); - } - - { - try jws.objectField("hasPostData"); - try jws.write(transfer.req.params.body != null); - } - - { - try jws.objectField("headers"); - try jws.beginObject(); - var it = transfer.req.params.headers.iterator(); - while (it.next()) |hdr| { - try jws.objectField(hdr.name); - try jws.write(hdr.value); - } - if (try transfer.req.getCookieString()) |cookies| { - try jws.objectField("Cookie"); - try jws.write(cookies[0 .. cookies.len - 1]); - } - try jws.endObject(); - } - try jws.endObject(); - } -}; - const ResponseWriter = struct { arena: Allocator, response: *const Response, @@ -533,98 +476,6 @@ const ResponseWriter = struct { } }; -const TransferAsResponseWriter = struct { - arena: Allocator, - transfer: *Transfer, - - fn init(arena: Allocator, transfer: *Transfer) TransferAsResponseWriter { - return .{ - .arena = arena, - .transfer = transfer, - }; - } - - pub fn jsonStringify(self: *const TransferAsResponseWriter, jws: anytype) !void { - self._jsonStringify(jws) catch return error.WriteFailed; - } - - fn _jsonStringify(self: *const TransferAsResponseWriter, jws: anytype) !void { - const transfer = self.transfer; - - try jws.beginObject(); - { - try jws.objectField("url"); - try jws.write(transfer.url); - } - - if (transfer.response_header) |*rh| { - // it should not be possible for this to be false, but I'm not - // feeling brave today. 
- const status = rh.status; - try jws.objectField("status"); - try jws.write(status); - - try jws.objectField("statusText"); - try jws.write(@as(std.http.Status, @enumFromInt(status)).phrase() orelse "Unknown"); - } - - { - const mime: Mime = blk: { - if (transfer.response_header.?.contentType()) |ct| { - break :blk try Mime.parse(ct); - } - break :blk .unknown; - }; - - try jws.objectField("mimeType"); - try jws.write(mime.contentTypeString()); - try jws.objectField("charset"); - try jws.write(mime.charsetString()); - } - - { - try jws.objectField("timing"); - try jws.write(.{ - .requestTime = transfer.start_time, - .connectEnd = -1, - .connectStart = -1, - .dnsEnd = -1, - .dnsStart = -1, - .proxyEnd = -1, - .proxyStart = -1, - .receiveHeadersEnd = -1, - .receiveHeadersStart = -1, - .sendEnd = -1, - .sendStart = -1, - .sslEnd = -1, - .sslStart = -1, - }); - } - - { - // chromedp doesn't like having duplicate header names. It's pretty - // common to get these from a server (e.g. for Cache-Control), but - // Chrome joins these. So we have to too. - const arena = self.arena; - var it = transfer.responseHeaderIterator(); - var map: std.StringArrayHashMapUnmanaged([]const u8) = .empty; - while (it.next()) |hdr| { - const gop = try map.getOrPut(arena, hdr.name); - if (gop.found_existing) { - // yes, chrome joins multi-value headers with a \n - gop.value_ptr.* = try std.mem.join(arena, "\n", &.{ gop.value_ptr.*, hdr.value }); - } else { - gop.value_ptr.* = hdr.value; - } - } - - try jws.objectField("headers"); - try jws.write(std.json.ArrayHashMap([]const u8){ .map = map }); - } - try jws.endObject(); - } -}; - fn idFromRequestId(request_id: []const u8) !u64 { // The requesIid for the original document is its loaderId. 
if (!std.mem.startsWith(u8, request_id, "REQ-") and !std.mem.startsWith(u8, request_id, "LID-")) { diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 823a5314..fd868af1 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -107,9 +107,6 @@ pub const InterceptContext = struct { request: Request, content_length: usize = 0, - auth_challenge: ?http.AuthChallenge = null, - tries: usize = 0, - fn startCallback(response: Response) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); log.debug(.http, "intercept start", .{ .url = self.request.params.url }); @@ -126,16 +123,6 @@ pub const InterceptContext = struct { self.content_length = response.contentLength() orelse 0; - switch (response.inner) { - .transfer => |t| { - const status = t.response_header.?.status; - if (status == 401 or status == 407) { - self.auth_challenge = Transfer.detectAuthChallenge(t._conn.?); - } - }, - else => {}, - } - self.request.params.notification.dispatch(.http_response_header_done, &.{ .request = &self.request, .response = &response, @@ -166,26 +153,6 @@ pub const InterceptContext = struct { .content_length = self.content_length, }); - // if (self.auth_challenge != null and self.tries < 10) { - // var wait_for_interception = false; - // self.request.params.notification.dispatch(.http_request_auth_required, &.{ - // .request = &self.request, - // .intercept_ctx = self, - // .wait_for_interception = &wait_for_interception, - // }); - - // if (wait_for_interception) { - // log.debug(.http, "intercept auth required", .{ - // .url = self.request.params.url, - // .intercepted = self.layer.intercepted, - // }); - // self.layer.intercepted += 1; - // self.tries += 1; - // // Don't forward done — CDP owns this now, will retry via continueWithAuth - // return; - // } - // } - self.request.params.notification.dispatch(.http_request_done, &.{ .request = &self.request, 
.content_length = self.content_length, From ca08f0c56dc2dad363c742e2f67007c39bcf4888 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 22:10:33 -0700 Subject: [PATCH 18/36] remove blocking from RequestParams --- src/browser/HttpClient.zig | 7 ------- src/browser/ScriptManager.zig | 2 -- src/network/layer/RobotsLayer.zig | 1 - 3 files changed, 10 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 7600ad5a..db3b98b9 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -871,13 +871,6 @@ pub const RequestParams = struct { notification: *Notification, timeout_ms: u32 = 0, - // This is only relevant for intercepted requests. If a request is flagged - // as blocking AND is intercepted, then it'll be up to us to wait until - // we receive a response to the interception. This probably isn't ideal, - // but it's harder for our caller (ScriptManager) to deal with this. One - // reason for that is the Http Client is already a bit CDP-aware. 
- blocking: bool = false, - const ResourceType = enum { document, xhr, diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index f707f390..12cc6cb5 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -292,7 +292,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e .frame_id = frame._frame_id, .loader_id = frame._loader_id, .headers = headers, - .blocking = true, .cookie_jar = &frame._session.cookie_jar, .cookie_origin = frame.url, .resource_type = .script, @@ -316,7 +315,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e .frame_id = frame._frame_id, .loader_id = frame._loader_id, .headers = headers, - .blocking = false, .cookie_jar = &frame._session.cookie_jar, .cookie_origin = frame.url, .resource_type = .script, diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index eb38b720..aa3d0e5f 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -114,7 +114,6 @@ fn fetchRobotsThenRequest( .url = robots_url, .method = .GET, .headers = headers, - .blocking = false, .frame_id = req.params.frame_id, .loader_id = req.params.loader_id, .cookie_jar = req.params.cookie_jar, From 13cc122e26d4d86d3ee08f0b9238907d837379d1 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 22:10:40 -0700 Subject: [PATCH 19/36] remove InterceptState --- src/browser/HttpClient.zig | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index db3b98b9..c61c457a 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -1089,14 +1089,6 @@ pub const Transfer = struct { // for when a Transfer is queued in the client.queue _node: std.DoublyLinkedList.Node = .{}, - const InterceptState = union(enum) { - not_intercepted, - pending, - @"continue", - abort: anyerror, - fulfilled, - }; - fn releaseConn(self: *Transfer) void { if 
(self._conn) |conn| { self.client.removeConn(conn); From 87eec578aaeddf8977eff08a8276b6eae3c86fee Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 22:40:43 -0700 Subject: [PATCH 20/36] use arena pool in InterceptionLayer --- src/network/layer/InterceptionLayer.zig | 31 ++++++++++++++++++------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index fd868af1..041af5f1 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -23,9 +23,7 @@ const log = @import("../../log.zig"); const IS_DEBUG = builtin.mode == .Debug; const http = @import("../http.zig"); -const URL = @import("../../browser/URL.zig"); const Client = @import("../../browser/HttpClient.zig").Client; -const Transfer = @import("../../browser/HttpClient.zig").Transfer; const Request = @import("../../browser/HttpClient.zig").Request; const Response = @import("../../browser/HttpClient.zig").Response; const FulfilledResponse = @import("../../browser/HttpClient.zig").FulfilledResponse; @@ -54,18 +52,21 @@ pub fn layer(self: *InterceptionLayer) Layer { fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { const self: *InterceptionLayer = @ptrCast(@alignCast(ptr)); - var pre_wrap_req = in_req; - // Wrap callbacks to intercept notifications - const intercept_ctx = try pre_wrap_req.params.arena.allocator().create(InterceptContext); + const arena = try client.network.app.arena_pool.acquire(.small, "InterceptionLayer"); + errdefer client.network.app.arena_pool.release(arena); + + const intercept_ctx = try arena.create(InterceptContext); intercept_ctx.* = .{ - .forward = Forward.fromRequest(pre_wrap_req), + .arena = arena, + .client = client, + .forward = Forward.fromRequest(in_req), .layer = self, - .request = pre_wrap_req, + .request = in_req, }; var req = intercept_ctx.forward.wrapRequest( - pre_wrap_req, + in_req, intercept_ctx, .{ 
.start = InterceptContext.startCallback, @@ -102,6 +103,8 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { } pub const InterceptContext = struct { + arena: std.mem.Allocator, + client: *Client, forward: Forward, layer: *InterceptionLayer, request: Request, @@ -148,6 +151,8 @@ pub const InterceptContext = struct { fn doneCallback(ctx: *anyopaque) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + defer self.client.network.app.arena_pool.release(self.arena); + log.debug(.http, "intercept done", .{ .url = self.request.params.url, .content_length = self.content_length, @@ -162,6 +167,8 @@ pub const InterceptContext = struct { fn errorCallback(ctx: *anyopaque, err: anyerror) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + defer self.client.network.app.arena_pool.release(self.arena); + log.debug(.http, "intercept error", .{ .url = self.request.params.url, .err = err, @@ -175,6 +182,8 @@ pub const InterceptContext = struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + defer self.client.network.app.arena_pool.release(self.arena); + log.debug(.http, "intercept shutdown", .{ .url = self.request.params.url }); self.request.params.notification.dispatch(.http_request_fail, &.{ .request = &self.request, @@ -192,7 +201,11 @@ pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) } self.intercepted -= 1; - return self.next.request(client, req); + self.next.request(client, req) catch |err| { + const ctx: *InterceptContext = @ptrCast(@alignCast(req.ctx)); + client.network.app.arena_pool.release(ctx.arena); + return err; + }; } pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) void { From 175c2cc288b35dfb996f7542d520d9d90bddc1e7 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 22:48:21 -0700 Subject: [PATCH 21/36] ensure robots params have arena and request id --- 
src/network/layer/RobotsLayer.zig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index aa3d0e5f..11b7c8f3 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -111,6 +111,9 @@ fn fetchRobotsThenRequest( try self.next.request(client, .{ .ctx = robots_ctx, .params = .{ + // We have to do this ourselves because we are not going through the top level `request`. + .arena = std.heap.ArenaAllocator.init(client.allocator), + .request_id = client.incrReqId(), .url = robots_url, .method = .GET, .headers = headers, From bb9e238f6c223bbc33528be3713cebb1624ed8dd Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 22:59:11 -0700 Subject: [PATCH 22/36] Requests now use arenas from the arena pool --- src/browser/HttpClient.zig | 23 +++++++++++++++-------- src/cdp/domains/fetch.zig | 8 ++++---- src/network/layer/InterceptionLayer.zig | 19 +++---------------- src/network/layer/RobotsLayer.zig | 17 ++++++++++------- 4 files changed, 32 insertions(+), 35 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index c61c457a..90d6b8ab 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -400,7 +400,10 @@ pub fn request(self: *Client, req: Request) !void { // Assign Request Id. var our_req = req; our_req.params.request_id = self.incrReqId(); - our_req.params.arena = ArenaAllocator.init(self.allocator); + + const arena = try self.network.app.arena_pool.acquire(.small, "Request.arena"); + errdefer self.network.app.arena_pool.release(arena); + our_req.params.arena = arena; return self.entry_layer.request(self, our_req); } @@ -854,7 +857,7 @@ fn ensureNoActiveConnection(self: *const Client) !void { pub const RequestParams = struct { /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. 
- arena: ArenaAllocator = undefined, + arena: Allocator = undefined, /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. request_id: u32 = undefined, @@ -893,7 +896,6 @@ pub const RequestParams = struct { pub fn deinit(self: *const RequestParams) void { self.headers.deinit(); - self.arena.deinit(); } }; @@ -918,7 +920,7 @@ pub const Request = struct { pub fn getCookieString(self: *Request) !?[:0]const u8 { const jar = self.params.cookie_jar orelse return null; - var aw: std.Io.Writer.Allocating = .init(self.params.arena.allocator()); + var aw: std.Io.Writer.Allocating = .init(self.params.arena); try jar.forRequest(self.params.url, &aw.writer, .{ .is_http = true, .origin_url = self.params.cookie_origin, @@ -1102,7 +1104,7 @@ pub const Transfer = struct { self._conn = null; } - self.req.deinit(); + self.client.deinitRequest(self.req); self.client.transfer_pool.destroy(self); } @@ -1266,7 +1268,7 @@ pub const Transfer = struct { fn handleRedirect(transfer: *Transfer) !void { const req = &transfer.req; const conn = transfer._conn.?; - const arena = transfer.req.params.arena.allocator(); + const arena = transfer.req.params.arena; transfer._redirect_count += 1; if (transfer._redirect_count > transfer.client.network.config.httpMaxRedirects()) { @@ -1443,7 +1445,7 @@ pub const Transfer = struct { transfer._callback_error = error.ResponseTooLarge; return http.writefunc_error; } - transfer._stream_buffer.ensureTotalCapacity(transfer.req.params.arena.allocator(), cl) catch {}; + transfer._stream_buffer.ensureTotalCapacity(transfer.req.params.arena, cl) catch {}; } } @@ -1456,7 +1458,7 @@ pub const Transfer = struct { } const chunk = buffer[0..chunk_len]; - transfer._stream_buffer.appendSlice(transfer.req.params.arena.allocator(), chunk) catch |err| { + transfer._stream_buffer.appendSlice(transfer.req.params.arena, chunk) catch |err| { transfer._callback_error = err; return http.writefunc_error; }; @@ -1568,6 +1570,11 @@ pub fn 
continueTransfer(self: *Client, transfer: *Transfer) !void { return self.process(transfer); } +pub fn deinitRequest(self: *Client, req: Request) void { + req.deinit(); + self.network.app.arena_pool.release(req.params.arena); +} + const Noop = struct { fn headerCallback(_: Response) !bool { return true; diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 7eff321e..672e4b92 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -255,7 +255,7 @@ fn continueRequest(cmd: *CDP.Command) !void { .new_url = params.url, }); - const arena = request.params.arena.allocator(); + const arena = request.params.arena; // Update the request with the new parameters if (params.url) |url| { request.params.url = try arena.dupeZ(u8, url); @@ -313,7 +313,7 @@ fn continueWithAuth(cmd: *CDP.Command) !void { const request_id = try idFromRequestId(params.requestId); const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; const transfer = pending.transfer; - var request = transfer.req; + const request = transfer.req; log.debug(.cdp, "request intercept", .{ .state = "continue with auth", @@ -331,7 +331,7 @@ fn continueWithAuth(cmd: *CDP.Command) !void { // cancel the request, deinit the transfer on error. 
errdefer transfer.abortAuthChallenge(); - const arena = request.params.arena.allocator(); + const arena = request.params.arena; transfer.updateCredentials(try std.fmt.allocPrintSentinel( arena, "{s}:{s}", @@ -380,7 +380,7 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var body: ?[]const u8 = null; if (params.body) |b| { const decoder = std.base64.standard.Decoder; - const buf = try request.params.arena.allocator().alloc(u8, try decoder.calcSizeForSlice(b)); + const buf = try request.params.arena.alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(buf, b); body = buf; } diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 041af5f1..696ad5de 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -209,14 +209,12 @@ pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) } pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) void { - _ = client; - if (comptime IS_DEBUG) { log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); } self.intercepted -= 1; - defer req.deinit(); + defer client.deinitRequest(req); req.error_callback(req.ctx, error.Abort); } @@ -253,7 +251,7 @@ fn fulfillInner( pub fn fulfillRequest( self: *InterceptionLayer, - _: *Client, + client: *Client, req: Request, status: u16, headers: []const http.Header, @@ -264,21 +262,10 @@ pub fn fulfillRequest( } self.intercepted -= 1; - defer req.deinit(); + defer client.deinitRequest(req); fulfillInner(req, status, headers, body) catch |err| { req.error_callback(req.ctx, err); return err; }; } - -pub fn abortAuthChallenge(self: *InterceptionLayer, req: Request) void { - if (comptime IS_DEBUG) { - log.debug(.http, "abort auth transfer", .{ .intercepted = self.intercepted }); - } - - self.intercepted -= 1; - defer req.deinit(); - req.error_callback(req.ctx, error.AbortAuthChallenge); - return; -} diff --git a/src/network/layer/RobotsLayer.zig 
b/src/network/layer/RobotsLayer.zig index 11b7c8f3..92303a68 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -66,7 +66,7 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const path = URL.getPathname(req.params.url); if (!robots.isAllowed(path)) { - defer req.deinit(); + defer client.deinitRequest(req); log.warn(.http, "blocked by robots", .{ .url = req.params.url }); req.error_callback(req.ctx, error.RobotsBlocked); @@ -108,11 +108,14 @@ fn fetchRobotsThenRequest( const headers = try client.newHeaders(); log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); + const new_arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer.fetchRobots"); + errdefer client.network.app.arena_pool.release(new_arena); + try self.next.request(client, .{ .ctx = robots_ctx, .params = .{ // We have to do this ourselves because we are not going through the top level `request`. - .arena = std.heap.ArenaAllocator.init(client.allocator), + .arena = new_arena, .request_id = client.incrReqId(), .url = robots_url, .method = .GET, @@ -145,24 +148,24 @@ fn flushPending(self: *RobotsLayer, client: *Client, robots_url: [:0]const u8, a for (queued.value.items) |queued_req| { if (!allowed) { log.warn(.http, "blocked by robots", .{ .url = queued_req.params.url }); - defer queued_req.deinit(); + defer client.deinitRequest(queued_req); queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); } else { self.next.request(client, queued_req) catch |e| { - defer queued_req.deinit(); + defer client.deinitRequest(queued_req); queued_req.error_callback(queued_req.ctx, e); }; } } } -fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8) void { +fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8, client: *Client) void { var queued = self.pending.fetchRemove(robots_url) orelse @panic("RobotsLayer.flushPendingShutdown: missing queue"); defer 
queued.value.deinit(self.allocator); for (queued.value.items) |queued_req| { - defer queued_req.deinit(); + defer client.deinitRequest(queued_req); if (queued_req.shutdown_callback) |cb| cb(queued_req.ctx); } } @@ -265,6 +268,6 @@ const RobotsContext = struct { defer client.network.app.arena_pool.release(self.arena); log.debug(.http, "robots fetch shutdown", .{}); - l.flushPendingShutdown(robots_url); + l.flushPendingShutdown(robots_url, client); } }; From d14b75d93bb39d14e284790271014f9e4dba4022 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 23:09:08 -0700 Subject: [PATCH 23/36] use Request arnea in InterceptionLayer --- src/network/layer/InterceptionLayer.zig | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 696ad5de..81b231f4 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -53,12 +53,8 @@ pub fn layer(self: *InterceptionLayer) Layer { fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { const self: *InterceptionLayer = @ptrCast(@alignCast(ptr)); - const arena = try client.network.app.arena_pool.acquire(.small, "InterceptionLayer"); - errdefer client.network.app.arena_pool.release(arena); - - const intercept_ctx = try arena.create(InterceptContext); + const intercept_ctx = try in_req.params.arena.create(InterceptContext); intercept_ctx.* = .{ - .arena = arena, .client = client, .forward = Forward.fromRequest(in_req), .layer = self, @@ -103,7 +99,6 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { } pub const InterceptContext = struct { - arena: std.mem.Allocator, client: *Client, forward: Forward, layer: *InterceptionLayer, @@ -151,7 +146,6 @@ pub const InterceptContext = struct { fn doneCallback(ctx: *anyopaque) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); - defer 
self.client.network.app.arena_pool.release(self.arena); log.debug(.http, "intercept done", .{ .url = self.request.params.url, @@ -167,7 +161,6 @@ pub const InterceptContext = struct { fn errorCallback(ctx: *anyopaque, err: anyerror) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); - defer self.client.network.app.arena_pool.release(self.arena); log.debug(.http, "intercept error", .{ .url = self.request.params.url, @@ -182,7 +175,6 @@ pub const InterceptContext = struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); - defer self.client.network.app.arena_pool.release(self.arena); log.debug(.http, "intercept shutdown", .{ .url = self.request.params.url }); self.request.params.notification.dispatch(.http_request_fail, &.{ @@ -203,7 +195,7 @@ pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) self.intercepted -= 1; self.next.request(client, req) catch |err| { const ctx: *InterceptContext = @ptrCast(@alignCast(req.ctx)); - client.network.app.arena_pool.release(ctx.arena); + ctx.client.deinitRequest(req); return err; }; } From fc702794c2ea55e84345f7d5748f1fc3e73ff8cc Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 23:09:53 -0700 Subject: [PATCH 24/36] use Request Arena in WebBotAuthLayer --- src/network/layer/WebBotAuthLayer.zig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig index 25260872..6367e7e8 100644 --- a/src/network/layer/WebBotAuthLayer.zig +++ b/src/network/layer/WebBotAuthLayer.zig @@ -42,9 +42,7 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const wba = client.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth"); - const arena = try client.network.app.arena_pool.acquire(.small, "WebBotAuthLayer"); - defer client.network.app.arena_pool.release(arena); - + const arena = 
req.params.arena; const authority = URL.getHost(req.params.url); try wba.signRequest(arena, &our_req.params.headers, authority); From e56036fb50dfe28d10461b0e93e7a0e15a72164a Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 23:11:02 -0700 Subject: [PATCH 25/36] use Request Arena in CacheLayer --- src/network/layer/CacheLayer.zig | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 317457b5..cb5b3c77 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -52,8 +52,7 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { return self.next.request(client, req); } - const arena = try network.app.arena_pool.acquire(.small, "CacheLayer"); - errdefer network.app.arena_pool.release(arena); + const arena = req.params.arena; var iter = req.params.headers.iterator(); const req_header_list = try iter.collect(arena); @@ -63,8 +62,7 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { .timestamp = std.time.timestamp(), .request_headers = req_header_list.items, })) |cached| { - defer req.deinit(); - defer network.app.arena_pool.release(arena); + defer client.deinitRequest(req); return serveFromCache(req, &cached); } @@ -208,8 +206,6 @@ const CacheContext = struct { fn doneCallback(ctx: *anyopaque) anyerror!void { const self: *CacheContext = @ptrCast(@alignCast(ctx)); - defer self.client.network.app.arena_pool.release(self.arena); - const transfer = self.transfer orelse @panic("Start Callback didn't set CacheLayer.transfer"); if (self.pending_metadata) |metadata| { @@ -227,13 +223,11 @@ const CacheContext = struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *CacheContext = @ptrCast(@alignCast(ctx)); - defer self.client.network.app.arena_pool.release(self.arena); self.forward.forwardShutdown(); } fn errorCallback(ctx: *anyopaque, e: anyerror) void { const self: *CacheContext 
= @ptrCast(@alignCast(ctx)); - defer self.client.network.app.arena_pool.release(self.arena); self.forward.forwardErr(e); } }; From 152a792c180ba8a8a1717a51579c2bf10c56f942 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 23:12:20 -0700 Subject: [PATCH 26/36] use Request Arena in RobotsLayer --- src/network/layer/RobotsLayer.zig | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 92303a68..6ac36890 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -53,14 +53,10 @@ pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void { fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const self: *RobotsLayer = @ptrCast(@alignCast(ptr)); - const arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer"); - errdefer client.network.app.arena_pool.release(arena); - + const arena = req.params.arena; const robots_url = try URL.getRobotsUrl(arena, req.params.url); if (client.network.robot_store.get(robots_url)) |robot_entry| { - defer client.network.app.arena_pool.release(arena); - switch (robot_entry) { .present => |robots| { const path = URL.getPathname(req.params.url); From c719a522b8ff50eb1f682d3c0c41832fc09bfdf4 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 06:54:46 -0700 Subject: [PATCH 27/36] use lightpanda module log in layers --- src/network/layer/CacheLayer.zig | 3 ++- src/network/layer/InterceptionLayer.zig | 3 ++- src/network/layer/RobotsLayer.zig | 3 ++- src/network/layer/WebBotAuthLayer.zig | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index cb5b3c77..3db0a3c1 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -17,7 +17,8 @@ // along with this program. If not, see . 
const std = @import("std"); -const log = @import("../../log.zig"); +const lp = @import("lightpanda"); +const log = lp.log; const http = @import("../http.zig"); const Client = @import("../../browser/HttpClient.zig").Client; diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 81b231f4..c75ef1a1 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -18,7 +18,8 @@ const std = @import("std"); const builtin = @import("builtin"); -const log = @import("../../log.zig"); +const lp = @import("lightpanda"); +const log = lp.log; const IS_DEBUG = builtin.mode == .Debug; diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 6ac36890..d4d5783a 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -17,7 +17,8 @@ // along with this program. If not, see . const std = @import("std"); -const log = @import("../../log.zig"); +const lp = @import("lightpanda"); +const log = lp.log; const URL = @import("../../browser/URL.zig"); const Robots = @import("../Robots.zig"); diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig index 6367e7e8..7e67af49 100644 --- a/src/network/layer/WebBotAuthLayer.zig +++ b/src/network/layer/WebBotAuthLayer.zig @@ -17,7 +17,8 @@ // along with this program. If not, see . 
const std = @import("std"); -const log = @import("../../log.zig"); +const lp = @import("lightpanda"); +const log = lp.log; const URL = @import("../../browser/URL.zig"); const WebBotAuth = @import("../WebBotAuth.zig"); From 83b047e66a0a686b5956ab2e1fae5114b518f9e6 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 06:59:16 -0700 Subject: [PATCH 28/36] assert that intercepted isn't 0 before decrementing --- src/browser/HttpClient.zig | 1 + src/network/layer/InterceptionLayer.zig | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 90d6b8ab..b8985fb4 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -1563,6 +1563,7 @@ pub const Transfer = struct { pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { if (comptime IS_DEBUG) { + lp.assert(self.interception_layer.intercepted > 0, "HttpClient.continueTransfer", .{ .value = self.interception_layer.intercepted }); log.debug(.http, "continue transfer", .{ .intercepted = self.interception_layer.intercepted }); } diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index c75ef1a1..79779427 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -190,6 +190,7 @@ pub const InterceptContext = struct { pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) anyerror!void { if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.continueRequest", .{ .value = self.intercepted }); log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); } @@ -203,6 +204,7 @@ pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) void { if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.abortRequest", .{ .value = self.intercepted }); log.debug(.http, "abort 
transfer", .{ .intercepted = self.intercepted }); } self.intercepted -= 1; @@ -251,6 +253,7 @@ pub fn fulfillRequest( body: ?[]const u8, ) !void { if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.fulfillRequest", .{ .value = self.intercepted }); log.debug(.http, "fulfill transfer", .{ .intercepted = self.intercepted }); } From 4de1dc54248cdf8ca6bbb7d1680a87ab3dd6a9a2 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:03:00 -0700 Subject: [PATCH 29/36] properly call error callback in InterceptionLayer --- src/network/layer/InterceptionLayer.zig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 79779427..23b34217 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -90,7 +90,11 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { }); if (!wait_for_interception) { - return self.next.request(client, req); + return self.next.request(client, req) catch |err| { + req.error_callback(req.ctx, err); + client.deinitRequest(req); + return err; + }; } self.intercepted += 1; @@ -197,6 +201,7 @@ pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) self.intercepted -= 1; self.next.request(client, req) catch |err| { const ctx: *InterceptContext = @ptrCast(@alignCast(req.ctx)); + req.error_callback(req.ctx, err); ctx.client.deinitRequest(req); return err; }; From 3fe774fbfb388bb5f44792afd353fceb747a9e42 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:20:06 -0700 Subject: [PATCH 30/36] pass error all the way up to Layer chain to clean --- src/browser/HttpClient.zig | 7 +++++-- src/network/layer/CacheLayer.zig | 4 +--- src/network/layer/InterceptionLayer.zig | 8 ++------ src/network/layer/RobotsLayer.zig | 7 +------ 4 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/browser/HttpClient.zig 
b/src/browser/HttpClient.zig index b8985fb4..12b90388 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -402,10 +402,13 @@ pub fn request(self: *Client, req: Request) !void { our_req.params.request_id = self.incrReqId(); const arena = try self.network.app.arena_pool.acquire(.small, "Request.arena"); - errdefer self.network.app.arena_pool.release(arena); our_req.params.arena = arena; - return self.entry_layer.request(self, our_req); + return self.entry_layer.request(self, our_req) catch |err| { + our_req.error_callback(our_req.ctx, err); + self.deinitRequest(our_req); + return err; + }; } const SyncContext = struct { diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 3db0a3c1..fd8a5e9f 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -63,7 +63,6 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { .timestamp = std.time.timestamp(), .request_headers = req_header_list.items, })) |cached| { - defer client.deinitRequest(req); return serveFromCache(req, &cached); } @@ -104,8 +103,7 @@ fn serveFromCache(req: Request, cached: *const CachedResponse) !void { const proceed = try req.header_callback(response); if (!proceed) { - req.error_callback(req.ctx, error.Abort); - return; + return error.Abort; } switch (cached.data) { diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 23b34217..ce399294 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -90,11 +90,7 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { }); if (!wait_for_interception) { - return self.next.request(client, req) catch |err| { - req.error_callback(req.ctx, err); - client.deinitRequest(req); - return err; - }; + return self.next.request(client, req); } self.intercepted += 1; @@ -214,8 +210,8 @@ pub fn abortRequest(self: *InterceptionLayer, client: *Client, 
req: Request) voi } self.intercepted -= 1; - defer client.deinitRequest(req); req.error_callback(req.ctx, error.Abort); + client.deinitRequest(req); } fn fulfillInner( diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index d4d5783a..96ca1b18 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -63,11 +63,8 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const path = URL.getPathname(req.params.url); if (!robots.isAllowed(path)) { - defer client.deinitRequest(req); - log.warn(.http, "blocked by robots", .{ .url = req.params.url }); - req.error_callback(req.ctx, error.RobotsBlocked); - return; + return error.RobotsBlocked; } }, .absent => {}, @@ -85,8 +82,6 @@ fn fetchRobotsThenRequest( robots_url: [:0]const u8, req: Request, ) !void { - errdefer client.network.app.arena_pool.release(arena); - const entry = try self.pending.getOrPut(self.allocator, robots_url); if (!entry.found_existing) { From 1370f6805b0dbce871670770be2d9ae79c827f6f Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:21:57 -0700 Subject: [PATCH 31/36] add a note about cdp callback cb --- src/network/layer/InterceptionLayer.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index ce399294..165cd254 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -187,6 +187,8 @@ pub const InterceptContext = struct { }; // CDP Callbacks +// These handle their own clean up on errors with `self.next.request`. +// This is because they don't pass their error up the chain as they are async callbacks. 
pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) anyerror!void { if (comptime IS_DEBUG) { From 0ecf981f7e34584bc18c31dbc513cf8bcdce88c5 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:24:52 -0700 Subject: [PATCH 32/36] add assert to SyncRequest headerCallback --- src/browser/HttpClient.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 12b90388..dfafa683 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -425,6 +425,7 @@ const SyncContext = struct { fn headerCallback(response: Response) anyerror!bool { const self: *SyncContext = @ptrCast(@alignCast(response.ctx)); + lp.assert(response.status() != null, "HttpClient.SyncRequest.headerCallback", .{ .value = response.status() }); self.status = response.status().?; if (response.contentLength()) |cl| { try self.body.ensureTotalCapacity(self.allocator, cl); From 24ece021e1fb36889cbc6ead43cd7a1d874f8746 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:29:03 -0700 Subject: [PATCH 33/36] remove interception stuff in Transfer --- src/browser/HttpClient.zig | 63 ++++---------------------------------- 1 file changed, 6 insertions(+), 57 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index dfafa683..a810c535 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -1475,64 +1475,13 @@ pub const Transfer = struct { } pub fn responseHeaderIterator(self: *Transfer) HeaderIterator { - if (self._conn) |conn| { - // If we have a connection, than this is a real curl request and we - // iterate through the header that curl maintains. - return .{ .curl = .{ .conn = conn } }; - } - // If there's no handle, it either means this is being called before - // the request is even being made (which would be a bug in the code) - // or when a response was injected via transfer.fulfill. 
The injected - // header should be iterated, since there is no handle/easy. - return .{ .list = .{ .list = self.response_header.?._injected_headers } }; - } + // We always have a real curl request here. We handle injection up in InterceptionLayer. + lp.assert(self._conn != null, "Transfer.responseHeaderIterator", .{ .value = self._conn != null }); + const conn = self._conn.?; - pub fn fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { - if (transfer._conn != null) { - // should never happen, should have been intercepted/paused, and then - // either continued, aborted or fulfilled once. - @branchHint(.unlikely); - return error.RequestInProgress; - } - - transfer._fulfill(status, headers, body) catch |err| { - transfer.req.error_callback(transfer.req.ctx, err); - return err; - }; - } - - fn _fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { - const req = &transfer.req; - if (req.start_callback) |cb| { - try cb(Response.fromTransfer(transfer)); - } - - transfer.response_header = .{ - .status = status, - .url = req.params.url, - .redirect_count = 0, - ._injected_headers = headers, - }; - for (headers) |hdr| { - if (std.ascii.eqlIgnoreCase(hdr.name, "content-type")) { - const len = @min(hdr.value.len, ResponseHead.MAX_CONTENT_TYPE_LEN); - @memcpy(transfer.response_header.?._content_type[0..len], hdr.value[0..len]); - transfer.response_header.?._content_type_len = len; - break; - } - } - - lp.assert(transfer._header_done_called == false, "Transfer.fulfill header_done_called", .{}); - if (try req.header_callback(Response.fromTransfer(transfer)) == false) { - transfer.abort(error.Abort); - return; - } - - if (body) |b| { - try req.data_callback(Response.fromTransfer(transfer), b); - } - - try req.done_callback(req.ctx); + // If we have a connection, than this is a real curl request and we + // iterate through the header that curl maintains. 
+ return .{ .curl = .{ .conn = conn } }; } // This function should be called during the dataCallback. Calling it after From 1ab445843c5c5c03314fcba54f703ac67a9c67db Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:40:14 -0700 Subject: [PATCH 34/36] better arena management in Robots Layer and Context --- src/network/layer/RobotsLayer.zig | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 96ca1b18..1bfae1b6 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -72,13 +72,12 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { return self.next.request(client, req); } - return self.fetchRobotsThenRequest(client, arena, robots_url, req); + return self.fetchRobotsThenRequest(client, robots_url, req); } fn fetchRobotsThenRequest( self: *RobotsLayer, client: *Client, - arena: std.mem.Allocator, robots_url: [:0]const u8, req: Request, ) !void { @@ -88,11 +87,16 @@ fn fetchRobotsThenRequest( errdefer std.debug.assert(self.pending.remove(robots_url)); entry.value_ptr.* = .empty; - const robots_ctx = try arena.create(RobotsContext); + // This arena is later owned by the Request. It does not need to be cleaned up by us because + // it will be cleaned up by the `Transfer.deinit()` or any `Request.deinit()` called on any sublayers. 
+ const new_arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer.RobotsContext"); + errdefer client.network.app.arena_pool.release(new_arena); + + const robots_ctx = try new_arena.create(RobotsContext); robots_ctx.* = .{ .layer = self, .client = client, - .arena = arena, + .arena = new_arena, .robots_url = robots_url, .buffer = .empty, }; @@ -100,9 +104,6 @@ fn fetchRobotsThenRequest( const headers = try client.newHeaders(); log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); - const new_arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer.fetchRobots"); - errdefer client.network.app.arena_pool.release(new_arena); - try self.next.request(client, .{ .ctx = robots_ctx, .params = .{ @@ -125,8 +126,6 @@ fn fetchRobotsThenRequest( .error_callback = RobotsContext.errorCallback, .shutdown_callback = RobotsContext.shutdownCallback, }); - } else { - client.network.app.arena_pool.release(arena); } try entry.value_ptr.append(self.allocator, req); @@ -202,7 +201,6 @@ const RobotsContext = struct { const l = self.layer; const client = self.client; const robots_url = self.robots_url; - defer client.network.app.arena_pool.release(self.arena); var allowed = true; const network = client.network; @@ -246,7 +244,6 @@ const RobotsContext = struct { const l = self.layer; const client = self.client; const robots_url = self.robots_url; - defer client.network.app.arena_pool.release(self.arena); log.warn(.http, "robots fetch failed", .{ .err = err }); l.flushPending(client, robots_url, true); @@ -257,7 +254,6 @@ const RobotsContext = struct { const l = self.layer; const client = self.client; const robots_url = self.robots_url; - defer client.network.app.arena_pool.release(self.arena); log.debug(.http, "robots fetch shutdown", .{}); l.flushPendingShutdown(robots_url, client); From 85a5c0f927c5837b4a1f056e5190f94e8c0291e5 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:50:57 -0700 Subject: [PATCH 35/36] 
decrement intercepted and properly deinit on BrowserContext deinit --- src/cdp/CDP.zig | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 326e93c5..e646f4b8 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -450,13 +450,22 @@ pub const BrowserContext = struct { // abort all intercepted requests before closing the session/page // since some of these might callback into the page/scriptmanager + const http_client = browser.http_client; for (self.intercept_state.pendingIntercepts()) |intercept| { + defer { + lp.assert( + http_client.interception_layer.intercepted > 0, + "BrowserContext.deinit.intercepted", + .{ .value = http_client.interception_layer.intercepted }, + ); + http_client.interception_layer.intercepted -= 1; + } switch (intercept) { .transfer => |t| { t.abort(error.ClientDisconnect); }, .request => |r| { - defer r.deinit(); + defer http_client.deinitRequest(r); r.error_callback(r.ctx, error.ClientDisconnect); }, } From 1057b9de8d59abae399e8b4c466d6464d2cdcf34 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Tue, 28 Apr 2026 06:24:13 -0700 Subject: [PATCH 36/36] toRequestId2 -> toRequestId on CDP --- src/cdp/domains/fetch.zig | 4 ++-- src/cdp/domains/network.zig | 8 ++++---- src/cdp/id.zig | 14 +------------- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 672e4b92..edff1761 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -214,7 +214,7 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification. 
.document => "Document", .fetch => "Fetch", }, - .networkId = &id.toRequestId2(request), // matches the Network REQ-ID + .networkId = &id.toRequestId(request), // matches the Network REQ-ID }, .{ .session_id = session_id }); log.debug(.cdp, "request intercept", .{ @@ -446,7 +446,7 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", .realm = challenge.realm orelse "", }, - .networkId = &id.toRequestId2(&request), + .networkId = &id.toRequestId(&request), }, .{ .session_id = session_id }); log.debug(.cdp, "request auth required", .{ diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 1458bb0c..0554681a 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -262,7 +262,7 @@ pub fn httpRequestFail(bc: *CDP.BrowserContext, msg: *const Notification.Request // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.loadingFailed", .{ - .requestId = &id.toRequestId2(msg.request), + .requestId = &id.toRequestId(msg.request), // Seems to be what chrome answers with. I assume it depends on the type of error? 
.type = "Ping", .errorText = msg.err, @@ -287,7 +287,7 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques // We're missing a bunch of fields, but, for now, this eems like enough try bc.cdp.sendEvent("Network.requestWillBeSent", .{ .frameId = &id.toFrameId(frame_id), - .requestId = &id.toRequestId2(req), + .requestId = &id.toRequestId(req), .loaderId = &id.toLoaderId(req.params.loader_id), .type = req.params.resource_type.string(), .documentURL = frame.url, @@ -310,7 +310,7 @@ pub fn httpResponseHeaderDone(arena: Allocator, bc: *CDP.BrowserContext, msg: *c // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.responseReceived", .{ .frameId = &id.toFrameId(req.params.frame_id), - .requestId = &id.toRequestId2(req), + .requestId = &id.toRequestId(req), .loaderId = &id.toLoaderId(req.params.loader_id), .response = ResponseWriter.init(arena, msg.response), .hasExtraInfo = false, // TODO change after adding Network.responseReceivedExtraInfo @@ -323,7 +323,7 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request const session_id = bc.session_id orelse return; const req = msg.request; try bc.cdp.sendEvent("Network.loadingFinished", .{ - .requestId = &id.toRequestId2(req), + .requestId = &id.toRequestId(req), .encodedDataLength = msg.content_length, }, .{ .session_id = session_id }); } diff --git a/src/cdp/id.zig b/src/cdp/id.zig index cb304f6c..f6889d24 100644 --- a/src/cdp/id.zig +++ b/src/cdp/id.zig @@ -40,20 +40,8 @@ pub fn toLoaderId(id: u32) [14]u8 { // requestId has special requirements. If it's the main document navigation, // then it should match the loader id. 
-const Transfer = @import("../browser/HttpClient.zig").Transfer; -pub fn toRequestId(transfer: *const Transfer) [14]u8 { - const req = transfer.req; - if (req.params.resource_type == .document) { - return toLoaderId(req.params.loader_id); - } - - var buf: [14]u8 = undefined; - _ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{transfer.id}) catch unreachable; - return buf; -} - const Request = @import("../browser/HttpClient.zig").Request; -pub fn toRequestId2(req: *const Request) [14]u8 { +pub fn toRequestId(req: *const Request) [14]u8 { if (req.params.resource_type == .document) { return toLoaderId(req.params.loader_id); }