From 5dd15aa2cf13148dffc33ba455367266bfd0233b Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 16:17:02 -0700 Subject: [PATCH 01/31] use layers for Cache, Robots and WebBotAuth --- src/browser/HttpClient.zig | 436 ++++++-------------------- src/network/layer/CacheLayer.zig | 239 ++++++++++++++ src/network/layer/Forward.zig | 134 ++++++++ src/network/layer/RobotsLayer.zig | 264 ++++++++++++++++ src/network/layer/WebBotAuthLayer.zig | 52 +++ 5 files changed, 777 insertions(+), 348 deletions(-) create mode 100644 src/network/layer/CacheLayer.zig create mode 100644 src/network/layer/Forward.zig create mode 100644 src/network/layer/RobotsLayer.zig create mode 100644 src/network/layer/WebBotAuthLayer.zig diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 0832db1a..f22ab4d1 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -27,7 +27,6 @@ const CookieJar = @import("webapi/storage/Cookie.zig").Jar; const http = @import("../network/http.zig"); const Network = @import("../network/Network.zig"); const Robots = @import("../network/Robots.zig"); -const Cache = @import("../network/cache/Cache.zig"); const timestamp = @import("../datetime.zig").timestamp; const log = lp.log; @@ -40,8 +39,11 @@ pub const Method = http.Method; pub const Headers = http.Headers; pub const ResponseHead = http.ResponseHead; pub const HeaderIterator = http.HeaderIterator; -const CacheMetadata = Cache.CachedMetadata; -const CachedResponse = Cache.CachedResponse; +const CachedResponse = @import("../network/cache/Cache.zig").CachedResponse; + +pub const CacheLayer = @import("../network/layer/CacheLayer.zig"); +pub const RobotsLayer = @import("../network/layer/RobotsLayer.zig"); +pub const WebBotAuthLayer = @import("../network/layer/WebBotAuthLayer.zig"); // This is loosely tied to a browser Page. Loading all the , doing // XHR requests, and loading imports all happens through here. Sine the app @@ -101,10 +103,6 @@ allocator: Allocator, network: *Network, -// Queue of requests that depend on a robots.txt. -// Allows us to fetch the robots.txt just once. -pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty, - // Once we have a handle/easy to process a request with, we create a Transfer // which contains the Request as well as any state we need to process the // request. These will come and go with each request. @@ -134,6 +132,37 @@ cdp_client: ?CDPClient = null, max_response_size: usize, +cache_layer: CacheLayer, +robots_layer: RobotsLayer, +web_bot_auth_layer: WebBotAuthLayer, +entry_layer: Layer, + +pub const Context = struct { + network: *Network, + + pub fn newHeaders(self: Context) !http.Headers { + return http.Headers.init(self.network.config.http_headers.user_agent_header); + } +}; + +pub const Layer = struct { + ptr: *anyopaque, + vtable: *const VTable, + + pub const VTable = struct { + request: *const fn (*anyopaque, Context, Request) anyerror!void, + }; + + pub fn request(self: Layer, ctx: Context, req: Request) !void { + return self.vtable.request(self.ptr, ctx, req); + } +}; + +fn layerWith(self: anytype, next: Layer) Layer { + self.next = next; + return self.layer(); +} + // libcurl can monitor arbitrary sockets, this lets us use libcurl to poll // both HTTP data as well as messages from an CDP connection. // Furthermore, we have some tension between blocking scripts and request @@ -175,8 +204,29 @@ pub fn init(allocator: Allocator, network: *Network) !*Client { .tls_verify = network.config.tlsVerifyHost(), .obey_robots = network.config.obeyRobots(), .max_response_size = network.config.httpMaxResponseSize() orelse std.math.maxInt(u32), + + .cache_layer = .{}, + .robots_layer = .{ .allocator = allocator }, + .web_bot_auth_layer = .{}, + .entry_layer = undefined, }; + var next = client.layer(); + + if (network.config.webBotAuth() != null) { + next = layerWith(&client.web_bot_auth_layer, next); + } + + if (network.config.obeyRobots()) { + next = layerWith(&client.robots_layer, next); + } + + if (network.config.httpCacheDir() != null) { + next = layerWith(&client.cache_layer, next); + } + + client.entry_layer = next; + return client; } @@ -185,17 +235,20 @@ pub fn deinit(self: *Client) void { self.handles.deinit(); self.transfer_pool.deinit(); - - var robots_iter = self.pending_robots_queue.iterator(); - while (robots_iter.next()) |entry| { - entry.value_ptr.deinit(self.allocator); - } - self.pending_robots_queue.deinit(self.allocator); - self.clearUserAgentOverride(); + + self.robots_layer.deinit(self.allocator); + self.allocator.destroy(self); } +pub fn layer(self: *Client) Layer { + return .{ + .ptr = self, + .vtable = &.{ .request = _request }, + }; +} + // Set a user agent override. Both the raw UA string and the pre-formatted // "User-Agent: " header string are allocated from self.allocator. pub fn setUserAgentOverride(self: *Client, ua: []const u8) !void { @@ -350,102 +403,12 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { } pub fn request(self: *Client, req: Request) !void { - if (self.obey_robots == false) { - return self.processRequest(req); - } - - const robots_url = try URL.getRobotsUrl(self.allocator, req.url); - errdefer self.allocator.free(robots_url); - - // If we have this robots cached, we can take a fast path. - if (self.network.robot_store.get(robots_url)) |robot_entry| { - defer self.allocator.free(robots_url); - - switch (robot_entry) { - // If we have a found robots entry, we check it. - .present => |robots| { - const path = URL.getPathname(req.url); - if (!robots.isAllowed(path)) { - req.error_callback(req.ctx, error.RobotsBlocked); - return; - } - }, - // Otherwise, we assume we won't find it again. - .absent => {}, - } - - return self.processRequest(req); - } - return self.fetchRobotsThenProcessRequest(robots_url, req); + const ctx = Context{ .network = self.network }; + return self.entry_layer.request(ctx, req); } -fn serveFromCache(req: Request, cached: *const CachedResponse) !void { - const response = Response.fromCached(req.ctx, cached); - defer switch (cached.data) { - .buffer => |_| {}, - .file => |f| f.file.close(), - }; - - if (req.start_callback) |cb| { - try cb(response); - } - - const proceed = try req.header_callback(response); - if (!proceed) { - req.error_callback(req.ctx, error.Abort); - return; - } - - switch (cached.data) { - .buffer => |data| { - if (data.len > 0) { - try req.data_callback(response, data); - } - }, - .file => |f| { - const file = f.file; - - var buf: [1024]u8 = undefined; - var file_reader = file.reader(&buf); - try file_reader.seekTo(f.offset); - const reader = &file_reader.interface; - - var read_buf: [1024]u8 = undefined; - var remaining = f.len; - - while (remaining > 0) { - const read_len = @min(read_buf.len, remaining); - const n = try reader.readSliceShort(read_buf[0..read_len]); - if (n == 0) break; - remaining -= n; - try req.data_callback(response, read_buf[0..n]); - } - }, - } - - try req.done_callback(req.ctx); -} - -fn processRequest(self: *Client, req: Request) !void { - if (self.network.cache) |*cache| { - if (req.method == .GET) { - // cache is only used to read the meta data - const arena = try self.network.app.arena_pool.acquire(.small, "HttpClient.cache"); - defer self.network.app.arena_pool.release(arena); - - var iter = req.headers.iterator(); - const req_header_list = try iter.collect(arena); - - if (cache.get(arena, .{ - .url = req.url, - .timestamp = std.time.timestamp(), - .request_headers = req_header_list.items, - })) |cached| { - defer req.headers.deinit(); - return serveFromCache(req, &cached); - } - } - } +pub fn _request(ptr: *anyopaque, _: Context, req: Request) !void { + const self: *Client = @ptrCast(@alignCast(ptr)); const transfer = try self.makeTransfer(req); @@ -479,176 +442,6 @@ fn processRequest(self: *Client, req: Request) !void { } } -const RobotsRequestContext = struct { - client: *Client, - req: Request, - robots_url: [:0]const u8, - buffer: std.ArrayList(u8), - status: u16 = 0, - - pub fn deinit(self: *RobotsRequestContext) void { - self.client.allocator.free(self.robots_url); - self.buffer.deinit(self.client.allocator); - self.client.allocator.destroy(self); - } -}; - -fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: Request) !void { - const entry = try self.pending_robots_queue.getOrPut(self.allocator, robots_url); - - if (!entry.found_existing) { - errdefer self.allocator.free(robots_url); - - // If we aren't already fetching this robots, - // we want to create a new queue for it and add this request into it. - entry.value_ptr.* = .empty; - - const ctx = try self.allocator.create(RobotsRequestContext); - errdefer self.allocator.destroy(ctx); - ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty }; - const headers = try self.newHeaders(); - - log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); - try self.processRequest(.{ - .ctx = ctx, - .url = robots_url, - .method = .GET, - .headers = headers, - .blocking = false, - .frame_id = req.frame_id, - .loader_id = req.loader_id, - .cookie_jar = req.cookie_jar, - .cookie_origin = req.cookie_origin, - .notification = req.notification, - .resource_type = .fetch, - .header_callback = robotsHeaderCallback, - .data_callback = robotsDataCallback, - .done_callback = robotsDoneCallback, - .error_callback = robotsErrorCallback, - .shutdown_callback = robotsShutdownCallback, - }); - } else { - // Not using our own robots URL, only using the one from the first request. - self.allocator.free(robots_url); - } - - try entry.value_ptr.append(self.allocator, req); -} - -fn robotsHeaderCallback(response: Response) !bool { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx)); - // Robots callbacks only happen on real live requests. - const transfer = response.inner.transfer; - - if (transfer.response_header) |hdr| { - log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = ctx.robots_url }); - ctx.status = hdr.status; - } - - if (transfer.getContentLength()) |cl| { - try ctx.buffer.ensureTotalCapacity(ctx.client.allocator, cl); - } - - return true; -} - -fn robotsDataCallback(response: Response, data: []const u8) !void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx)); - try ctx.buffer.appendSlice(ctx.client.allocator, data); -} - -fn robotsDoneCallback(ctx_ptr: *anyopaque) !void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.deinit(); - - var allowed = true; - - switch (ctx.status) { - 200 => { - if (ctx.buffer.items.len > 0) { - const robots: ?Robots = ctx.client.network.robot_store.robotsFromBytes( - ctx.client.getUserAgent(), - ctx.buffer.items, - ) catch blk: { - log.warn(.browser, "failed to parse robots", .{ .robots_url = ctx.robots_url }); - // If we fail to parse, we just insert it as absent and ignore. - try ctx.client.network.robot_store.putAbsent(ctx.robots_url); - break :blk null; - }; - - if (robots) |r| { - try ctx.client.network.robot_store.put(ctx.robots_url, r); - const path = URL.getPathname(ctx.req.url); - allowed = r.isAllowed(path); - } - } - }, - 404 => { - log.debug(.http, "robots not found", .{ .url = ctx.robots_url }); - // If we get a 404, we just insert it as absent. - try ctx.client.network.robot_store.putAbsent(ctx.robots_url); - }, - else => { - log.debug(.http, "unexpected status on robots", .{ .url = ctx.robots_url, .status = ctx.status }); - // If we get an unexpected status, we just insert as absent. - try ctx.client.network.robot_store.putAbsent(ctx.robots_url); - }, - } - - var queued = ctx.client.pending_robots_queue.fetchRemove( - ctx.robots_url, - ) orelse @panic("Client.robotsDoneCallbacke empty queue"); - defer queued.value.deinit(ctx.client.allocator); - - for (queued.value.items) |queued_req| { - if (!allowed) { - log.warn(.http, "blocked by robots", .{ .url = queued_req.url }); - queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); - } else { - ctx.client.processRequest(queued_req) catch |e| { - queued_req.error_callback(queued_req.ctx, e); - }; - } - } -} - -fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.deinit(); - - log.warn(.http, "robots fetch failed", .{ .err = err }); - - var queued = ctx.client.pending_robots_queue.fetchRemove( - ctx.robots_url, - ) orelse @panic("Client.robotsErrorCallback empty queue"); - defer queued.value.deinit(ctx.client.allocator); - - // On error, allow all queued requests to proceed - for (queued.value.items) |queued_req| { - ctx.client.processRequest(queued_req) catch |e| { - queued_req.error_callback(queued_req.ctx, e); - }; - } -} - -fn robotsShutdownCallback(ctx_ptr: *anyopaque) void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.deinit(); - - log.debug(.http, "robots fetch shutdown", .{}); - - var queued = ctx.client.pending_robots_queue.fetchRemove( - ctx.robots_url, - ) orelse @panic("Client.robotsErrorCallback empty queue"); - defer queued.value.deinit(ctx.client.allocator); - - for (queued.value.items) |queued_req| { - if (queued_req.shutdown_callback) |shutdown_cb| { - shutdown_cb(queued_req.ctx); - } - } -} - fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { // The request was intercepted and is blocking. This is messy, but our // callers, the ScriptManager -> Page, don't have a great way to stop the @@ -1028,13 +821,6 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T } } - if (transfer._pending_cache_metadata) |metadata| { - const cache = &self.network.cache.?; - cache.put(metadata.*, body) catch |err| { - log.warn(.cache, "cache put failed", .{ .err = err }); - }; - } - // release conn ASAP so that it's available; some done_callbacks // will load more resources. transfer.releaseConn(); @@ -1155,6 +941,13 @@ fn ensureNoActiveConnection(self: *const Client) !void { } pub const Request = struct { + pub const StartCallback = *const fn (response: Response) anyerror!void; + pub const HeaderCallback = *const fn (response: Response) anyerror!bool; + pub const DataCallback = *const fn (response: Response, data: []const u8) anyerror!void; + pub const DoneCallback = *const fn (ctx: *anyopaque) anyerror!void; + pub const ErrorCallback = *const fn (ctx: *anyopaque, err: anyerror) void; + pub const ShutdownCallback = *const fn (ctx: *anyopaque) void; + frame_id: u32, loader_id: u32, method: Method, @@ -1178,12 +971,12 @@ pub const Request = struct { // arbitrary data that can be associated with this request ctx: *anyopaque = undefined, - start_callback: ?*const fn (response: Response) anyerror!void = null, - header_callback: *const fn (response: Response) anyerror!bool, - data_callback: *const fn (response: Response, data: []const u8) anyerror!void, - done_callback: *const fn (ctx: *anyopaque) anyerror!void, - error_callback: *const fn (ctx: *anyopaque, err: anyerror) void, - shutdown_callback: ?*const fn (ctx: *anyopaque) void = null, + start_callback: ?StartCallback = null, + header_callback: HeaderCallback, + data_callback: DataCallback, + done_callback: DoneCallback, + error_callback: ErrorCallback, + shutdown_callback: ?ShutdownCallback = null, const ResourceType = enum { document, @@ -1204,6 +997,10 @@ pub const Request = struct { }; } }; + + pub fn deinit(self: *const Request) void { + self.headers.deinit(); + } }; pub const Response = struct { @@ -1290,7 +1087,6 @@ pub const Transfer = struct { // total bytes received in the response, including the response status line, // the headers, and the [encoded] body. bytes_received: usize = 0, - _pending_cache_metadata: ?*CacheMetadata = null, start_time: u64, aborted: bool = false, @@ -1442,12 +1238,6 @@ pub const Transfer = struct { try conn.secretHeaders(&header_list, &client.network.config.http_headers); try conn.setHeaders(&header_list); - // If we have WebBotAuth, sign our request. - if (client.network.web_bot_auth) |*wba| { - const authority = URL.getHost(req.url); - try wba.signRequest(self.arena.allocator(), &header_list, authority); - } - // Add cookies from cookie jar. if (try self.getCookieString()) |cookies| { try conn.setCookies(@ptrCast(cookies.ptr)); @@ -1693,56 +1483,6 @@ pub const Transfer = struct { return err; }; - if (transfer.client.network.cache != null and transfer.req.method == .GET) { - const rh = &transfer.response_header.?; - const allocator = transfer.arena.allocator(); - - const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null; - - const maybe_cm = try Cache.tryCache( - allocator, - std.time.timestamp(), - transfer.url, - rh.status, - rh.contentType(), - if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null, - vary, - if (conn.getResponseHeader("age", 0)) |h| h.value else null, - conn.getResponseHeader("set-cookie", 0) != null, - conn.getResponseHeader("authorization", 0) != null, - ); - - if (maybe_cm) |cm| { - var iter = transfer.responseHeaderIterator(); - var header_list = try iter.collect(allocator); - const end_of_response = header_list.items.len; - - if (vary) |vary_str| { - var req_it = transfer.req.headers.iterator(); - - while (req_it.next()) |hdr| { - var vary_iter = std.mem.splitScalar(u8, vary_str, ','); - - while (vary_iter.next()) |part| { - const name = std.mem.trim(u8, part, &std.ascii.whitespace); - if (std.ascii.eqlIgnoreCase(hdr.name, name)) { - try header_list.append(allocator, .{ - .name = try allocator.dupe(u8, hdr.name), - .value = try allocator.dupe(u8, hdr.value), - }); - } - } - } - } - - const metadata = try transfer.arena.allocator().create(CacheMetadata); - metadata.* = cm; - metadata.headers = header_list.items[0..end_of_response]; - metadata.vary_headers = header_list.items[end_of_response..]; - transfer._pending_cache_metadata = metadata; - } - } - return proceed and transfer.aborted == false; } diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig new file mode 100644 index 00000000..60dd1f2d --- /dev/null +++ b/src/network/layer/CacheLayer.zig @@ -0,0 +1,239 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const log = @import("../../log.zig"); + +const http = @import("../http.zig"); +const Transfer = @import("../../browser/HttpClient.zig").Transfer; +const Context = @import("../../browser/HttpClient.zig").Context; +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; +const Layer = @import("../../browser/HttpClient.zig").Layer; + +const Cache = @import("../cache/Cache.zig"); +const CachedMetadata = @import("../cache/Cache.zig").CachedMetadata; +const CachedResponse = @import("../cache/Cache.zig").CachedResponse; +const Forward = @import("Forward.zig"); + +const CacheLayer = @This(); + +next: Layer = undefined, + +pub fn layer(self: *CacheLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ + .request = request, + }, + }; +} + +fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { + const self: *CacheLayer = @ptrCast(@alignCast(ptr)); + const network = ctx.network; + + if (req.method != .GET) { + return self.next.request(ctx, req); + } + + const arena = try network.app.arena_pool.acquire(.small, "CacheLayer"); + errdefer network.app.arena_pool.release(arena); + + var iter = req.headers.iterator(); + const req_header_list = try iter.collect(arena); + + if (network.cache.?.get(arena, .{ + .url = req.url, + .timestamp = std.time.timestamp(), + .request_headers = req_header_list.items, + })) |cached| { + defer req.deinit(); + defer network.app.arena_pool.release(arena); + return serveFromCache(req, &cached); + } + + const cache_ctx = try arena.create(CacheContext); + cache_ctx.* = .{ + .arena = arena, + .context = ctx, + .forward = Forward.fromRequest(req), + .req_url = req.url, + .req_headers = req.headers, + }; + + const wrapped = cache_ctx.forward.wrapRequest( + req, + cache_ctx, + .{ + .start = CacheContext.startCallback, + .header = CacheContext.headerCallback, + .done = CacheContext.doneCallback, + .shutdown = CacheContext.shutdownCallback, + .err = CacheContext.errorCallback, + }, + ); + + return self.next.request(ctx, wrapped); +} + +fn serveFromCache(req: Request, cached: *const CachedResponse) !void { + const response = Response.fromCached(req.ctx, cached); + defer switch (cached.data) { + .buffer => |_| {}, + .file => |f| f.file.close(), + }; + + if (req.start_callback) |cb| { + try cb(response); + } + + const proceed = try req.header_callback(response); + if (!proceed) { + req.error_callback(req.ctx, error.Abort); + return; + } + + switch (cached.data) { + .buffer => |data| { + if (data.len > 0) { + try req.data_callback(response, data); + } + }, + .file => |f| { + const file = f.file; + var buf: [1024]u8 = undefined; + var file_reader = file.reader(&buf); + try file_reader.seekTo(f.offset); + const reader = &file_reader.interface; + var read_buf: [1024]u8 = undefined; + var remaining = f.len; + while (remaining > 0) { + const read_len = @min(read_buf.len, remaining); + const n = try reader.readSliceShort(read_buf[0..read_len]); + if (n == 0) break; + remaining -= n; + try req.data_callback(response, read_buf[0..n]); + } + }, + } + + try req.done_callback(req.ctx); +} + +const CacheContext = struct { + arena: std.mem.Allocator, + context: Context, + transfer: ?*Transfer = null, + forward: Forward, + req_url: [:0]const u8, + req_headers: http.Headers, + pending_metadata: ?*CachedMetadata = null, + + fn startCallback(response: Response) anyerror!void { + const self: *CacheContext = @ptrCast(@alignCast(response.ctx)); + self.transfer = response.inner.transfer; + return self.forward.forwardStart(response); + } + + fn headerCallback(response: Response) anyerror!bool { + const self: *CacheContext = @ptrCast(@alignCast(response.ctx)); + const allocator = self.arena; + + const transfer = response.inner.transfer; + var rh = &transfer.response_header.?; + + const conn = transfer._conn.?; + + const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null; + + const maybe_cm = try Cache.tryCache( + allocator, + std.time.timestamp(), + transfer.url, + rh.status, + rh.contentType(), + if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null, + vary, + if (conn.getResponseHeader("age", 0)) |h| h.value else null, + conn.getResponseHeader("set-cookie", 0) != null, + conn.getResponseHeader("authorization", 0) != null, + ); + + if (maybe_cm) |cm| { + var iter = transfer.responseHeaderIterator(); + var header_list = try iter.collect(allocator); + const end_of_response = header_list.items.len; + + if (vary) |vary_str| { + var req_it = self.req_headers.iterator(); + while (req_it.next()) |hdr| { + var vary_iter = std.mem.splitScalar(u8, vary_str, ','); + while (vary_iter.next()) |part| { + const name = std.mem.trim(u8, part, &std.ascii.whitespace); + if (std.ascii.eqlIgnoreCase(hdr.name, name)) { + try header_list.append(allocator, .{ + .name = try allocator.dupe(u8, hdr.name), + .value = try allocator.dupe(u8, hdr.value), + }); + } + } + } + } + + const metadata = try allocator.create(CachedMetadata); + metadata.* = cm; + metadata.headers = header_list.items[0..end_of_response]; + metadata.vary_headers = header_list.items[end_of_response..]; + self.pending_metadata = metadata; + } + + return self.forward.forwardHeader(response); + } + + fn doneCallback(ctx: *anyopaque) anyerror!void { + const self: *CacheContext = @ptrCast(@alignCast(ctx)); + defer self.context.network.app.arena_pool.release(self.arena); + + const transfer = self.transfer orelse @panic("Start Callback didn't set CacheLayer.transfer"); + + if (self.pending_metadata) |metadata| { + const cache = &self.context.network.cache.?; + + log.debug(.browser, "http cache", .{ .key = self.req_url, .metadata = metadata }); + cache.put(metadata.*, transfer._stream_buffer.items) catch |err| { + log.warn(.http, "cache put failed", .{ .err = err }); + }; + log.debug(.browser, "http.cache.put", .{ .url = self.req_url }); + } + + return self.forward.forwardDone(); + } + + fn shutdownCallback(ctx: *anyopaque) void { + const self: *CacheContext = @ptrCast(@alignCast(ctx)); + defer self.context.network.app.arena_pool.release(self.arena); + self.forward.forwardShutdown(); + } + + fn errorCallback(ctx: *anyopaque, e: anyerror) void { + const self: *CacheContext = @ptrCast(@alignCast(ctx)); + defer self.context.network.app.arena_pool.release(self.arena); + self.forward.forwardErr(e); + } +}; diff --git a/src/network/layer/Forward.zig b/src/network/layer/Forward.zig new file mode 100644 index 00000000..b11ff23f --- /dev/null +++ b/src/network/layer/Forward.zig @@ -0,0 +1,134 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; + +const Forward = @This(); + +ctx: *anyopaque, +start: ?Request.StartCallback, +header: Request.HeaderCallback, +data: Request.DataCallback, +done: Request.DoneCallback, +err: Request.ErrorCallback, +shutdown: ?Request.ShutdownCallback, + +pub fn fromRequest(req: Request) Forward { + return .{ + .ctx = req.ctx, + .start = req.start_callback, + .header = req.header_callback, + .data = req.data_callback, + .done = req.done_callback, + .err = req.error_callback, + .shutdown = req.shutdown_callback, + }; +} + +pub const Overrides = struct { + start: ?Request.StartCallback = null, + header: ?Request.HeaderCallback = null, + data: ?Request.DataCallback = null, + done: ?Request.DoneCallback = null, + err: ?Request.ErrorCallback = null, + shutdown: ?Request.ShutdownCallback = null, +}; + +pub fn wrapRequest( + self: *Forward, + req: Request, + new_ctx: anytype, + overrides: Overrides, +) Request { + const T = @TypeOf(new_ctx.*); + const PassthroughT = makePassthrough(T, "forward"); + var wrapped = req; + wrapped.ctx = new_ctx; + wrapped.start_callback = overrides.start orelse if (self.start != null) PassthroughT.start else null; + wrapped.header_callback = overrides.header orelse PassthroughT.header; + wrapped.data_callback = overrides.data orelse PassthroughT.data; + wrapped.done_callback = overrides.done orelse PassthroughT.done; + wrapped.error_callback = overrides.err orelse PassthroughT.err; + wrapped.shutdown_callback = overrides.shutdown orelse if (self.shutdown != null) PassthroughT.shutdown else null; + return wrapped; +} + +fn makePassthrough(comptime T: type, comptime field: []const u8) type { + return struct { + pub fn start(response: Response) anyerror!void { + const self: *T = @ptrCast(@alignCast(response.ctx)); + return @field(self, field).forwardStart(response); + } + + pub fn header(response: Response) anyerror!bool { + const self: *T = @ptrCast(@alignCast(response.ctx)); + return @field(self, field).forwardHeader(response); + } + + pub fn data(response: Response, chunk: []const u8) anyerror!void { + const self: *T = @ptrCast(@alignCast(response.ctx)); + return @field(self, field).forwardData(response, chunk); + } + + pub fn done(ctx_ptr: *anyopaque) anyerror!void { + const self: *T = @ptrCast(@alignCast(ctx_ptr)); + return @field(self, field).forwardDone(); + } + + pub fn err(ctx_ptr: *anyopaque, e: anyerror) void { + const self: *T = @ptrCast(@alignCast(ctx_ptr)); + @field(self, field).forwardErr(e); + } + + pub fn shutdown(ctx_ptr: *anyopaque) void { + const self: *T = @ptrCast(@alignCast(ctx_ptr)); + @field(self, field).forwardShutdown(); + } + }; +} + +pub fn forwardStart(self: Forward, response: Response) anyerror!void { + var fwd = response; + fwd.ctx = self.ctx; + if (self.start) |cb| try cb(fwd); +} + +pub fn forwardHeader(self: Forward, response: Response) anyerror!bool { + var fwd = response; + fwd.ctx = self.ctx; + return self.header(fwd); +} + +pub fn forwardData(self: Forward, response: Response, chunk: []const u8) anyerror!void { + var fwd = response; + fwd.ctx = self.ctx; + return self.data(fwd, chunk); +} + +pub fn forwardDone(self: Forward) anyerror!void { + return self.done(self.ctx); +} + +pub fn forwardErr(self: Forward, e: anyerror) void { + self.err(self.ctx, e); +} + +pub fn forwardShutdown(self: Forward) void { + if (self.shutdown) |cb| cb(self.ctx); +} diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig new file mode 100644 index 00000000..0119d747 --- /dev/null +++ b/src/network/layer/RobotsLayer.zig @@ -0,0 +1,264 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const log = @import("../../log.zig"); + +const URL = @import("../../browser/URL.zig"); +const Robots = @import("../Robots.zig"); +const Context = @import("../../browser/HttpClient.zig").Context; +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; +const Layer = @import("../../browser/HttpClient.zig").Layer; +const Forward = @import("Forward.zig"); + +const RobotsLayer = @This(); + +next: Layer = undefined, +allocator: std.mem.Allocator, +pending: std.StringHashMapUnmanaged(std.ArrayListUnmanaged(Request)) = .empty, + +pub fn layer(self: *RobotsLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ + .request = request, + }, + }; +} + +pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void { + var it = self.pending.iterator(); + while (it.next()) |entry| { + entry.value_ptr.deinit(allocator); + } + self.pending.deinit(allocator); +} + +fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { + const self: *RobotsLayer = @ptrCast(@alignCast(ptr)); + + const arena = try ctx.network.app.arena_pool.acquire(.small, "RobotsLayer"); + errdefer ctx.network.app.arena_pool.release(arena); + + const robots_url = try URL.getRobotsUrl(arena, req.url); + + if (ctx.network.robot_store.get(robots_url)) |robot_entry| { + defer ctx.network.app.arena_pool.release(arena); + + switch (robot_entry) { + .present => |robots| { + const path = URL.getPathname(req.url); + + if (!robots.isAllowed(path)) { + log.warn(.http, "blocked by robots", .{ .url = req.url }); + req.error_callback(req.ctx, error.RobotsBlocked); + return; + } + }, + .absent => {}, + } + return self.next.request(ctx, req); + } + + return self.fetchRobotsThenRequest(ctx, arena, robots_url, req); +} + +fn fetchRobotsThenRequest( + self: *RobotsLayer, + ctx: Context, + arena: std.mem.Allocator, + robots_url: [:0]const u8, + req: Request, +) !void { + errdefer ctx.network.app.arena_pool.release(arena); + + const entry = try self.pending.getOrPut(self.allocator, robots_url); + + if (!entry.found_existing) { + errdefer std.debug.assert(self.pending.remove(robots_url)); + entry.value_ptr.* = .empty; + + const robots_ctx = try arena.create(RobotsContext); + robots_ctx.* = .{ + .layer = self, + .ctx = ctx, + .arena = arena, + .robots_url = robots_url, + .buffer = .empty, + }; + + const headers = try ctx.newHeaders(); + log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); + + try self.next.request(ctx, .{ + .ctx = robots_ctx, + .url = robots_url, + .method = .GET, + .headers = headers, + .blocking = false, + .frame_id = req.frame_id, + .loader_id = req.loader_id, + .cookie_jar = req.cookie_jar, + .cookie_origin = req.cookie_origin, + .notification = req.notification, + .resource_type = .fetch, + .header_callback = RobotsContext.headerCallback, + .data_callback = RobotsContext.dataCallback, + .done_callback = RobotsContext.doneCallback, + .error_callback = RobotsContext.errorCallback, + .shutdown_callback = RobotsContext.shutdownCallback, + }); + } else { + ctx.network.app.arena_pool.release(arena); + } + + try entry.value_ptr.append(self.allocator, req); +} + +fn flushPending(self: *RobotsLayer, ctx: Context, robots_url: [:0]const u8, allowed: bool) void { + var queued = self.pending.fetchRemove(robots_url) orelse + @panic("RobotsLayer.flushPending: missing queue"); + defer queued.value.deinit(self.allocator); + + for (queued.value.items) |queued_req| { + if (!allowed) { + log.warn(.http, "blocked by robots", .{ .url = queued_req.url }); + defer queued_req.deinit(); + queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); + } else { + self.next.request(ctx, queued_req) catch |e| { + defer queued_req.deinit(); + queued_req.error_callback(queued_req.ctx, e); + }; + } + } +} + +fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8) void { + var queued = self.pending.fetchRemove(robots_url) orelse + @panic("RobotsLayer.flushPendingShutdown: missing queue"); + defer queued.value.deinit(self.allocator); + + for (queued.value.items) |queued_req| { + defer queued_req.deinit(); + if (queued_req.shutdown_callback) |cb| cb(queued_req.ctx); + } +} + +const RobotsContext = struct { + layer: *RobotsLayer, + arena: std.mem.Allocator, + ctx: Context, + robots_url: [:0]const u8, + buffer: std.ArrayListUnmanaged(u8), + status: u16 = 0, + + fn deinit(self: *RobotsContext) void { + self.buffer.deinit(self.layer.allocator); + self.layer.allocator.destroy(self); + } + + fn headerCallback(response: Response) anyerror!bool { + const self: *RobotsContext = @ptrCast(@alignCast(response.ctx)); + switch (response.inner) { + .transfer => |t| { + if (t.response_header) |hdr| { + log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = self.robots_url }); + self.status = hdr.status; + } + if (t.getContentLength()) |cl| { + try self.buffer.ensureTotalCapacity(self.arena, cl); + } + }, + .cached => {}, + } + return true; + } + + fn dataCallback(response: Response, data: []const u8) anyerror!void { + const self: *RobotsContext = @ptrCast(@alignCast(response.ctx)); + try self.buffer.appendSlice(self.arena, data); + } + + fn doneCallback(ctx_ptr: *anyopaque) anyerror!void { + const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); + const l = self.layer; + const ctx = self.ctx; + const robots_url = self.robots_url; + defer ctx.network.app.arena_pool.release(self.arena); + + var allowed = true; + const network = ctx.network; + + switch (self.status) { + 200 => { + if (self.buffer.items.len > 0) { + const robots: ?Robots = network.robot_store.robotsFromBytes( + network.config.http_headers.user_agent, + self.buffer.items, + ) catch blk: { + log.warn(.browser, "failed to parse robots", .{ .robots_url = robots_url }); + try network.robot_store.putAbsent(robots_url); + break :blk null; + }; + if (robots) |r| { + try network.robot_store.put(robots_url, r); + const path = URL.getPathname(l.pending.get(robots_url).?.items[0].url); + allowed = r.isAllowed(path); + } + } + }, + 404 => { + log.debug(.http, "robots not found", .{ .url = robots_url }); + try network.robot_store.putAbsent(robots_url); + }, + else => { + log.debug(.http, "unexpected status on robots", .{ + .url = robots_url, + .status = self.status, + }); + try network.robot_store.putAbsent(robots_url); + }, + } + + l.flushPending(ctx, robots_url, allowed); + } + + fn errorCallback(ctx_ptr: *anyopaque, err: anyerror) void { + const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); + const l = self.layer; + const ctx = self.ctx; + const robots_url = self.robots_url; + defer ctx.network.app.arena_pool.release(self.arena); + + log.warn(.http, "robots fetch failed", .{ .err = err }); + l.flushPending(ctx, robots_url, true); + } + + fn shutdownCallback(ctx_ptr: *anyopaque) void { + const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); + const l = self.layer; + const ctx = self.ctx; + const robots_url = self.robots_url; + defer ctx.network.app.arena_pool.release(self.arena); + + log.debug(.http, "robots fetch shutdown", .{}); + l.flushPendingShutdown(robots_url); + } +}; diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig new file mode 100644 index 00000000..42bf99a2 --- /dev/null +++ b/src/network/layer/WebBotAuthLayer.zig @@ -0,0 +1,52 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const log = @import("../../log.zig"); + +const URL = @import("../../browser/URL.zig"); +const WebBotAuth = @import("../WebBotAuth.zig"); +const Context = @import("../../browser/HttpClient.zig").Context; +const Request = @import("../../browser/HttpClient.zig").Request; +const Layer = @import("../../browser/HttpClient.zig").Layer; + +const WebBotAuthLayer = @This(); + +next: Layer = undefined, + +pub fn layer(self: *WebBotAuthLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ .request = request }, + }; +} + +fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { + const self: *WebBotAuthLayer = @ptrCast(@alignCast(ptr)); + var our_req = req; + + const wba = ctx.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth"); + + const arena = try ctx.network.app.arena_pool.acquire(.small, "WebBotAuthLayer"); + defer ctx.network.app.arena_pool.release(arena); + + const authority = URL.getHost(req.url); + try wba.signRequest(arena, &our_req.headers, authority); + + return self.next.request(ctx, our_req); +} From 46d0b34c54a748b5f82e34b70bb871852325b68b Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 16:36:24 -0700 Subject: [PATCH 02/31] add RequestParams and SyncRequest --- src/browser/Frame.zig | 22 ++- src/browser/HttpClient.zig | 214 +++++++++++++++------- src/browser/ScriptManager.zig | 62 ++++--- src/browser/webapi/Worker.zig | 20 +- src/browser/webapi/net/Fetch.zig | 22 ++- src/browser/webapi/net/XMLHttpRequest.zig | 24 +-- src/cdp/domains/fetch.zig | 12 +- src/cdp/domains/network.zig | 18 +- src/cdp/id.zig | 4 +- src/network/layer/CacheLayer.zig | 10 +- src/network/layer/RobotsLayer.zig | 32 ++-- src/network/layer/WebBotAuthLayer.zig | 4 +- 12 files changed, 273 insertions(+), 171 deletions(-) diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig index e407ab12..3e8ff90f 100644 --- a/src/browser/Frame.zig +++ b/src/browser/Frame.zig @@ -647,16 +647,18 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo http_client.request(.{ .ctx = self, - .url = self.url, - .frame_id = self._frame_id, - .loader_id = self._loader_id, - .method = opts.method, - .headers = headers, - .body = opts.body, - .cookie_jar = &session.cookie_jar, - .cookie_origin = self.url, - .resource_type = .document, - .notification = self._session.notification, + .params = .{ + .url = self.url, + .frame_id = self._frame_id, + .loader_id = self._loader_id, + .method = opts.method, + .headers = headers, + .body = opts.body, + .cookie_jar = &session.cookie_jar, + .cookie_origin = self.url, + .resource_type = .document, + .notification = self._session.notification, + }, .header_callback = frameHeaderDoneCallback, .data_callback = frameDataCallback, .done_callback = frameDoneCallback, diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index f22ab4d1..0dbee3bd 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -338,7 +338,7 @@ fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32) void { const transfer: *Transfer = @fieldParentPtr("_node", node); if (comptime abort_all) { transfer.kill(); - } else if (transfer.req.frame_id == frame_id) { + } else if (transfer.req.params.frame_id == frame_id) { q.remove(node); transfer.kill(); } @@ -375,7 +375,7 @@ fn abortConnections(list: std.DoublyLinkedList, comptime abort_all: bool, frame_ const conn: *http.Connection = @fieldParentPtr("node", node); switch (conn.transport) { .http => |transfer| { - if ((comptime abort_all) or transfer.req.frame_id == frame_id) { + if ((comptime abort_all) or transfer.req.params.frame_id == frame_id) { transfer.kill(); } }, @@ -402,20 +402,15 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { return self.perform(@intCast(timeout_ms)); } -pub fn request(self: *Client, req: Request) !void { - const ctx = Context{ .network = self.network }; - return self.entry_layer.request(ctx, req); -} - pub fn _request(ptr: *anyopaque, _: Context, req: Request) !void { const self: *Client = @ptrCast(@alignCast(ptr)); const transfer = try self.makeTransfer(req); - transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); + transfer.req.params.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); var wait_for_interception = false; - transfer.req.notification.dispatch(.http_request_intercept, &.{ + transfer.req.params.notification.dispatch(.http_request_intercept, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception, }); @@ -430,7 +425,7 @@ pub fn _request(ptr: *anyopaque, _: Context, req: Request) !void { } transfer._intercept_state = .pending; - if (req.blocking == false) { + if (req.params.blocking == false) { // The request was interecepted, but it isn't a blocking request, so we // dont' need to block this call. The request will be unblocked // asynchronously via either continueTransfer or abortTransfer @@ -442,6 +437,81 @@ pub fn _request(ptr: *anyopaque, _: Context, req: Request) !void { } } +pub fn request(self: *Client, req: Request) !void { + const ctx = Context{ .network = self.network }; + return self.entry_layer.request(ctx, req); +} + +const SyncContext = struct { + allocator: Allocator, + completion: union(enum) { + in_progress: void, + done: void, + err: anyerror, + shutdown: void, + } = .in_progress, + + status: u16 = 0, + body: std.ArrayList(u8), + + fn headerCallback(response: Response) anyerror!bool { + const self: *SyncContext = @ptrCast(@alignCast(response.ctx)); + self.status = response.status().?; + if (response.contentLength()) |cl| { + try self.body.ensureTotalCapacity(self.allocator, cl); + } + return true; + } + + fn dataCallback(response: Response, data: []const u8) anyerror!void { + const self: *SyncContext = @ptrCast(@alignCast(response.ctx)); + try self.body.appendSlice(self.allocator, data); + } + + fn doneCallback(ctx: *anyopaque) anyerror!void { + const self: *SyncContext = @ptrCast(@alignCast(ctx)); + self.completion = .done; + } + + fn errorCallback(ctx: *anyopaque, err: anyerror) void { + const self: *SyncContext = @ptrCast(@alignCast(ctx)); + self.completion = .{ .err = err }; + } + + fn shutdownCallback(ctx: *anyopaque) void { + const self: *SyncContext = @ptrCast(@alignCast(ctx)); + self.completion = .shutdown; + } +}; + +pub fn syncRequest(self: *Client, allocator: Allocator, params: RequestParams) !SyncResponse { + var sync_ctx = SyncContext{ .allocator = allocator, .body = .empty }; + errdefer sync_ctx.body.deinit(allocator); + + try self.request(.{ + .params = params, + .ctx = &sync_ctx, + .header_callback = SyncContext.headerCallback, + .data_callback = SyncContext.dataCallback, + .done_callback = SyncContext.doneCallback, + .error_callback = SyncContext.errorCallback, + .shutdown_callback = SyncContext.shutdownCallback, + }); + + while (sync_ctx.completion == .in_progress) { + _ = try self.tick(200); + } + + switch (sync_ctx.completion) { + .in_progress => @panic("Impossible to be in progress here."), + .done, .shutdown => return .{ + .status = sync_ctx.status, + .body = sync_ctx.body, + }, + .err => |e| return e, + } +} + fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { // The request was intercepted and is blocking. This is messy, but our // callers, the ScriptManager -> Page, don't have a great way to stop the @@ -507,7 +577,7 @@ pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { } self.intercepted -= 1; - if (!transfer.req.blocking) { + if (!transfer.req.params.blocking) { return self.process(transfer); } transfer._intercept_state = .@"continue"; @@ -521,7 +591,7 @@ pub fn abortTransfer(self: *Client, transfer: *Transfer) void { } self.intercepted -= 1; - if (!transfer.req.blocking) { + if (!transfer.req.params.blocking) { transfer.abort(error.Abort); } transfer._intercept_state = .{ .abort = error.Abort }; @@ -536,7 +606,7 @@ pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: self.intercepted -= 1; try transfer.fulfill(status, headers, body); - if (!transfer.req.blocking) { + if (!transfer.req.params.blocking) { transfer.deinit(); return; } @@ -554,7 +624,7 @@ pub fn incrReqId(self: *Client) u32 { } fn makeTransfer(self: *Client, req: Request) !*Transfer { - errdefer req.headers.deinit(); + errdefer req.params.headers.deinit(); const transfer = try self.transfer_pool.create(); errdefer self.transfer_pool.destroy(transfer); @@ -564,7 +634,7 @@ fn makeTransfer(self: *Client, req: Request) !*Transfer { .start_time = timestamp(.monotonic), .arena = ArenaAllocator.init(self.allocator), .id = id, - .url = req.url, + .url = req.params.url, .req = req, .client = self, }; @@ -581,7 +651,7 @@ fn requestFailed(transfer: *Transfer, err: anyerror, comptime execute_callback: transfer._notified_fail = true; - transfer.req.notification.dispatch(.http_request_fail, &.{ + transfer.req.params.notification.dispatch(.http_request_fail, &.{ .transfer = transfer, .err = err, }); @@ -704,7 +774,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T // TODO give a way to configure the number of auth retries. if (transfer._auth_challenge != null and transfer._tries < 10) { var wait_for_interception = false; - transfer.req.notification.dispatch( + transfer.req.params.notification.dispatch( .http_request_auth_required, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }, ); @@ -720,7 +790,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T // release the easy handle back into the pool. The transfer // is still valid/alive (just has no handle). transfer.releaseConn(); - if (!transfer.req.blocking) { + if (!transfer.req.params.blocking) { // In the case of an async request, we can just "forget" // about this transfer until it gets updated asynchronously // from some CDP command. @@ -810,7 +880,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T if (transfer._stream_buffer.items.len > 0) { try transfer.req.data_callback(Response.fromTransfer(transfer), body); - transfer.req.notification.dispatch(.http_response_data, &.{ + transfer.req.params.notification.dispatch(.http_response_data, &.{ .data = body, .transfer = transfer, }); @@ -827,7 +897,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T try transfer.req.done_callback(transfer.req.ctx); - transfer.req.notification.dispatch(.http_request_done, &.{ + transfer.req.params.notification.dispatch(.http_request_done, &.{ .transfer = transfer, }); @@ -940,14 +1010,7 @@ fn ensureNoActiveConnection(self: *const Client) !void { } } -pub const Request = struct { - pub const StartCallback = *const fn (response: Response) anyerror!void; - pub const HeaderCallback = *const fn (response: Response) anyerror!bool; - pub const DataCallback = *const fn (response: Response, data: []const u8) anyerror!void; - pub const DoneCallback = *const fn (ctx: *anyopaque) anyerror!void; - pub const ErrorCallback = *const fn (ctx: *anyopaque, err: anyerror) void; - pub const ShutdownCallback = *const fn (ctx: *anyopaque) void; - +pub const RequestParams = struct { frame_id: u32, loader_id: u32, method: Method, @@ -968,16 +1031,6 @@ pub const Request = struct { // reason for that is the Http Client is already a bit CDP-aware. blocking: bool = false, - // arbitrary data that can be associated with this request - ctx: *anyopaque = undefined, - - start_callback: ?StartCallback = null, - header_callback: HeaderCallback, - data_callback: DataCallback, - done_callback: DoneCallback, - error_callback: ErrorCallback, - shutdown_callback: ?ShutdownCallback = null, - const ResourceType = enum { document, xhr, @@ -998,11 +1051,35 @@ pub const Request = struct { } }; - pub fn deinit(self: *const Request) void { + pub fn deinit(self: *const RequestParams) void { self.headers.deinit(); } }; +pub const Request = struct { + pub const StartCallback = *const fn (response: Response) anyerror!void; + pub const HeaderCallback = *const fn (response: Response) anyerror!bool; + pub const DataCallback = *const fn (response: Response, data: []const u8) anyerror!void; + pub const DoneCallback = *const fn (ctx: *anyopaque) anyerror!void; + pub const ErrorCallback = *const fn (ctx: *anyopaque, err: anyerror) void; + pub const ShutdownCallback = *const fn (ctx: *anyopaque) void; + + params: RequestParams, + // arbitrary data that can be associated with this request + ctx: *anyopaque = undefined, + + start_callback: ?StartCallback = null, + header_callback: HeaderCallback, + data_callback: DataCallback, + done_callback: DoneCallback, + error_callback: ErrorCallback, + shutdown_callback: ?ShutdownCallback = null, + + pub fn deinit(self: *const Request) void { + self.params.deinit(); + } +}; + pub const Response = struct { ctx: *anyopaque, inner: union(enum) { @@ -1078,6 +1155,15 @@ pub const Response = struct { } }; +pub const SyncResponse = struct { + status: u16, + body: std.ArrayList(u8), + + pub fn deinit(self: *SyncResponse, allocator: Allocator) void { + self.body.deinit(allocator); + } +}; + pub const Transfer = struct { arena: ArenaAllocator, id: u32 = 0, @@ -1146,7 +1232,7 @@ pub const Transfer = struct { self._conn = null; } - self.req.headers.deinit(); + self.req.deinit(); self.arena.deinit(); self.client.transfer_pool.destroy(self); } @@ -1204,7 +1290,7 @@ pub const Transfer = struct { if (self._notified_fail) return; self._notified_fail = true; - self.req.notification.dispatch(.http_request_fail, &.{ + self.req.params.notification.dispatch(.http_request_fail, &.{ .transfer = self, .err = err, }); @@ -1226,15 +1312,15 @@ pub const Transfer = struct { try conn.setProxy(client.http_proxy); try conn.setTlsVerify(client.tls_verify, client.use_proxy); - try conn.setURL(req.url); - try conn.setMethod(req.method); - if (req.body) |b| { + try conn.setURL(req.params.url); + try conn.setMethod(req.params.method); + if (req.params.body) |b| { try conn.setBody(b); } else { try conn.setGetMode(); } - var header_list = req.headers; + var header_list = req.params.headers; try conn.secretHeaders(&header_list, &client.network.config.http_headers); try conn.setHeaders(&header_list); @@ -1246,12 +1332,12 @@ pub const Transfer = struct { conn.transport = .{ .http = self }; // Per-request timeout override (e.g. XHR timeout) - if (req.timeout_ms > 0) { - try conn.setTimeout(req.timeout_ms); + if (req.params.timeout_ms > 0) { + try conn.setTimeout(req.params.timeout_ms); } // add credentials - if (req.credentials) |creds| { + if (req.params.credentials) |creds| { if (self._auth_challenge != null and self._auth_challenge.?.source == .proxy) { try conn.setProxyCredentials(creds); } else { @@ -1301,12 +1387,12 @@ pub const Transfer = struct { } pub fn getCookieString(self: *Transfer) !?[:0]const u8 { - const jar = self.req.cookie_jar orelse return null; + const jar = self.req.params.cookie_jar orelse return null; var aw: std.Io.Writer.Allocating = .init(self.arena.allocator()); - try jar.forRequest(self.req.url, &aw.writer, .{ + try jar.forRequest(self.req.params.url, &aw.writer, .{ .is_http = true, - .origin_url = self.req.cookie_origin, - .is_navigation = self.req.resource_type == .document, + .origin_url = self.req.params.cookie_origin, + .is_navigation = self.req.params.resource_type == .document, }); const written = aw.written(); if (written.len == 0) return null; @@ -1316,7 +1402,7 @@ pub const Transfer = struct { pub fn format(self: *Transfer, writer: *std.Io.Writer) !void { const req = self.req; - return writer.print("{s} {s}", .{ @tagName(req.method), req.url }); + return writer.print("{s} {s}", .{ @tagName(req.params.method), req.params.url }); } pub fn updateURL(self: *Transfer, url: [:0]const u8) !void { @@ -1324,7 +1410,7 @@ pub const Transfer = struct { self.url = url; // for the request itself - self.req.url = url; + self.req.params.url = url; } fn handleRedirect(transfer: *Transfer) !void { @@ -1338,7 +1424,7 @@ pub const Transfer = struct { } // retrieve cookies from the redirect's response. - if (req.cookie_jar) |jar| { + if (req.params.cookie_jar) |jar| { var i: usize = 0; while (conn.getResponseHeader("set-cookie", i)) |ct| : (i += 1) { try jar.populateFromResponse(transfer.url, ct.value); @@ -1382,8 +1468,8 @@ pub const Transfer = struct { // 307, 308 → keep method and body. const status = try conn.getResponseCode(); if (status == 301 or status == 302 or status == 303) { - req.method = .GET; - req.body = null; + req.params.method = .GET; + req.params.body = null; } } @@ -1410,11 +1496,11 @@ pub const Transfer = struct { } pub fn updateCredentials(self: *Transfer, userpwd: [:0]const u8) void { - self.req.credentials = userpwd; + self.req.params.credentials = userpwd; } pub fn replaceRequestHeaders(self: *Transfer, allocator: Allocator, headers: []const http.Header) !void { - self.req.headers.deinit(); + self.req.params.headers.deinit(); var buf: std.ArrayList(u8) = .empty; var new_headers = try self.client.newHeaders(); @@ -1426,7 +1512,7 @@ pub const Transfer = struct { try buf.append(allocator, 0); // null terminated try new_headers.add(buf.items[0 .. buf.items.len - 1 :0]); } - self.req.headers = new_headers; + self.req.params.headers = new_headers; } // abortAuthChallenge is called when an auth challenge interception is @@ -1438,7 +1524,7 @@ pub const Transfer = struct { log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.intercepted }); } self.client.intercepted -= 1; - if (!self.req.blocking) { + if (!self.req.params.blocking) { self.abort(error.AbortAuthChallenge); return; } @@ -1454,7 +1540,7 @@ pub const Transfer = struct { try transfer.buildResponseHeader(conn); - if (transfer.req.cookie_jar) |jar| { + if (transfer.req.params.cookie_jar) |jar| { var i: usize = 0; while (true) { const ct = conn.getResponseHeader("set-cookie", i); @@ -1474,7 +1560,7 @@ pub const Transfer = struct { } } - transfer.req.notification.dispatch(.http_response_header_done, &.{ + transfer.req.params.notification.dispatch(.http_response_header_done, &.{ .transfer = transfer, }); @@ -1574,7 +1660,7 @@ pub const Transfer = struct { transfer.response_header = .{ .status = status, - .url = req.url, + .url = req.params.url, .redirect_count = 0, ._injected_headers = headers, }; diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 5f82b88e..3215eb7b 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -291,17 +291,19 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e defer self.is_evaluating = was_evaluating; try self.client.request(.{ - .url = url, .ctx = script, - .method = .GET, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .headers = try self.getHeaders(), - .blocking = is_blocking, - .cookie_jar = &frame._session.cookie_jar, - .cookie_origin = frame.url, - .resource_type = .script, - .notification = frame._session.notification, + .params = .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = try self.getHeaders(), + .blocking = is_blocking, + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .resource_type = .script, + .notification = frame._session.notification, + }, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, @@ -407,16 +409,18 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const self.async_scripts.append(&script.node); self.client.request(.{ - .url = url, .ctx = script, - .method = .GET, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .headers = try self.getHeaders(), - .cookie_jar = &frame._session.cookie_jar, - .cookie_origin = frame.url, - .resource_type = .script, - .notification = frame._session.notification, + .params = .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = try self.getHeaders(), + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .resource_type = .script, + .notification = frame._session.notification, + }, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, @@ -513,16 +517,18 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C self.async_scripts.append(&script.node); self.client.request(.{ - .url = url, - .method = .GET, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .headers = try self.getHeaders(), .ctx = script, - .resource_type = .script, - .cookie_jar = &frame._session.cookie_jar, - .cookie_origin = frame.url, - .notification = frame._session.notification, + .params = .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = try self.getHeaders(), + .resource_type = .script, + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .notification = frame._session.notification, + }, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, diff --git a/src/browser/webapi/Worker.zig b/src/browser/webapi/Worker.zig index 558d4cce..81d74116 100644 --- a/src/browser/webapi/Worker.zig +++ b/src/browser/webapi/Worker.zig @@ -95,15 +95,17 @@ pub fn init(url: []const u8, exec: *Execution) !*Worker { const http_client = session.browser.http_client; http_client.request(.{ .ctx = self, - .url = resolved_url, - .method = .GET, - .headers = try http_client.newHeaders(), - .frame_id = self._frame_id, - .loader_id = self._loader_id, - .resource_type = .script, - .cookie_jar = &session.cookie_jar, - .cookie_origin = resolved_url, - .notification = session.notification, + .params = .{ + .url = resolved_url, + .method = .GET, + .headers = try http_client.newHeaders(), + .frame_id = self._frame_id, + .loader_id = self._loader_id, + .resource_type = .script, + .cookie_jar = &session.cookie_jar, + .cookie_origin = resolved_url, + .notification = session.notification, + }, .header_callback = httpHeaderCallback, .data_callback = httpDataCallback, .done_callback = httpDoneCallback, diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig index 623037d6..777f8311 100644 --- a/src/browser/webapi/net/Fetch.zig +++ b/src/browser/webapi/net/Fetch.zig @@ -94,16 +94,18 @@ pub fn init(input: Input, options: ?InitOpts, frame: *Frame) !js.Promise { try http_client.request(.{ .ctx = fetch, - .url = request._url, - .method = request._method, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .body = request._body, - .headers = headers, - .resource_type = .fetch, - .cookie_jar = cookie_jar, - .cookie_origin = frame.url, - .notification = frame._session.notification, + .params = .{ + .url = request._url, + .method = request._method, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .body = request._body, + .headers = headers, + .resource_type = .fetch, + .cookie_jar = cookie_jar, + .cookie_origin = frame.url, + .notification = frame._session.notification, + }, .start_callback = httpStartCallback, .header_callback = httpHeaderDoneCallback, .data_callback = httpDataCallback, diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 35f7f7fa..19a7676b 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -257,17 +257,19 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void { http_client.request(.{ .ctx = self, - .url = self._url, - .method = self._method, - .headers = headers, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .body = self._request_body, - .cookie_jar = if (cookie_support) &frame._session.cookie_jar else null, - .cookie_origin = frame.url, - .resource_type = .xhr, - .timeout_ms = self._timeout, - .notification = frame._session.notification, + .params = .{ + .url = self._url, + .method = self._method, + .headers = headers, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .body = self._request_body, + .cookie_jar = if (cookie_support) &frame._session.cookie_jar else null, + .cookie_origin = frame.url, + .resource_type = .xhr, + .timeout_ms = self._timeout, + .notification = frame._session.notification, + }, .start_callback = httpStartCallback, .header_callback = httpHeaderDoneCallback, .data_callback = httpDataCallback, diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index d0a1d1db..57d0bd56 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -198,9 +198,9 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification. try bc.cdp.sendEvent("Fetch.requestPaused", .{ .requestId = &id.toInterceptId(transfer.id), - .frameId = &id.toFrameId(transfer.req.frame_id), + .frameId = &id.toFrameId(transfer.req.params.frame_id), .request = network.TransferAsRequestWriter.init(transfer), - .resourceType = switch (transfer.req.resource_type) { + .resourceType = switch (transfer.req.params.resource_type) { .script => "Script", .xhr => "XHR", .document => "Document", @@ -251,7 +251,7 @@ fn continueRequest(cmd: *CDP.Command) !void { try transfer.updateURL(try arena.dupeZ(u8, url)); } if (params.method) |method| { - transfer.req.method = std.meta.stringToEnum(http.Method, method) orelse return error.InvalidParams; + transfer.req.params.method = std.meta.stringToEnum(http.Method, method) orelse return error.InvalidParams; } if (params.headers) |headers| { @@ -265,7 +265,7 @@ fn continueRequest(cmd: *CDP.Command) !void { const decoder = std.base64.standard.Decoder; const body = try arena.alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(body, b); - transfer.req.body = body; + transfer.req.params.body = body; } try bc.cdp.browser.http_client.continueTransfer(transfer); @@ -402,9 +402,9 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati try bc.cdp.sendEvent("Fetch.authRequired", .{ .requestId = &id.toInterceptId(transfer.id), - .frameId = &id.toFrameId(transfer.req.frame_id), + .frameId = &id.toFrameId(transfer.req.params.frame_id), .request = network.TransferAsRequestWriter.init(transfer), - .resourceType = switch (transfer.req.resource_type) { + .resourceType = switch (transfer.req.params.resource_type) { .script => "Script", .xhr => "XHR", .document => "Document", diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index ff5778e9..74c80197 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -275,20 +275,20 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques const transfer = msg.transfer; const req = &transfer.req; - const frame_id = req.frame_id; + const frame_id = req.params.frame_id; const frame = bc.session.findFrameByFrameId(frame_id) orelse return; // Modify request with extra CDP headers for (bc.extra_headers.items) |extra| { - try req.headers.add(extra); + try req.params.headers.add(extra); } // We're missing a bunch of fields, but, for now, this eems like enough try bc.cdp.sendEvent("Network.requestWillBeSent", .{ .frameId = &id.toFrameId(frame_id), .requestId = &id.toRequestId(transfer), - .loaderId = &id.toLoaderId(req.loader_id), - .type = req.resource_type.string(), + .loaderId = &id.toLoaderId(req.params.loader_id), + .type = req.params.resource_type.string(), .documentURL = frame.url, .request = TransferAsRequestWriter.init(transfer), .initiator = .{ .type = "other" }, @@ -309,9 +309,9 @@ pub fn httpResponseHeaderDone(arena: Allocator, bc: *CDP.BrowserContext, msg: *c // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.responseReceived", .{ - .frameId = &id.toFrameId(req.frame_id), + .frameId = &id.toFrameId(req.params.frame_id), .requestId = &id.toRequestId(transfer), - .loaderId = &id.toLoaderId(req.loader_id), + .loaderId = &id.toLoaderId(req.params.loader_id), .response = TransferAsResponseWriter.init(arena, transfer), .hasExtraInfo = false, // TODO change after adding Network.responseReceivedExtraInfo }, .{ .session_id = session_id }); @@ -359,18 +359,18 @@ pub const TransferAsRequestWriter = struct { { try jws.objectField("method"); - try jws.write(@tagName(transfer.req.method)); + try jws.write(@tagName(transfer.req.params.method)); } { try jws.objectField("hasPostData"); - try jws.write(transfer.req.body != null); + try jws.write(transfer.req.params.body != null); } { try jws.objectField("headers"); try jws.beginObject(); - var it = transfer.req.headers.iterator(); + var it = transfer.req.params.headers.iterator(); while (it.next()) |hdr| { try jws.objectField(hdr.name); try jws.write(hdr.value); diff --git a/src/cdp/id.zig b/src/cdp/id.zig index a5d1286f..5dc3a6c2 100644 --- a/src/cdp/id.zig +++ b/src/cdp/id.zig @@ -43,8 +43,8 @@ pub fn toLoaderId(id: u32) [14]u8 { const Transfer = @import("../browser/HttpClient.zig").Transfer; pub fn toRequestId(transfer: *const Transfer) [14]u8 { const req = transfer.req; - if (req.resource_type == .document) { - return toLoaderId(req.loader_id); + if (req.params.resource_type == .document) { + return toLoaderId(req.params.loader_id); } var buf: [14]u8 = undefined; diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 60dd1f2d..7bc36e39 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -48,18 +48,18 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { const self: *CacheLayer = @ptrCast(@alignCast(ptr)); const network = ctx.network; - if (req.method != .GET) { + if (req.params.method != .GET) { return self.next.request(ctx, req); } const arena = try network.app.arena_pool.acquire(.small, "CacheLayer"); errdefer network.app.arena_pool.release(arena); - var iter = req.headers.iterator(); + var iter = req.params.headers.iterator(); const req_header_list = try iter.collect(arena); if (network.cache.?.get(arena, .{ - .url = req.url, + .url = req.params.url, .timestamp = std.time.timestamp(), .request_headers = req_header_list.items, })) |cached| { @@ -73,8 +73,8 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { .arena = arena, .context = ctx, .forward = Forward.fromRequest(req), - .req_url = req.url, - .req_headers = req.headers, + .req_url = req.params.url, + .req_headers = req.params.headers, }; const wrapped = cache_ctx.forward.wrapRequest( diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 0119d747..714d205b 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -56,17 +56,17 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { const arena = try ctx.network.app.arena_pool.acquire(.small, "RobotsLayer"); errdefer ctx.network.app.arena_pool.release(arena); - const robots_url = try URL.getRobotsUrl(arena, req.url); + const robots_url = try URL.getRobotsUrl(arena, req.params.url); if (ctx.network.robot_store.get(robots_url)) |robot_entry| { defer ctx.network.app.arena_pool.release(arena); switch (robot_entry) { .present => |robots| { - const path = URL.getPathname(req.url); + const path = URL.getPathname(req.params.url); if (!robots.isAllowed(path)) { - log.warn(.http, "blocked by robots", .{ .url = req.url }); + log.warn(.http, "blocked by robots", .{ .url = req.params.url }); req.error_callback(req.ctx, error.RobotsBlocked); return; } @@ -108,16 +108,18 @@ fn fetchRobotsThenRequest( try self.next.request(ctx, .{ .ctx = robots_ctx, - .url = robots_url, - .method = .GET, - .headers = headers, - .blocking = false, - .frame_id = req.frame_id, - .loader_id = req.loader_id, - .cookie_jar = req.cookie_jar, - .cookie_origin = req.cookie_origin, - .notification = req.notification, - .resource_type = .fetch, + .params = .{ + .url = robots_url, + .method = .GET, + .headers = headers, + .blocking = false, + .frame_id = req.params.frame_id, + .loader_id = req.params.loader_id, + .cookie_jar = req.params.cookie_jar, + .cookie_origin = req.params.cookie_origin, + .notification = req.params.notification, + .resource_type = .fetch, + }, .header_callback = RobotsContext.headerCallback, .data_callback = RobotsContext.dataCallback, .done_callback = RobotsContext.doneCallback, @@ -138,7 +140,7 @@ fn flushPending(self: *RobotsLayer, ctx: Context, robots_url: [:0]const u8, allo for (queued.value.items) |queued_req| { if (!allowed) { - log.warn(.http, "blocked by robots", .{ .url = queued_req.url }); + log.warn(.http, "blocked by robots", .{ .url = queued_req.params.url }); defer queued_req.deinit(); queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); } else { @@ -219,7 +221,7 @@ const RobotsContext = struct { }; if (robots) |r| { try network.robot_store.put(robots_url, r); - const path = URL.getPathname(l.pending.get(robots_url).?.items[0].url); + const path = URL.getPathname(l.pending.get(robots_url).?.items[0].params.url); allowed = r.isAllowed(path); } } diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig index 42bf99a2..73d0aff3 100644 --- a/src/network/layer/WebBotAuthLayer.zig +++ b/src/network/layer/WebBotAuthLayer.zig @@ -45,8 +45,8 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { const arena = try ctx.network.app.arena_pool.acquire(.small, "WebBotAuthLayer"); defer ctx.network.app.arena_pool.release(arena); - const authority = URL.getHost(req.url); - try wba.signRequest(arena, &our_req.headers, authority); + const authority = URL.getHost(req.params.url); + try wba.signRequest(arena, &our_req.params.headers, authority); return self.next.request(ctx, our_req); } From b4a9bdd7a367dc3fde0d32b00cbd5e9a5f0dacab Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 16:45:53 -0700 Subject: [PATCH 03/31] use syncRequest in ScriptManager --- src/browser/ScriptManager.zig | 80 ++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 3215eb7b..28a3aefe 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -265,13 +265,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e } if (remote_url) |url| { - errdefer { - if (is_blocking == false) { - self.scriptList(script).remove(&script.node); - } - // Let the outer errdefer handle releasing the arena if client.request fails - } - if (comptime IS_DEBUG) { var ls: js.Local.Scope = undefined; frame.js.localScope(&ls); @@ -285,10 +278,35 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e }); } - { - const was_evaluating = self.is_evaluating; - self.is_evaluating = true; - defer self.is_evaluating = was_evaluating; + const was_evaluating = self.is_evaluating; + self.is_evaluating = true; + defer self.is_evaluating = was_evaluating; + + const headers = try self.getHeaders(); + errdefer headers.deinit(); + + if (is_blocking) { + const response = try self.client.syncRequest(arena, .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = headers, + .blocking = true, + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .resource_type = .script, + .notification = frame._session.notification, + }); + + script.source = .{ .remote = response.body }; + script.status = response.status; + script.complete = true; + } else { + errdefer { + self.scriptList(script).remove(&script.node); + // Let the outer errdefer handle releasing the arena if client.request fails + } try self.client.request(.{ .ctx = script, @@ -297,8 +315,8 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e .method = .GET, .frame_id = frame._frame_id, .loader_id = frame._loader_id, - .headers = try self.getHeaders(), - .blocking = is_blocking, + .headers = headers, + .blocking = false, .cookie_jar = &frame._session.cookie_jar, .cookie_origin = frame.url, .resource_type = .script, @@ -319,29 +337,21 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e return; } - // this is , it needs to block the caller - // until it's evaluated - var client = self.client; - while (true) { - if (!script.complete) { - _ = try client.tick(200); - continue; - } - if (script.status == 0) { - // an error (that we already logged) - script.deinit(); - return; - } - - // could have already been evaluating if this is dynamically added - const was_evaluating = self.is_evaluating; - self.is_evaluating = true; - defer { - self.is_evaluating = was_evaluating; - script.deinit(); - } - return script.eval(frame); + if (script.status == 0) { + // an error (that we already logged) + script.deinit(); + return; } + + // could have already been evaluating if this is dynamically added + const was_evaluating = self.is_evaluating; + self.is_evaluating = true; + defer { + self.is_evaluating = was_evaluating; + script.deinit(); + } + + script.eval(frame); } fn scriptList(self: *ScriptManager, script: *const Script) *std.DoublyLinkedList { From e988e491361ccff100e8fb4955c10556494e1e22 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 17:20:30 -0700 Subject: [PATCH 04/31] remove Context and thread *Client --- src/browser/HttpClient.zig | 19 +++------- src/network/layer/CacheLayer.zig | 22 ++++++------ src/network/layer/RobotsLayer.zig | 52 +++++++++++++-------------- src/network/layer/WebBotAuthLayer.zig | 12 +++---- 4 files changed, 48 insertions(+), 57 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 0dbee3bd..08c3bed5 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -137,24 +137,16 @@ robots_layer: RobotsLayer, web_bot_auth_layer: WebBotAuthLayer, entry_layer: Layer, -pub const Context = struct { - network: *Network, - - pub fn newHeaders(self: Context) !http.Headers { - return http.Headers.init(self.network.config.http_headers.user_agent_header); - } -}; - pub const Layer = struct { ptr: *anyopaque, vtable: *const VTable, pub const VTable = struct { - request: *const fn (*anyopaque, Context, Request) anyerror!void, + request: *const fn (*anyopaque, *Client, Request) anyerror!void, }; - pub fn request(self: Layer, ctx: Context, req: Request) !void { - return self.vtable.request(self.ptr, ctx, req); + pub fn request(self: Layer, client: *Client, req: Request) !void { + return self.vtable.request(self.ptr, client, req); } }; @@ -402,7 +394,7 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { return self.perform(@intCast(timeout_ms)); } -pub fn _request(ptr: *anyopaque, _: Context, req: Request) !void { +pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { const self: *Client = @ptrCast(@alignCast(ptr)); const transfer = try self.makeTransfer(req); @@ -438,8 +430,7 @@ pub fn _request(ptr: *anyopaque, _: Context, req: Request) !void { } pub fn request(self: *Client, req: Request) !void { - const ctx = Context{ .network = self.network }; - return self.entry_layer.request(ctx, req); + return self.entry_layer.request(self, req); } const SyncContext = struct { diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 7bc36e39..317457b5 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -20,8 +20,8 @@ const std = @import("std"); const log = @import("../../log.zig"); const http = @import("../http.zig"); +const Client = @import("../../browser/HttpClient.zig").Client; const Transfer = @import("../../browser/HttpClient.zig").Transfer; -const Context = @import("../../browser/HttpClient.zig").Context; const Request = @import("../../browser/HttpClient.zig").Request; const Response = @import("../../browser/HttpClient.zig").Response; const Layer = @import("../../browser/HttpClient.zig").Layer; @@ -44,12 +44,12 @@ pub fn layer(self: *CacheLayer) Layer { }; } -fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { +fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const self: *CacheLayer = @ptrCast(@alignCast(ptr)); - const network = ctx.network; + const network = client.network; if (req.params.method != .GET) { - return self.next.request(ctx, req); + return self.next.request(client, req); } const arena = try network.app.arena_pool.acquire(.small, "CacheLayer"); @@ -71,7 +71,7 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { const cache_ctx = try arena.create(CacheContext); cache_ctx.* = .{ .arena = arena, - .context = ctx, + .client = client, .forward = Forward.fromRequest(req), .req_url = req.params.url, .req_headers = req.params.headers, @@ -89,7 +89,7 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { }, ); - return self.next.request(ctx, wrapped); + return self.next.request(client, wrapped); } fn serveFromCache(req: Request, cached: *const CachedResponse) !void { @@ -138,7 +138,7 @@ fn serveFromCache(req: Request, cached: *const CachedResponse) !void { const CacheContext = struct { arena: std.mem.Allocator, - context: Context, + client: *Client, transfer: ?*Transfer = null, forward: Forward, req_url: [:0]const u8, @@ -208,12 +208,12 @@ const CacheContext = struct { fn doneCallback(ctx: *anyopaque) anyerror!void { const self: *CacheContext = @ptrCast(@alignCast(ctx)); - defer self.context.network.app.arena_pool.release(self.arena); + defer self.client.network.app.arena_pool.release(self.arena); const transfer = self.transfer orelse @panic("Start Callback didn't set CacheLayer.transfer"); if (self.pending_metadata) |metadata| { - const cache = &self.context.network.cache.?; + const cache = &self.client.network.cache.?; log.debug(.browser, "http cache", .{ .key = self.req_url, .metadata = metadata }); cache.put(metadata.*, transfer._stream_buffer.items) catch |err| { @@ -227,13 +227,13 @@ const CacheContext = struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *CacheContext = @ptrCast(@alignCast(ctx)); - defer self.context.network.app.arena_pool.release(self.arena); + defer self.client.network.app.arena_pool.release(self.arena); self.forward.forwardShutdown(); } fn errorCallback(ctx: *anyopaque, e: anyerror) void { const self: *CacheContext = @ptrCast(@alignCast(ctx)); - defer self.context.network.app.arena_pool.release(self.arena); + defer self.client.network.app.arena_pool.release(self.arena); self.forward.forwardErr(e); } }; diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 714d205b..add4ee3d 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -21,7 +21,7 @@ const log = @import("../../log.zig"); const URL = @import("../../browser/URL.zig"); const Robots = @import("../Robots.zig"); -const Context = @import("../../browser/HttpClient.zig").Context; +const Client = @import("../../browser/HttpClient.zig").Client; const Request = @import("../../browser/HttpClient.zig").Request; const Response = @import("../../browser/HttpClient.zig").Response; const Layer = @import("../../browser/HttpClient.zig").Layer; @@ -50,16 +50,16 @@ pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void { self.pending.deinit(allocator); } -fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { +fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const self: *RobotsLayer = @ptrCast(@alignCast(ptr)); - const arena = try ctx.network.app.arena_pool.acquire(.small, "RobotsLayer"); - errdefer ctx.network.app.arena_pool.release(arena); + const arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer"); + errdefer client.network.app.arena_pool.release(arena); const robots_url = try URL.getRobotsUrl(arena, req.params.url); - if (ctx.network.robot_store.get(robots_url)) |robot_entry| { - defer ctx.network.app.arena_pool.release(arena); + if (client.network.robot_store.get(robots_url)) |robot_entry| { + defer client.network.app.arena_pool.release(arena); switch (robot_entry) { .present => |robots| { @@ -73,20 +73,20 @@ fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { }, .absent => {}, } - return self.next.request(ctx, req); + return self.next.request(client, req); } - return self.fetchRobotsThenRequest(ctx, arena, robots_url, req); + return self.fetchRobotsThenRequest(client, arena, robots_url, req); } fn fetchRobotsThenRequest( self: *RobotsLayer, - ctx: Context, + client: *Client, arena: std.mem.Allocator, robots_url: [:0]const u8, req: Request, ) !void { - errdefer ctx.network.app.arena_pool.release(arena); + errdefer client.network.app.arena_pool.release(arena); const entry = try self.pending.getOrPut(self.allocator, robots_url); @@ -97,16 +97,16 @@ fn fetchRobotsThenRequest( const robots_ctx = try arena.create(RobotsContext); robots_ctx.* = .{ .layer = self, - .ctx = ctx, + .client = client, .arena = arena, .robots_url = robots_url, .buffer = .empty, }; - const headers = try ctx.newHeaders(); + const headers = try client.newHeaders(); log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); - try self.next.request(ctx, .{ + try self.next.request(client, .{ .ctx = robots_ctx, .params = .{ .url = robots_url, @@ -127,13 +127,13 @@ fn fetchRobotsThenRequest( .shutdown_callback = RobotsContext.shutdownCallback, }); } else { - ctx.network.app.arena_pool.release(arena); + client.network.app.arena_pool.release(arena); } try entry.value_ptr.append(self.allocator, req); } -fn flushPending(self: *RobotsLayer, ctx: Context, robots_url: [:0]const u8, allowed: bool) void { +fn flushPending(self: *RobotsLayer, client: *Client, robots_url: [:0]const u8, allowed: bool) void { var queued = self.pending.fetchRemove(robots_url) orelse @panic("RobotsLayer.flushPending: missing queue"); defer queued.value.deinit(self.allocator); @@ -144,7 +144,7 @@ fn flushPending(self: *RobotsLayer, ctx: Context, robots_url: [:0]const u8, allo defer queued_req.deinit(); queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); } else { - self.next.request(ctx, queued_req) catch |e| { + self.next.request(client, queued_req) catch |e| { defer queued_req.deinit(); queued_req.error_callback(queued_req.ctx, e); }; @@ -166,7 +166,7 @@ fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8) void { const RobotsContext = struct { layer: *RobotsLayer, arena: std.mem.Allocator, - ctx: Context, + client: *Client, robots_url: [:0]const u8, buffer: std.ArrayListUnmanaged(u8), status: u16 = 0, @@ -201,12 +201,12 @@ const RobotsContext = struct { fn doneCallback(ctx_ptr: *anyopaque) anyerror!void { const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); const l = self.layer; - const ctx = self.ctx; + const client = self.client; const robots_url = self.robots_url; - defer ctx.network.app.arena_pool.release(self.arena); + defer client.network.app.arena_pool.release(self.arena); var allowed = true; - const network = ctx.network; + const network = client.network; switch (self.status) { 200 => { @@ -239,26 +239,26 @@ const RobotsContext = struct { }, } - l.flushPending(ctx, robots_url, allowed); + l.flushPending(client, robots_url, allowed); } fn errorCallback(ctx_ptr: *anyopaque, err: anyerror) void { const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); const l = self.layer; - const ctx = self.ctx; + const client = self.client; const robots_url = self.robots_url; - defer ctx.network.app.arena_pool.release(self.arena); + defer client.network.app.arena_pool.release(self.arena); log.warn(.http, "robots fetch failed", .{ .err = err }); - l.flushPending(ctx, robots_url, true); + l.flushPending(client, robots_url, true); } fn shutdownCallback(ctx_ptr: *anyopaque) void { const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); const l = self.layer; - const ctx = self.ctx; + const client = self.client; const robots_url = self.robots_url; - defer ctx.network.app.arena_pool.release(self.arena); + defer client.network.app.arena_pool.release(self.arena); log.debug(.http, "robots fetch shutdown", .{}); l.flushPendingShutdown(robots_url); diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig index 73d0aff3..25260872 100644 --- a/src/network/layer/WebBotAuthLayer.zig +++ b/src/network/layer/WebBotAuthLayer.zig @@ -21,7 +21,7 @@ const log = @import("../../log.zig"); const URL = @import("../../browser/URL.zig"); const WebBotAuth = @import("../WebBotAuth.zig"); -const Context = @import("../../browser/HttpClient.zig").Context; +const Client = @import("../../browser/HttpClient.zig").Client; const Request = @import("../../browser/HttpClient.zig").Request; const Layer = @import("../../browser/HttpClient.zig").Layer; @@ -36,17 +36,17 @@ pub fn layer(self: *WebBotAuthLayer) Layer { }; } -fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void { +fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const self: *WebBotAuthLayer = @ptrCast(@alignCast(ptr)); var our_req = req; - const wba = ctx.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth"); + const wba = client.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth"); - const arena = try ctx.network.app.arena_pool.acquire(.small, "WebBotAuthLayer"); - defer ctx.network.app.arena_pool.release(arena); + const arena = try client.network.app.arena_pool.acquire(.small, "WebBotAuthLayer"); + defer client.network.app.arena_pool.release(arena); const authority = URL.getHost(req.params.url); try wba.signRequest(arena, &our_req.params.headers, authority); - return self.next.request(ctx, our_req); + return self.next.request(client, our_req); } From e37c14a71447203c869ce18f292f0eab97800ff2 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 17:26:42 -0700 Subject: [PATCH 05/31] Transfer now uses Request's request_id --- src/browser/HttpClient.zig | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 08c3bed5..b9b63a41 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -430,7 +430,10 @@ pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { } pub fn request(self: *Client, req: Request) !void { - return self.entry_layer.request(self, req); + // Assign Request Id. + var our_req = req; + our_req.params.request_id = self.incrReqId(); + return self.entry_layer.request(self, our_req); } const SyncContext = struct { @@ -620,11 +623,10 @@ fn makeTransfer(self: *Client, req: Request) !*Transfer { const transfer = try self.transfer_pool.create(); errdefer self.transfer_pool.destroy(transfer); - const id = self.incrReqId(); transfer.* = .{ .start_time = timestamp(.monotonic), .arena = ArenaAllocator.init(self.allocator), - .id = id, + .id = req.params.request_id, .url = req.params.url, .req = req, .client = self, @@ -1002,6 +1004,8 @@ fn ensureNoActiveConnection(self: *const Client) !void { } pub const RequestParams = struct { + /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. + request_id: u32 = undefined, frame_id: u32, loader_id: u32, method: Method, From 14ad5c9cdc53aa84995418a43e04f05d1f9df7ca Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 17:38:47 -0700 Subject: [PATCH 06/31] move RequestStart to InterceptionLayer --- src/Notification.zig | 3 +- src/browser/HttpClient.zig | 41 ++++++++------- src/cdp/domains/network.zig | 67 ++++++++++++++++++++++-- src/cdp/id.zig | 11 ++++ src/network/layer/InterceptionLayer.zig | 68 +++++++++++++++++++++++++ 5 files changed, 168 insertions(+), 22 deletions(-) create mode 100644 src/network/layer/InterceptionLayer.zig diff --git a/src/Notification.zig b/src/Notification.zig index d01d29ce..20f2f12b 100644 --- a/src/Notification.zig +++ b/src/Notification.zig @@ -21,6 +21,7 @@ const lp = @import("lightpanda"); const Frame = @import("browser/Frame.zig"); const Transfer = @import("browser/HttpClient.zig").Transfer; +const Request = @import("browser/HttpClient.zig").Request; const log = lp.log; const List = std.DoublyLinkedList; @@ -162,7 +163,7 @@ pub const FrameLoaded = struct { }; pub const RequestStart = struct { - transfer: *Transfer, + request: *Request, }; pub const RequestIntercept = struct { diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index b9b63a41..ef924e3f 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -44,6 +44,7 @@ const CachedResponse = @import("../network/cache/Cache.zig").CachedResponse; pub const CacheLayer = @import("../network/layer/CacheLayer.zig"); pub const RobotsLayer = @import("../network/layer/RobotsLayer.zig"); pub const WebBotAuthLayer = @import("../network/layer/WebBotAuthLayer.zig"); +pub const InterceptionLayer = @import("../network/layer/InterceptionLayer.zig"); // This is loosely tied to a browser Page. Loading all the , doing // XHR requests, and loading imports all happens through here. Sine the app @@ -135,6 +136,7 @@ max_response_size: usize, cache_layer: CacheLayer, robots_layer: RobotsLayer, web_bot_auth_layer: WebBotAuthLayer, +interception_layer: InterceptionLayer, entry_layer: Layer, pub const Layer = struct { @@ -200,15 +202,12 @@ pub fn init(allocator: Allocator, network: *Network) !*Client { .cache_layer = .{}, .robots_layer = .{ .allocator = allocator }, .web_bot_auth_layer = .{}, + .interception_layer = .{}, .entry_layer = undefined, }; var next = client.layer(); - if (network.config.webBotAuth() != null) { - next = layerWith(&client.web_bot_auth_layer, next); - } - if (network.config.obeyRobots()) { next = layerWith(&client.robots_layer, next); } @@ -217,6 +216,12 @@ pub fn init(allocator: Allocator, network: *Network) !*Client { next = layerWith(&client.cache_layer, next); } + next = layerWith(&client.interception_layer, next); + + if (network.config.webBotAuth() != null) { + next = layerWith(&client.web_bot_auth_layer, next); + } + client.entry_layer = next; return client; @@ -399,8 +404,6 @@ pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { const transfer = try self.makeTransfer(req); - transfer.req.params.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); - var wait_for_interception = false; transfer.req.params.notification.dispatch(.http_request_intercept, &.{ .transfer = transfer, @@ -1070,6 +1073,20 @@ pub const Request = struct { error_callback: ErrorCallback, shutdown_callback: ?ShutdownCallback = null, + pub fn getCookieString(self: *Request, allocator: std.mem.Allocator) !?[:0]const u8 { + const jar = self.params.cookie_jar orelse return null; + var aw: std.Io.Writer.Allocating = .init(allocator); + try jar.forRequest(self.params.url, &aw.writer, .{ + .is_http = true, + .origin_url = self.params.cookie_origin, + .is_navigation = self.params.resource_type == .document, + }); + const written = aw.written(); + if (written.len == 0) return null; + try aw.writer.writeByte(0); + return written.ptr[0..written.len :0]; + } + pub fn deinit(self: *const Request) void { self.params.deinit(); } @@ -1382,17 +1399,7 @@ pub const Transfer = struct { } pub fn getCookieString(self: *Transfer) !?[:0]const u8 { - const jar = self.req.params.cookie_jar orelse return null; - var aw: std.Io.Writer.Allocating = .init(self.arena.allocator()); - try jar.forRequest(self.req.params.url, &aw.writer, .{ - .is_http = true, - .origin_url = self.req.params.cookie_origin, - .is_navigation = self.req.params.resource_type == .document, - }); - const written = aw.written(); - if (written.len == 0) return null; - try aw.writer.writeByte(0); - return written.ptr[0..written.len :0]; + return self.req.getCookieString(self.arena.allocator()); } pub fn format(self: *Transfer, writer: *std.Io.Writer) !void { diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 74c80197..f9e17194 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -27,6 +27,7 @@ const Mime = @import("../../browser/Mime.zig"); const Notification = @import("../../Notification.zig"); const timestamp = @import("../../datetime.zig").timestamp; const Transfer = @import("../../browser/HttpClient.zig").Transfer; +const Request = @import("../../browser/HttpClient.zig").Request; const CdpStorage = @import("storage.zig"); @@ -273,8 +274,7 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques // things, but no session. const session_id = bc.session_id orelse return; - const transfer = msg.transfer; - const req = &transfer.req; + const req = msg.request; const frame_id = req.params.frame_id; const frame = bc.session.findFrameByFrameId(frame_id) orelse return; @@ -286,11 +286,11 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques // We're missing a bunch of fields, but, for now, this eems like enough try bc.cdp.sendEvent("Network.requestWillBeSent", .{ .frameId = &id.toFrameId(frame_id), - .requestId = &id.toRequestId(transfer), + .requestId = &id.toRequestId2(req), .loaderId = &id.toLoaderId(req.params.loader_id), .type = req.params.resource_type.string(), .documentURL = frame.url, - .request = TransferAsRequestWriter.init(transfer), + .request = RequestWriter.init(req), .initiator = .{ .type = "other" }, .redirectHasExtraInfo = false, // TODO change after adding Network.requestWillBeSentExtraInfo .hasUserGesture = false, @@ -328,6 +328,65 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request }, .{ .session_id = session_id }); } +pub const RequestWriter = struct { + request: *const Request, + + pub fn init(request: *const Request) RequestWriter { + return .{ + .request = request, + }; + } + + pub fn jsonStringify(self: *const RequestWriter, jws: anytype) !void { + self._jsonStringify(jws) catch return error.WriteFailed; + } + + fn _jsonStringify(self: *const RequestWriter, jws: anytype) !void { + const request = self.request; + + try jws.beginObject(); + { + try jws.objectField("url"); + try jws.write(request.params.url); + } + + { + const frag = URL.getHash(request.params.url); + if (frag.len > 0) { + try jws.objectField("urlFragment"); + try jws.write(frag); + } + } + + { + try jws.objectField("method"); + try jws.write(@tagName(request.params.method)); + } + + { + try jws.objectField("hasPostData"); + try jws.write(request.params.body != null); + } + + { + try jws.objectField("headers"); + try jws.beginObject(); + var it = request.params.headers.iterator(); + while (it.next()) |hdr| { + try jws.objectField(hdr.name); + try jws.write(hdr.value); + } + // TODO: Fix. + // if (try request.getCookieString()) |cookies| { + // try jws.objectField("Cookie"); + // try jws.write(cookies[0 .. cookies.len - 1]); + // } + try jws.endObject(); + } + try jws.endObject(); + } +}; + pub const TransferAsRequestWriter = struct { transfer: *Transfer, diff --git a/src/cdp/id.zig b/src/cdp/id.zig index 5dc3a6c2..cb304f6c 100644 --- a/src/cdp/id.zig +++ b/src/cdp/id.zig @@ -52,6 +52,17 @@ pub fn toRequestId(transfer: *const Transfer) [14]u8 { return buf; } +const Request = @import("../browser/HttpClient.zig").Request; +pub fn toRequestId2(req: *const Request) [14]u8 { + if (req.params.resource_type == .document) { + return toLoaderId(req.params.loader_id); + } + + var buf: [14]u8 = undefined; + _ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{req.params.request_id}) catch unreachable; + return buf; +} + pub fn toInterceptId(id: u32) [14]u8 { var buf: [14]u8 = undefined; _ = std.fmt.bufPrint(&buf, "INT-{d:0>10}", .{id}) catch unreachable; diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig new file mode 100644 index 00000000..149e4154 --- /dev/null +++ b/src/network/layer/InterceptionLayer.zig @@ -0,0 +1,68 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const builtin = @import("builtin"); +const log = @import("../../log.zig"); + +const IS_DEBUG = builtin.mode == .Debug; + +const URL = @import("../../browser/URL.zig"); +const Client = @import("../../browser/HttpClient.zig").Client; +const Request = @import("../../browser/HttpClient.zig").Request; +const Layer = @import("../../browser/HttpClient.zig").Layer; + +const InterceptionLayer = @This(); + +// Count of intercepted requests. This is to help deal with intercepted requests. +// The client doesn't track intercepted transfers. If a request is intercepted, +// the client forgets about it and requires the interceptor to continue or abort +// it. That works well, except if we only rely on active, we might think there's +// no more network activity when, with interecepted requests, there might be more +// in the future. (We really only need this to properly emit a 'networkIdle' and +// 'networkAlmostIdle' Page.lifecycleEvent in CDP). +intercepted: usize = 0, + +next: Layer = undefined, + +pub fn layer(self: *InterceptionLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ .request = request }, + }; +} + +fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { + const self: *InterceptionLayer = @ptrCast(@alignCast(ptr)); + var req = in_req; + + req.params.notification.dispatch(.http_request_start, &.{ .request = &req }); + + const wait_for_interception = false; + // req.params.notification.dispatch(.http_request_intercept, &.{ + // .transfer = transfer, + // .wait_for_interception = &wait_for_interception, + // }); + + if (wait_for_interception == false) { + // request not intercepted, process it normally + return self.next.request(client, req); + } + + @panic("not implemented yet"); +} From 6d41ea6fd047ac2217534ac8cb6abdc6b5163a87 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 18:10:13 -0700 Subject: [PATCH 07/31] move arena up to Request instead of Transfer --- src/browser/HttpClient.zig | 27 +++++++++++---------------- src/cdp/domains/fetch.zig | 6 +++--- src/cdp/domains/network.zig | 15 +++++++-------- src/network/layer/RobotsLayer.zig | 2 ++ 4 files changed, 23 insertions(+), 27 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index ef924e3f..6becf460 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -401,7 +401,6 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { const self: *Client = @ptrCast(@alignCast(ptr)); - const transfer = try self.makeTransfer(req); var wait_for_interception = false; @@ -436,6 +435,8 @@ pub fn request(self: *Client, req: Request) !void { // Assign Request Id. var our_req = req; our_req.params.request_id = self.incrReqId(); + our_req.params.arena = ArenaAllocator.init(self.allocator); + return self.entry_layer.request(self, our_req); } @@ -621,14 +622,11 @@ pub fn incrReqId(self: *Client) u32 { } fn makeTransfer(self: *Client, req: Request) !*Transfer { - errdefer req.params.headers.deinit(); - const transfer = try self.transfer_pool.create(); errdefer self.transfer_pool.destroy(transfer); transfer.* = .{ .start_time = timestamp(.monotonic), - .arena = ArenaAllocator.init(self.allocator), .id = req.params.request_id, .url = req.params.url, .req = req, @@ -1007,6 +1005,8 @@ fn ensureNoActiveConnection(self: *const Client) !void { } pub const RequestParams = struct { + /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. + arena: ArenaAllocator = undefined, /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. request_id: u32 = undefined, frame_id: u32, @@ -1051,6 +1051,7 @@ pub const RequestParams = struct { pub fn deinit(self: *const RequestParams) void { self.headers.deinit(); + self.arena.deinit(); } }; @@ -1073,9 +1074,9 @@ pub const Request = struct { error_callback: ErrorCallback, shutdown_callback: ?ShutdownCallback = null, - pub fn getCookieString(self: *Request, allocator: std.mem.Allocator) !?[:0]const u8 { + pub fn getCookieString(self: *Request) !?[:0]const u8 { const jar = self.params.cookie_jar orelse return null; - var aw: std.Io.Writer.Allocating = .init(allocator); + var aw: std.Io.Writer.Allocating = .init(self.params.arena.allocator()); try jar.forRequest(self.params.url, &aw.writer, .{ .is_http = true, .origin_url = self.params.cookie_origin, @@ -1177,7 +1178,6 @@ pub const SyncResponse = struct { }; pub const Transfer = struct { - arena: ArenaAllocator, id: u32 = 0, req: Request, url: [:0]const u8, @@ -1245,7 +1245,6 @@ pub const Transfer = struct { } self.req.deinit(); - self.arena.deinit(); self.client.transfer_pool.destroy(self); } @@ -1337,7 +1336,7 @@ pub const Transfer = struct { try conn.setHeaders(&header_list); // Add cookies from cookie jar. - if (try self.getCookieString()) |cookies| { + if (try self.req.getCookieString()) |cookies| { try conn.setCookies(@ptrCast(cookies.ptr)); } @@ -1398,10 +1397,6 @@ pub const Transfer = struct { } } - pub fn getCookieString(self: *Transfer) !?[:0]const u8 { - return self.req.getCookieString(self.arena.allocator()); - } - pub fn format(self: *Transfer, writer: *std.Io.Writer) !void { const req = self.req; return writer.print("{s} {s}", .{ @tagName(req.params.method), req.params.url }); @@ -1418,7 +1413,7 @@ pub const Transfer = struct { fn handleRedirect(transfer: *Transfer) !void { const req = &transfer.req; const conn = transfer._conn.?; - const arena = transfer.arena.allocator(); + const arena = transfer.req.params.arena.allocator(); transfer._redirect_count += 1; if (transfer._redirect_count > transfer.client.network.config.httpMaxRedirects()) { @@ -1602,7 +1597,7 @@ pub const Transfer = struct { transfer._callback_error = error.ResponseTooLarge; return http.writefunc_error; } - transfer._stream_buffer.ensureTotalCapacity(transfer.arena.allocator(), cl) catch {}; + transfer._stream_buffer.ensureTotalCapacity(transfer.req.params.arena.allocator(), cl) catch {}; } } @@ -1615,7 +1610,7 @@ pub const Transfer = struct { } const chunk = buffer[0..chunk_len]; - transfer._stream_buffer.appendSlice(transfer.arena.allocator(), chunk) catch |err| { + transfer._stream_buffer.appendSlice(transfer.req.params.arena.allocator(), chunk) catch |err| { transfer._callback_error = err; return http.writefunc_error; }; diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 57d0bd56..452c4d05 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -245,7 +245,7 @@ fn continueRequest(cmd: *CDP.Command) !void { .new_url = params.url, }); - const arena = transfer.arena.allocator(); + const arena = transfer.req.params.arena.allocator(); // Update the request with the new parameters if (params.url) |url| { try transfer.updateURL(try arena.dupeZ(u8, url)); @@ -309,7 +309,7 @@ fn continueWithAuth(cmd: *CDP.Command) !void { errdefer transfer.abortAuthChallenge(); // restart the request with the provided credentials. - const arena = transfer.arena.allocator(); + const arena = transfer.req.params.arena.allocator(); transfer.updateCredentials( try std.fmt.allocPrintSentinel(arena, "{s}:{s}", .{ params.authChallengeResponse.username, @@ -354,7 +354,7 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var body: ?[]const u8 = null; if (params.body) |b| { const decoder = std.base64.standard.Decoder; - const buf = try transfer.arena.allocator().alloc(u8, try decoder.calcSizeForSlice(b)); + const buf = try transfer.req.params.arena.allocator().alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(buf, b); body = buf; } diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index f9e17194..4f34dd36 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -329,9 +329,9 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request } pub const RequestWriter = struct { - request: *const Request, + request: *Request, - pub fn init(request: *const Request) RequestWriter { + pub fn init(request: *Request) RequestWriter { return .{ .request = request, }; @@ -376,11 +376,10 @@ pub const RequestWriter = struct { try jws.objectField(hdr.name); try jws.write(hdr.value); } - // TODO: Fix. - // if (try request.getCookieString()) |cookies| { - // try jws.objectField("Cookie"); - // try jws.write(cookies[0 .. cookies.len - 1]); - // } + if (try request.getCookieString()) |cookies| { + try jws.objectField("Cookie"); + try jws.write(cookies[0 .. cookies.len - 1]); + } try jws.endObject(); } try jws.endObject(); @@ -434,7 +433,7 @@ pub const TransferAsRequestWriter = struct { try jws.objectField(hdr.name); try jws.write(hdr.value); } - if (try transfer.getCookieString()) |cookies| { + if (try transfer.req.getCookieString()) |cookies| { try jws.objectField("Cookie"); try jws.write(cookies[0 .. cookies.len - 1]); } diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index add4ee3d..e1de10a0 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -66,6 +66,8 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const path = URL.getPathname(req.params.url); if (!robots.isAllowed(path)) { + defer req.deinit(); + log.warn(.http, "blocked by robots", .{ .url = req.params.url }); req.error_callback(req.ctx, error.RobotsBlocked); return; From 9c826159a05b0b4f7b8189cbb6111f23d21fffbc Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 19:20:57 -0700 Subject: [PATCH 08/31] crude InterceptionLayer --- src/Notification.zig | 15 +- src/browser/HttpClient.zig | 219 ++++-------------------- src/cdp/CDP.zig | 13 +- src/cdp/domains/fetch.zig | 197 +++++++++++---------- src/cdp/domains/network.zig | 106 +++++++++++- src/network/layer/CacheLayer.zig | 1 + src/network/layer/Forward.zig | 2 + src/network/layer/InterceptionLayer.zig | 176 ++++++++++++++++++- src/network/layer/RobotsLayer.zig | 2 +- 9 files changed, 422 insertions(+), 309 deletions(-) diff --git a/src/Notification.zig b/src/Notification.zig index 20f2f12b..e5b7c1c4 100644 --- a/src/Notification.zig +++ b/src/Notification.zig @@ -22,6 +22,7 @@ const lp = @import("lightpanda"); const Frame = @import("browser/Frame.zig"); const Transfer = @import("browser/HttpClient.zig").Transfer; const Request = @import("browser/HttpClient.zig").Request; +const Response = @import("browser/HttpClient.zig").Response; const log = lp.log; const List = std.DoublyLinkedList; @@ -167,30 +168,32 @@ pub const RequestStart = struct { }; pub const RequestIntercept = struct { - transfer: *Transfer, + request: *Request, wait_for_interception: *bool, }; pub const RequestAuthRequired = struct { - transfer: *Transfer, + request: *Request, wait_for_interception: *bool, }; pub const ResponseData = struct { data: []const u8, - transfer: *Transfer, + request: *Request, }; pub const ResponseHeaderDone = struct { - transfer: *Transfer, + request: *Request, + response: *const Response, }; pub const RequestDone = struct { - transfer: *Transfer, + request: *Request, + content_length: usize, }; pub const RequestFail = struct { - transfer: *Transfer, + request: *Request, err: anyerror, }; diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 6becf460..6dbb77ff 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -402,33 +402,7 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { const self: *Client = @ptrCast(@alignCast(ptr)); const transfer = try self.makeTransfer(req); - - var wait_for_interception = false; - transfer.req.params.notification.dispatch(.http_request_intercept, &.{ - .transfer = transfer, - .wait_for_interception = &wait_for_interception, - }); - if (wait_for_interception == false) { - // request not intercepted, process it normally - return self.process(transfer); - } - - self.intercepted += 1; - if (comptime IS_DEBUG) { - log.debug(.http, "wait for interception", .{ .intercepted = self.intercepted }); - } - transfer._intercept_state = .pending; - - if (req.params.blocking == false) { - // The request was interecepted, but it isn't a blocking request, so we - // dont' need to block this call. The request will be unblocked - // asynchronously via either continueTransfer or abortTransfer - return; - } - - if (try self.waitForInterceptedResponse(transfer)) { - return self.process(transfer); - } + return self.process(transfer); } pub fn request(self: *Client, req: Request) !void { @@ -510,48 +484,6 @@ pub fn syncRequest(self: *Client, allocator: Allocator, params: RequestParams) ! } } -fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { - // The request was intercepted and is blocking. This is messy, but our - // callers, the ScriptManager -> Page, don't have a great way to stop the - // parser and return control to the CDP server to wait for the interception - // response. We have some information on the CDPClient, so we'll do the - // blocking here. (This is a bit of a legacy thing. Initially the Client - // had a 'extra_socket' that it could monitor. It was named 'extra_socket' - // to appear generic, but really, that 'extra_socket' was always the CDP - // socket. Because we already had the "extra_socket" here, it was easier to - // make it even more CDP- aware and turn `extra_socket: socket_t` into the - // current CDPClient and do the blocking here). - const cdp_client = self.cdp_client.?; - const ctx = cdp_client.ctx; - - if (cdp_client.blocking_read_start(ctx) == false) { - return error.BlockingInterceptFailure; - } - - defer _ = cdp_client.blocking_read_end(ctx); - - while (true) { - if (cdp_client.blocking_read(ctx) == false) { - return error.BlockingInterceptFailure; - } - - switch (transfer._intercept_state) { - .pending => continue, // keep waiting - .@"continue" => return true, - .abort => |err| { - transfer.abort(err); - return false; - }, - .fulfilled => { - // callbacks already called, just need to cleanups - transfer.deinit(); - return false; - }, - .not_intercepted => unreachable, - } - } -} - // Above, request will not process if there's an interception request. In such // cases, the interceptor is expected to call resume to continue the transfer // or transfer.abort() to abort it. @@ -567,50 +499,6 @@ fn process(self: *Client, transfer: *Transfer) !void { self.queue.append(&transfer._node); } -// For an intercepted request -pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { - if (comptime IS_DEBUG) { - std.debug.assert(transfer._intercept_state != .not_intercepted); - log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); - } - self.intercepted -= 1; - - if (!transfer.req.params.blocking) { - return self.process(transfer); - } - transfer._intercept_state = .@"continue"; -} - -// For an intercepted request -pub fn abortTransfer(self: *Client, transfer: *Transfer) void { - if (comptime IS_DEBUG) { - std.debug.assert(transfer._intercept_state != .not_intercepted); - log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); - } - self.intercepted -= 1; - - if (!transfer.req.params.blocking) { - transfer.abort(error.Abort); - } - transfer._intercept_state = .{ .abort = error.Abort }; -} - -// For an intercepted request -pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { - if (comptime IS_DEBUG) { - std.debug.assert(transfer._intercept_state != .not_intercepted); - log.debug(.http, "filfull transfer", .{ .intercepted = self.intercepted }); - } - self.intercepted -= 1; - - try transfer.fulfill(status, headers, body); - if (!transfer.req.params.blocking) { - transfer.deinit(); - return; - } - transfer._intercept_state = .fulfilled; -} - pub fn nextReqId(self: *Client) u32 { return self.next_request_id +% 1; } @@ -645,11 +533,6 @@ fn requestFailed(transfer: *Transfer, err: anyerror, comptime execute_callback: transfer._notified_fail = true; - transfer.req.params.notification.dispatch(.http_request_fail, &.{ - .transfer = transfer, - .err = err, - }); - if (execute_callback) { transfer.req.error_callback(transfer.req.ctx, err); } else if (transfer.req.shutdown_callback) |cb| { @@ -757,58 +640,6 @@ fn perform(self: *Client, timeout_ms: c_int) anyerror!PerformStatus { } fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *Transfer) !bool { - // Detect auth challenge from response headers. - // Also check on RecvError: proxy may send 407 with headers before - // closing the connection (CONNECT tunnel not yet established). - if (msg.err == null or msg.err.? == error.RecvError) { - transfer.detectAuthChallenge(msg.conn); - } - - // In case of auth challenge - // TODO give a way to configure the number of auth retries. - if (transfer._auth_challenge != null and transfer._tries < 10) { - var wait_for_interception = false; - transfer.req.params.notification.dispatch( - .http_request_auth_required, - &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }, - ); - if (wait_for_interception) { - self.intercepted += 1; - if (comptime IS_DEBUG) { - log.debug(.http, "wait for auth interception", .{ .intercepted = self.intercepted }); - } - transfer._intercept_state = .pending; - - // Whether or not this is a blocking request, we're not going - // to process it now. We can end the transfer, which will - // release the easy handle back into the pool. The transfer - // is still valid/alive (just has no handle). - transfer.releaseConn(); - if (!transfer.req.params.blocking) { - // In the case of an async request, we can just "forget" - // about this transfer until it gets updated asynchronously - // from some CDP command. - return false; - } - - // In the case of a sync request, we need to block until we - // get the CDP command for handling this case. - if (try self.waitForInterceptedResponse(transfer)) { - // we've been asked to continue with the request - // we can't process it here, since we're already inside - // a process, so we need to queue it and wait for the - // next tick (this is why it was safe to releaseConn - // above, because even in the "blocking" path, we still - // only process it on the next tick). - self.queue.append(&transfer._node); - } else { - // aborted, already cleaned up - } - - return false; - } - } - // Handle redirects: reuse the same connection to preserve TCP state. if (msg.err == null) { const status = try msg.conn.getResponseCode(); @@ -874,11 +705,6 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T if (transfer._stream_buffer.items.len > 0) { try transfer.req.data_callback(Response.fromTransfer(transfer), body); - transfer.req.params.notification.dispatch(.http_response_data, &.{ - .data = body, - .transfer = transfer, - }); - if (transfer.aborted) { transfer.requestFailed(error.Abort, true); return true; @@ -891,10 +717,6 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T try transfer.req.done_callback(transfer.req.ctx); - transfer.req.params.notification.dispatch(.http_request_done, &.{ - .transfer = transfer, - }); - return true; } @@ -1093,11 +915,26 @@ pub const Request = struct { } }; +pub const FulfilledResponse = struct { + status: u16, + url: [:0]const u8, + headers: []const http.Header, + body: ?[]const u8, + + pub fn contentType(self: *const FulfilledResponse) ?[]const u8 { + for (self.headers) |hdr| { + if (std.ascii.eqlIgnoreCase(hdr.name, "content-type")) return hdr.value; + } + return null; + } +}; + pub const Response = struct { ctx: *anyopaque, inner: union(enum) { transfer: *Transfer, cached: *const CachedResponse, + fulfilled: *const FulfilledResponse, }, pub fn fromTransfer(transfer: *Transfer) Response { @@ -1108,10 +945,15 @@ pub const Response = struct { return .{ .ctx = ctx, .inner = .{ .cached = resp } }; } + pub fn fromFulfilled(ctx: *anyopaque, fulfilled: *const FulfilledResponse) Response { + return .{ .ctx = ctx, .inner = .{ .fulfilled = fulfilled } }; + } + pub fn status(self: Response) ?u16 { return switch (self.inner) { .transfer => |t| if (t.response_header) |rh| rh.status else null, .cached => |c| c.metadata.status, + .fulfilled => |f| f.status, }; } @@ -1119,6 +961,7 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| if (t.response_header) |*rh| rh.contentType() else null, .cached => |c| c.metadata.content_type, + .fulfilled => |f| f.contentType(), }; } @@ -1129,13 +972,14 @@ pub const Response = struct { .buffer => |buf| @intCast(buf.len), .file => |f| @intCast(f.len), }, + .fulfilled => |f| if (f.body) |b| @intCast(b.len) else null, }; } pub fn redirectCount(self: Response) ?u32 { return switch (self.inner) { .transfer => |t| if (t.response_header) |rh| rh.redirect_count else null, - .cached => 0, + .cached, .fulfilled => 0, }; } @@ -1143,6 +987,7 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| t.url, .cached => |c| c.metadata.url, + .fulfilled => |f| f.url, }; } @@ -1150,13 +995,14 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| t.responseHeaderIterator(), .cached => |c| HeaderIterator{ .list = .{ .list = c.metadata.headers } }, + .fulfilled => |f| HeaderIterator{ .list = .{ .list = f.headers } }, }; } pub fn abort(self: Response, err: anyerror) void { switch (self.inner) { .transfer => |t| t.abort(err), - .cached => {}, + .cached, .fulfilled => {}, } } @@ -1164,6 +1010,7 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| try t.format(writer), .cached => |c| try c.format(writer), + .fulfilled => |f| try writer.print("fulfilled {s}", .{f.url}), }; } }; @@ -1301,11 +1148,6 @@ pub const Transfer = struct { if (self._notified_fail) return; self._notified_fail = true; - self.req.params.notification.dispatch(.http_request_fail, &.{ - .transfer = self, - .err = err, - }); - if (execute_callback) { self.req.error_callback(self.req.ctx, err); } else if (self.req.shutdown_callback) |cb| { @@ -1520,6 +1362,7 @@ pub const Transfer = struct { std.debug.assert(self._intercept_state != .not_intercepted); log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.intercepted }); } + self.client.intercepted -= 1; if (!self.req.params.blocking) { self.abort(error.AbortAuthChallenge); @@ -1557,10 +1400,6 @@ pub const Transfer = struct { } } - transfer.req.params.notification.dispatch(.http_response_header_done, &.{ - .transfer = transfer, - }); - const proceed = transfer.req.header_callback(Response.fromTransfer(transfer)) catch |err| { log.err(.http, "header_callback", .{ .err = err, .req = transfer }); return err; diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 16a08cc9..023f8bf2 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -450,8 +450,9 @@ pub const BrowserContext = struct { // abort all intercepted requests before closing the session/page // since some of these might callback into the page/scriptmanager - for (self.intercept_state.pendingTransfers()) |transfer| { - transfer.abort(error.ClientDisconnect); + for (self.intercept_state.pendingRequests()) |request| { + defer request.deinit(); + request.error_callback(request.ctx, error.ClientDisconnect); } for (self.isolated_worlds.items) |world| { @@ -668,7 +669,7 @@ pub const BrowserContext = struct { const arena = self.frame_arena; // Prepare the captured response value. - const id = msg.transfer.id; + const id = msg.request.params.request_id; const gop = try self.captured_responses.getOrPut(arena, id); if (!gop.found_existing) { gop.value_ptr.* = .{ @@ -676,8 +677,8 @@ pub const BrowserContext = struct { // Encode the data in base64 by default, but don't encode // for well known content-type. .must_encode = blk: { - const transfer = msg.transfer; - if (transfer.response_header.?.contentType()) |ct| { + const response = msg.response; + if (response.contentType()) |ct| { const mime = try Mime.parse(ct); if (!mime.isText()) { @@ -705,7 +706,7 @@ pub const BrowserContext = struct { const self: *BrowserContext = @ptrCast(@alignCast(ctx)); const arena = self.frame_arena; - const id = msg.transfer.id; + const id = msg.request.params.request_id; const resp = self.captured_responses.getPtr(id) orelse lp.assert(false, "onHttpResponseData missinf captured response", .{}); return resp.data.appendSlice(arena, msg.data); diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 452c4d05..a7c5672f 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -54,7 +54,7 @@ pub fn processMessage(cmd: *CDP.Command) !void { // Stored in CDP pub const InterceptState = struct { allocator: Allocator, - waiting: std.AutoArrayHashMapUnmanaged(u32, *HttpClient.Transfer), + waiting: std.AutoArrayHashMapUnmanaged(u32, HttpClient.Request), pub fn init(allocator: Allocator) !InterceptState { return .{ @@ -67,11 +67,11 @@ pub const InterceptState = struct { return self.waiting.count() == 0; } - pub fn put(self: *InterceptState, transfer: *HttpClient.Transfer) !void { - return self.waiting.put(self.allocator, transfer.id, transfer); + pub fn put(self: *InterceptState, request: HttpClient.Request) !void { + return self.waiting.put(self.allocator, request.params.request_id, request); } - pub fn remove(self: *InterceptState, request_id: u32) ?*HttpClient.Transfer { + pub fn remove(self: *InterceptState, request_id: u32) ?HttpClient.Request { const entry = self.waiting.fetchSwapRemove(request_id) orelse return null; return entry.value; } @@ -80,7 +80,7 @@ pub const InterceptState = struct { self.waiting.deinit(self.allocator); } - pub fn pendingTransfers(self: *const InterceptState) []*HttpClient.Transfer { + pub fn pendingRequests(self: *const InterceptState) []HttpClient.Request { return self.waiting.values(); } }; @@ -190,29 +190,28 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification. const session_id = bc.session_id orelse return; // We keep it around to wait for modifications to the request. - // NOTE: we assume whomever created the request created it with a lifetime of the Page. // TODO: What to do when receiving replies for a previous frame's requests? - const transfer = intercept.transfer; - try bc.intercept_state.put(transfer); + const request = intercept.request; + try bc.intercept_state.put(request.*); try bc.cdp.sendEvent("Fetch.requestPaused", .{ - .requestId = &id.toInterceptId(transfer.id), - .frameId = &id.toFrameId(transfer.req.params.frame_id), - .request = network.TransferAsRequestWriter.init(transfer), - .resourceType = switch (transfer.req.params.resource_type) { + .requestId = &id.toInterceptId(request.params.request_id), + .frameId = &id.toFrameId(request.params.frame_id), + .request = network.RequestWriter.init(request), + .resourceType = switch (request.params.resource_type) { .script => "Script", .xhr => "XHR", .document => "Document", .fetch => "Fetch", }, - .networkId = &id.toRequestId(transfer), // matches the Network REQ-ID + .networkId = &id.toRequestId2(request), // matches the Network REQ-ID }, .{ .session_id = session_id }); log.debug(.cdp, "request intercept", .{ .state = "paused", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, }); // Await either continueRequest, failRequest or fulfillRequest @@ -236,39 +235,48 @@ fn continueRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = intercept_state.remove(request_id) orelse return error.RequestNotFound; log.debug(.cdp, "request intercept", .{ .state = "continue", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, .new_url = params.url, }); - const arena = transfer.req.params.arena.allocator(); + const arena = request.params.arena.allocator(); // Update the request with the new parameters if (params.url) |url| { - try transfer.updateURL(try arena.dupeZ(u8, url)); + request.params.url = try arena.dupeZ(u8, url); } if (params.method) |method| { - transfer.req.params.method = std.meta.stringToEnum(http.Method, method) orelse return error.InvalidParams; + request.params.method = std.meta.stringToEnum(http.Method, method) orelse return error.InvalidParams; } if (params.headers) |headers| { - // Not obvious, but cmd.arena is safe here, since the headers will get - // duped by libcurl. transfer.arena is more obvious/safe, but cmd.arena - // is more efficient (it's re-used) - try transfer.replaceRequestHeaders(cmd.arena, headers); + request.params.headers.deinit(); + + var buf: std.ArrayList(u8) = .empty; + var new_headers = try bc.cdp.browser.http_client.newHeaders(); + for (headers) |hdr| { + defer buf.clearRetainingCapacity(); + try std.fmt.format(buf.writer(cmd.arena), "{s}: {s}", .{ hdr.name, hdr.value }); + try buf.append(cmd.arena, 0); + try new_headers.add(buf.items[0 .. buf.items.len - 1 :0]); + } + request.params.headers = new_headers; } if (params.postData) |b| { const decoder = std.base64.standard.Decoder; const body = try arena.alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(body, b); - transfer.req.params.body = body; + request.params.body = body; } - try bc.cdp.browser.http_client.continueTransfer(transfer); + // todo: replace. + const client = bc.cdp.browser.http_client; + try client.interception_layer.continueRequest(client, request); return cmd.sendResult(null, .{}); } @@ -292,33 +300,36 @@ fn continueWithAuth(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const request = intercept_state.remove(request_id) orelse return error.RequestNotFound; log.debug(.cdp, "request intercept", .{ .state = "continue with auth", - .id = transfer.id, + .id = request.params.request_id, .response = params.authChallengeResponse.response, }); if (params.authChallengeResponse.response != .ProvideCredentials) { - transfer.abortAuthChallenge(); + // TODO: + // request.abortAuthChallenge(); return cmd.sendResult(null, .{}); } + // TODO: // cancel the request, deinit the transfer on error. - errdefer transfer.abortAuthChallenge(); + // errdefer request.abortAuthChallenge(); + // todo: // restart the request with the provided credentials. - const arena = transfer.req.params.arena.allocator(); - transfer.updateCredentials( - try std.fmt.allocPrintSentinel(arena, "{s}:{s}", .{ - params.authChallengeResponse.username, - params.authChallengeResponse.password, - }, 0), - ); + // const arena = request.params.arena.allocator(); + // request.updateCredentials( + // try std.fmt.allocPrintSentinel(arena, "{s}:{s}", .{ + // params.authChallengeResponse.username, + // params.authChallengeResponse.password, + // }, 0), + // ); - transfer.reset(); - try bc.cdp.browser.http_client.continueTransfer(transfer); + const client = bc.cdp.browser.http_client; + try client.interception_layer.continueRequest(client, request); return cmd.sendResult(null, .{}); } @@ -341,12 +352,12 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = intercept_state.remove(request_id) orelse return error.RequestNotFound; log.debug(.cdp, "request intercept", .{ .state = "fulfilled", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, .status = params.responseCode, .body = params.body != null, }); @@ -354,13 +365,13 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var body: ?[]const u8 = null; if (params.body) |b| { const decoder = std.base64.standard.Decoder; - const buf = try transfer.req.params.arena.allocator().alloc(u8, try decoder.calcSizeForSlice(b)); + const buf = try request.params.arena.allocator().alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(buf, b); body = buf; } - try bc.cdp.browser.http_client.fulfillTransfer(transfer, params.responseCode, params.responseHeaders orelse &.{}, body); - + const client = bc.cdp.browser.http_client; + try client.interception_layer.fulfillRequest(client, request, params.responseCode, params.responseHeaders orelse &.{}, body); return cmd.sendResult(null, .{}); } @@ -374,61 +385,69 @@ fn failRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; - defer bc.cdp.browser.http_client.abortTransfer(transfer); + const request = intercept_state.remove(request_id) orelse return error.RequestNotFound; + + const client = bc.cdp.browser.http_client; + defer client.interception_layer.abortRequest(client, request); log.info(.cdp, "request intercept", .{ .state = "fail", .id = request_id, - .url = transfer.url, + .url = request.params.url, .reason = params.errorReason, }); return cmd.sendResult(null, .{}); } pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notification.RequestAuthRequired) !void { - // detachTarget could be called, in which case, we still have a frame doing - // things, but no session. - const session_id = bc.session_id orelse return; - - // We keep it around to wait for modifications to the request. - // NOTE: we assume whomever created the request created it with a lifetime of the Page. - // TODO: What to do when receiving replies for a previous frame's requests? - - const transfer = intercept.transfer; - try bc.intercept_state.put(transfer); - - const challenge = transfer._auth_challenge orelse return error.NullAuthChallenge; - - try bc.cdp.sendEvent("Fetch.authRequired", .{ - .requestId = &id.toInterceptId(transfer.id), - .frameId = &id.toFrameId(transfer.req.params.frame_id), - .request = network.TransferAsRequestWriter.init(transfer), - .resourceType = switch (transfer.req.params.resource_type) { - .script => "Script", - .xhr => "XHR", - .document => "Document", - .fetch => "Fetch", - }, - .authChallenge = .{ - .origin = "", // TODO get origin, could be the proxy address for example. - .source = if (challenge.source) |s| (if (s == .server) "Server" else "Proxy") else "", - .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", - .realm = challenge.realm orelse "", - }, - .networkId = &id.toRequestId(transfer), - }, .{ .session_id = session_id }); - - log.debug(.cdp, "request auth required", .{ - .state = "paused", - .id = transfer.id, - .url = transfer.url, - }); - // Await continueWithAuth - - intercept.wait_for_interception.* = true; + _ = bc; + _ = intercept; + return error.NullAuthChallenge; } +// pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notification.RequestAuthRequired) !void { +// // detachTarget could be called, in which case, we still have a frame doing +// // things, but no session. +// const session_id = bc.session_id orelse return; + +// // We keep it around to wait for modifications to the request. +// // NOTE: we assume whomever created the request created it with a lifetime of the Page. +// // TODO: What to do when receiving replies for a previous frame's requests? + +// const transfer = intercept.transfer; +// try bc.intercept_state.put(transfer); + +// const challenge = transfer._auth_challenge orelse return error.NullAuthChallenge; + +// try bc.cdp.sendEvent("Fetch.authRequired", .{ +// .requestId = &id.toInterceptId(transfer.id), +// .frameId = &id.toFrameId(transfer.req.params.frame_id), +// .request = network.TransferAsRequestWriter.init(transfer), +// .resourceType = switch (transfer.req.params.resource_type) { +// .script => "Script", +// .xhr => "XHR", +// .document => "Document", +// .fetch => "Fetch", +// }, +// .authChallenge = .{ +// .origin = "", // TODO get origin, could be the proxy address for example. +// .source = if (challenge.source) |s| (if (s == .server) "Server" else "Proxy") else "", +// .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", +// .realm = challenge.realm orelse "", +// }, +// .networkId = &id.toRequestId(transfer), +// }, .{ .session_id = session_id }); + +// log.debug(.cdp, "request auth required", .{ +// .state = "paused", +// .id = transfer.id, +// .url = transfer.url, +// }); +// // Await continueWithAuth + +// intercept.wait_for_interception.* = true; +// } + // Get u32 from requestId which is formatted as: "INT-{d}" fn idFromRequestId(request_id: []const u8) !u32 { if (!std.mem.startsWith(u8, request_id, "INT-")) { diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 4f34dd36..06581c15 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -28,6 +28,7 @@ const Notification = @import("../../Notification.zig"); const timestamp = @import("../../datetime.zig").timestamp; const Transfer = @import("../../browser/HttpClient.zig").Transfer; const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; const CdpStorage = @import("storage.zig"); @@ -261,7 +262,7 @@ pub fn httpRequestFail(bc: *CDP.BrowserContext, msg: *const Notification.Request // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.loadingFailed", .{ - .requestId = &id.toRequestId(msg.transfer), + .requestId = &id.toRequestId2(msg.request), // Seems to be what chrome answers with. I assume it depends on the type of error? .type = "Ping", .errorText = msg.err, @@ -304,15 +305,14 @@ pub fn httpResponseHeaderDone(arena: Allocator, bc: *CDP.BrowserContext, msg: *c // things, but no session. const session_id = bc.session_id orelse return; - const transfer = msg.transfer; - const req = &transfer.req; + const req = msg.request; // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.responseReceived", .{ .frameId = &id.toFrameId(req.params.frame_id), - .requestId = &id.toRequestId(transfer), + .requestId = &id.toRequestId2(req), .loaderId = &id.toLoaderId(req.params.loader_id), - .response = TransferAsResponseWriter.init(arena, transfer), + .response = ResponseWriter.init(arena, msg.response), .hasExtraInfo = false, // TODO change after adding Network.responseReceivedExtraInfo }, .{ .session_id = session_id }); } @@ -321,10 +321,10 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request // detachTarget could be called, in which case, we still have a frame doing // things, but no session. const session_id = bc.session_id orelse return; - const transfer = msg.transfer; + const req = msg.request; try bc.cdp.sendEvent("Network.loadingFinished", .{ - .requestId = &id.toRequestId(transfer), - .encodedDataLength = transfer.bytes_received, + .requestId = &id.toRequestId2(req), + .encodedDataLength = msg.content_length, }, .{ .session_id = session_id }); } @@ -443,6 +443,96 @@ pub const TransferAsRequestWriter = struct { } }; +const ResponseWriter = struct { + arena: Allocator, + response: *const Response, + + fn init(arena: Allocator, response: *const Response) ResponseWriter { + return .{ + .arena = arena, + .response = response, + }; + } + + pub fn jsonStringify(self: *const ResponseWriter, jws: anytype) !void { + self._jsonStringify(jws) catch return error.WriteFailed; + } + + fn _jsonStringify(self: *const ResponseWriter, jws: anytype) !void { + const response = self.response; + + try jws.beginObject(); + { + try jws.objectField("url"); + try jws.write(response.url()); + } + + if (response.status()) |status| { + try jws.objectField("status"); + try jws.write(status); + + try jws.objectField("statusText"); + try jws.write(@as(std.http.Status, @enumFromInt(status)).phrase() orelse "Unknown"); + } + + { + const mime: Mime = blk: { + if (response.contentType()) |ct| { + break :blk try Mime.parse(ct); + } + break :blk .unknown; + }; + + try jws.objectField("mimeType"); + try jws.write(mime.contentTypeString()); + try jws.objectField("charset"); + try jws.write(mime.charsetString()); + } + + { + try jws.objectField("timing"); + try jws.write(.{ + // TODO: fix + .requestTime = -1, + .connectEnd = -1, + .connectStart = -1, + .dnsEnd = -1, + .dnsStart = -1, + .proxyEnd = -1, + .proxyStart = -1, + .receiveHeadersEnd = -1, + .receiveHeadersStart = -1, + .sendEnd = -1, + .sendStart = -1, + .sslEnd = -1, + .sslStart = -1, + }); + } + + { + // chromedp doesn't like having duplicate header names. It's pretty + // common to get these from a server (e.g. for Cache-Control), but + // Chrome joins these. So we have to too. + const arena = self.arena; + var it = response.headerIterator(); + var map: std.StringArrayHashMapUnmanaged([]const u8) = .empty; + while (it.next()) |hdr| { + const gop = try map.getOrPut(arena, hdr.name); + if (gop.found_existing) { + // yes, chrome joins multi-value headers with a \n + gop.value_ptr.* = try std.mem.join(arena, "\n", &.{ gop.value_ptr.*, hdr.value }); + } else { + gop.value_ptr.* = hdr.value; + } + } + + try jws.objectField("headers"); + try jws.write(std.json.ArrayHashMap([]const u8){ .map = map }); + } + try jws.endObject(); + } +}; + const TransferAsResponseWriter = struct { arena: Allocator, transfer: *Transfer, diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 317457b5..6c5c72c6 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -80,6 +80,7 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const wrapped = cache_ctx.forward.wrapRequest( req, cache_ctx, + client.incrReqId(), .{ .start = CacheContext.startCallback, .header = CacheContext.headerCallback, diff --git a/src/network/layer/Forward.zig b/src/network/layer/Forward.zig index b11ff23f..2a81ec14 100644 --- a/src/network/layer/Forward.zig +++ b/src/network/layer/Forward.zig @@ -54,12 +54,14 @@ pub fn wrapRequest( self: *Forward, req: Request, new_ctx: anytype, + new_id: u32, overrides: Overrides, ) Request { const T = @TypeOf(new_ctx.*); const PassthroughT = makePassthrough(T, "forward"); var wrapped = req; wrapped.ctx = new_ctx; + wrapped.params.request_id = new_id; wrapped.start_callback = overrides.start orelse if (self.start != null) PassthroughT.start else null; wrapped.header_callback = overrides.header orelse PassthroughT.header; wrapped.data_callback = overrides.data orelse PassthroughT.data; diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 149e4154..d6905614 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -22,10 +22,14 @@ const log = @import("../../log.zig"); const IS_DEBUG = builtin.mode == .Debug; +const http = @import("../http.zig"); const URL = @import("../../browser/URL.zig"); const Client = @import("../../browser/HttpClient.zig").Client; const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; +const FulfilledResponse = @import("../../browser/HttpClient.zig").FulfilledResponse; const Layer = @import("../../browser/HttpClient.zig").Layer; +const Forward = @import("Forward.zig"); const InterceptionLayer = @This(); @@ -49,20 +53,174 @@ pub fn layer(self: *InterceptionLayer) Layer { fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { const self: *InterceptionLayer = @ptrCast(@alignCast(ptr)); - var req = in_req; + var pre_wrap_req = in_req; + + // Wrap callbacks to intercept notifications + const intercept_ctx = try pre_wrap_req.params.arena.allocator().create(InterceptContext); + intercept_ctx.* = .{ + .forward = Forward.fromRequest(pre_wrap_req), + .request = pre_wrap_req, + }; + + var req = intercept_ctx.forward.wrapRequest( + pre_wrap_req, + intercept_ctx, + client.incrReqId(), + .{ + .start = InterceptContext.startCallback, + .header = InterceptContext.headerCallback, + .data = InterceptContext.dataCallback, + .done = InterceptContext.doneCallback, + .err = InterceptContext.errorCallback, + .shutdown = InterceptContext.shutdownCallback, + }, + ); req.params.notification.dispatch(.http_request_start, &.{ .request = &req }); - const wait_for_interception = false; - // req.params.notification.dispatch(.http_request_intercept, &.{ - // .transfer = transfer, - // .wait_for_interception = &wait_for_interception, - // }); + var wait_for_interception = false; + req.params.notification.dispatch(.http_request_intercept, &.{ + .request = &req, + .wait_for_interception = &wait_for_interception, + }); - if (wait_for_interception == false) { - // request not intercepted, process it normally + if (!wait_for_interception) { return self.next.request(client, req); } - @panic("not implemented yet"); + self.intercepted += 1; + if (comptime IS_DEBUG) { + log.debug(.http, "wait for interception", .{ .intercepted = self.intercepted }); + } +} + +pub const InterceptContext = struct { + forward: Forward, + request: Request, + content_length: usize = 0, + + fn startCallback(response: Response) anyerror!void { + const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + return self.forward.forwardStart(response); + } + + fn headerCallback(response: Response) anyerror!bool { + const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + self.content_length = response.contentLength() orelse 0; + + self.request.params.notification.dispatch(.http_response_header_done, &.{ + .request = &self.request, + .response = &response, + }); + return self.forward.forwardHeader(response); + } + + fn dataCallback(response: Response, chunk: []const u8) anyerror!void { + const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + + self.request.params.notification.dispatch(.http_response_data, &.{ + .data = chunk, + .request = &self.request, + }); + + return self.forward.forwardData(response, chunk); + } + + fn doneCallback(ctx: *anyopaque) anyerror!void { + const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + self.request.params.notification.dispatch(.http_request_done, &.{ + .request = &self.request, + .content_length = self.content_length, + }); + return self.forward.forwardDone(); + } + + fn errorCallback(ctx: *anyopaque, err: anyerror) void { + const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + self.request.params.notification.dispatch(.http_request_fail, &.{ + .request = &self.request, + .err = err, + }); + self.forward.forwardErr(err); + } + + fn shutdownCallback(ctx: *anyopaque) void { + const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + self.forward.forwardShutdown(); + } +}; + +// CDP Callbacks + +pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) anyerror!void { + if (comptime IS_DEBUG) { + log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); + } + + self.intercepted -= 1; + return self.next.request(client, req); +} + +pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) void { + _ = client; + + if (comptime IS_DEBUG) { + log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); + } + self.intercepted -= 1; + + defer req.deinit(); + req.error_callback(req.ctx, error.Abort); +} + +fn fulfillInner( + req: Request, + status: u16, + headers: []const http.Header, + body: ?[]const u8, +) !void { + const fulfilled = FulfilledResponse{ + .status = status, + .url = req.params.url, + .headers = headers, + .body = body, + }; + + const response = Response.fromFulfilled(req.ctx, &fulfilled); + + if (req.start_callback) |cb| { + try cb(response); + } + + const proceed = try req.header_callback(response); + if (!proceed) { + return error.Abort; + } + + if (body) |b| { + try req.data_callback(response, b); + } + + try req.done_callback(req.ctx); +} + +pub fn fulfillRequest( + self: *InterceptionLayer, + _: *Client, + req: Request, + status: u16, + headers: []const http.Header, + body: ?[]const u8, +) !void { + if (comptime IS_DEBUG) { + log.debug(.http, "filfull transfer", .{ .intercepted = self.intercepted }); + } + + self.intercepted -= 1; + defer req.deinit(); + + fulfillInner(req, status, headers, body) catch |err| { + req.error_callback(req.ctx, err); + return err; + }; } diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index e1de10a0..eb38b720 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -190,7 +190,7 @@ const RobotsContext = struct { try self.buffer.ensureTotalCapacity(self.arena, cl); } }, - .cached => {}, + else => {}, } return true; } From 0d50f706dbddc40eae912e1e5d75e4025b1251e5 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 19:56:13 -0700 Subject: [PATCH 09/31] more fixing of hanging in cdp interception --- src/Notification.zig | 2 + src/browser/HttpClient.zig | 39 ++++++----- src/browser/Runner.zig | 4 +- src/cdp/domains/fetch.zig | 87 ++++++++++++------------- src/cdp/domains/page.zig | 2 +- src/network/layer/InterceptionLayer.zig | 56 ++++++++++++++++ 6 files changed, 121 insertions(+), 69 deletions(-) diff --git a/src/Notification.zig b/src/Notification.zig index e5b7c1c4..96b8f822 100644 --- a/src/Notification.zig +++ b/src/Notification.zig @@ -23,6 +23,7 @@ const Frame = @import("browser/Frame.zig"); const Transfer = @import("browser/HttpClient.zig").Transfer; const Request = @import("browser/HttpClient.zig").Request; const Response = @import("browser/HttpClient.zig").Response; +const InterceptContext = @import("network/layer/InterceptionLayer.zig").InterceptContext; const log = lp.log; const List = std.DoublyLinkedList; @@ -174,6 +175,7 @@ pub const RequestIntercept = struct { pub const RequestAuthRequired = struct { request: *Request, + intercept_ctx: *InterceptContext, wait_for_interception: *bool, }; diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 6dbb77ff..042b5406 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -65,15 +65,6 @@ ws_active: usize = 0, // Count of active http requests http_active: usize = 0, -// Count of intercepted requests. This is to help deal with intercepted requests. -// The client doesn't track intercepted transfers. If a request is intercepted, -// the client forgets about it and requires the interceptor to continue or abort -// it. That works well, except if we only rely on active, we might think there's -// no more network activity when, with interecepted requests, there might be more -// in the future. (We really only need this to properly emit a 'networkIdle' and -// 'networkAlmostIdle' Page.lifecycleEvent in CDP). -intercepted: usize = 0, - // Our curl multi handle. handles: http.Handles, @@ -471,7 +462,15 @@ pub fn syncRequest(self: *Client, allocator: Allocator, params: RequestParams) ! }); while (sync_ctx.completion == .in_progress) { - _ = try self.tick(200); + const status = try self.tick(200); + log.debug(.http, "sync request tick", .{ .status = status }); + switch (status) { + .cdp_socket => { + const cdp = self.cdp_client.?; + _ = cdp.blocking_read(cdp.ctx); + }, + .normal => continue, + } } switch (sync_ctx.completion) { @@ -831,6 +830,7 @@ pub const RequestParams = struct { arena: ArenaAllocator = undefined, /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. request_id: u32 = undefined, + frame_id: u32, loader_id: u32, method: Method, @@ -1312,25 +1312,24 @@ pub const Transfer = struct { } } - fn detectAuthChallenge(transfer: *Transfer, conn: *const http.Connection) void { - const status = conn.getResponseCode() catch return; - const connect_status = conn.getConnectCode() catch return; + pub fn detectAuthChallenge(conn: *const http.Connection) ?http.AuthChallenge { + const status = conn.getResponseCode() catch return null; + const connect_status = conn.getConnectCode() catch return null; if (status != 401 and status != 407 and connect_status != 401 and connect_status != 407) { - transfer._auth_challenge = null; - return; + return null; } if (conn.getResponseHeader("WWW-Authenticate", 0)) |hdr| { - transfer._auth_challenge = http.AuthChallenge.parse(status, .server, hdr.value) catch null; + return http.AuthChallenge.parse(status, .server, hdr.value) catch null; } else if (conn.getConnectHeader("WWW-Authenticate", 0)) |hdr| { - transfer._auth_challenge = http.AuthChallenge.parse(status, .server, hdr.value) catch null; + return http.AuthChallenge.parse(status, .server, hdr.value) catch null; } else if (conn.getResponseHeader("Proxy-Authenticate", 0)) |hdr| { - transfer._auth_challenge = http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; + return http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; } else if (conn.getConnectHeader("Proxy-Authenticate", 0)) |hdr| { - transfer._auth_challenge = http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; + return http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; } else { - transfer._auth_challenge = .{ .status = status, .source = null, .scheme = null, .realm = null }; + return .{ .status = status, .source = null, .scheme = null, .realm = null }; } } diff --git a/src/browser/Runner.zig b/src/browser/Runner.zig index 68a7ad59..2489382e 100644 --- a/src/browser/Runner.zig +++ b/src/browser/Runner.zig @@ -185,7 +185,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { try frame.dispatchLoad(); const http_active = http_client.http_active; - const total_network_activity = http_active + http_client.intercepted; + const total_network_activity = http_active + http_client.interception_layer.intercepted; if (frame._notified_network_almost_idle.check(total_network_activity <= 2)) { frame.notifyNetworkAlmostIdle(); } @@ -211,7 +211,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { // because is_cdp is false, and that can only be // the case when interception isn't possible. if (comptime IS_DEBUG) { - std.debug.assert(http_client.intercepted == 0); + std.debug.assert(http_client.interception_layer.intercepted == 0); } if (browser.hasBackgroundTasks()) { diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index a7c5672f..2de7ee3d 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -400,54 +400,49 @@ fn failRequest(cmd: *CDP.Command) !void { } pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notification.RequestAuthRequired) !void { - _ = bc; - _ = intercept; - return error.NullAuthChallenge; + // detachTarget could be called, in which case, we still have a frame doing + // things, but no session. + const session_id = bc.session_id orelse return; + + // We keep it around to wait for modifications to the request. + // NOTE: we assume whomever created the request created it with a lifetime of the Page. + // TODO: What to do when receiving replies for a previous frame's requests? + + const intercept_ctx = intercept.intercept_ctx; + const request = intercept.request; + try bc.intercept_state.put(request.*); + + const challenge = intercept_ctx.auth_challenge orelse return error.NullAuthChallenge; + + try bc.cdp.sendEvent("Fetch.authRequired", .{ + .requestId = &id.toInterceptId(request.params.request_id), + .frameId = &id.toFrameId(request.params.frame_id), + .request = network.RequestWriter.init(request), + .resourceType = switch (request.params.resource_type) { + .script => "Script", + .xhr => "XHR", + .document => "Document", + .fetch => "Fetch", + }, + .authChallenge = .{ + .origin = "", // TODO get origin, could be the proxy address for example. + .source = if (challenge.source) |s| (if (s == .server) "Server" else "Proxy") else "", + .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", + .realm = challenge.realm orelse "", + }, + .networkId = &id.toRequestId2(request), + }, .{ .session_id = session_id }); + + log.debug(.cdp, "request auth required", .{ + .state = "paused", + .id = request.params.request_id, + .url = request.params.url, + }); + // Await continueWithAuth + + intercept.wait_for_interception.* = true; } -// pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notification.RequestAuthRequired) !void { -// // detachTarget could be called, in which case, we still have a frame doing -// // things, but no session. -// const session_id = bc.session_id orelse return; - -// // We keep it around to wait for modifications to the request. -// // NOTE: we assume whomever created the request created it with a lifetime of the Page. -// // TODO: What to do when receiving replies for a previous frame's requests? - -// const transfer = intercept.transfer; -// try bc.intercept_state.put(transfer); - -// const challenge = transfer._auth_challenge orelse return error.NullAuthChallenge; - -// try bc.cdp.sendEvent("Fetch.authRequired", .{ -// .requestId = &id.toInterceptId(transfer.id), -// .frameId = &id.toFrameId(transfer.req.params.frame_id), -// .request = network.TransferAsRequestWriter.init(transfer), -// .resourceType = switch (transfer.req.params.resource_type) { -// .script => "Script", -// .xhr => "XHR", -// .document => "Document", -// .fetch => "Fetch", -// }, -// .authChallenge = .{ -// .origin = "", // TODO get origin, could be the proxy address for example. -// .source = if (challenge.source) |s| (if (s == .server) "Server" else "Proxy") else "", -// .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", -// .realm = challenge.realm orelse "", -// }, -// .networkId = &id.toRequestId(transfer), -// }, .{ .session_id = session_id }); - -// log.debug(.cdp, "request auth required", .{ -// .state = "paused", -// .id = transfer.id, -// .url = transfer.url, -// }); -// // Await continueWithAuth - -// intercept.wait_for_interception.* = true; -// } - // Get u32 from requestId which is formatted as: "INT-{d}" fn idFromRequestId(request_id: []const u8) !u32 { if (!std.mem.startsWith(u8, request_id, "INT-")) { diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index c77319ae..239b5122 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -145,7 +145,7 @@ fn setLifecycleEventsEnabled(cmd: *CDP.Command) !void { const http_client = frame._session.browser.http_client; const http_active = http_client.http_active; - const total_network_activity = http_active + http_client.intercepted; + const total_network_activity = http_active + http_client.interception_layer.intercepted; if (frame._notified_network_almost_idle.check(total_network_activity <= 2)) { try sendPageLifecycle(bc, "networkAlmostIdle", now, frame_id, loader_id); } diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index d6905614..f5bc6aaa 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -25,6 +25,7 @@ const IS_DEBUG = builtin.mode == .Debug; const http = @import("../http.zig"); const URL = @import("../../browser/URL.zig"); const Client = @import("../../browser/HttpClient.zig").Client; +const Transfer = @import("../../browser/HttpClient.zig").Transfer; const Request = @import("../../browser/HttpClient.zig").Request; const Response = @import("../../browser/HttpClient.zig").Response; const FulfilledResponse = @import("../../browser/HttpClient.zig").FulfilledResponse; @@ -59,6 +60,7 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { const intercept_ctx = try pre_wrap_req.params.arena.allocator().create(InterceptContext); intercept_ctx.* = .{ .forward = Forward.fromRequest(pre_wrap_req), + .layer = self, .request = pre_wrap_req, }; @@ -96,18 +98,59 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { pub const InterceptContext = struct { forward: Forward, + layer: *InterceptionLayer, request: Request, content_length: usize = 0, + auth_challenge: ?http.AuthChallenge = null, + tries: usize = 0, + fn startCallback(response: Response) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + log.debug(.http, "intercept start", .{ .url = self.request.params.url }); return self.forward.forwardStart(response); } fn headerCallback(response: Response) anyerror!bool { const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + log.debug(.http, "intercept header", .{ + .url = self.request.params.url, + .status = response.status(), + .content_length = response.contentLength(), + }); + self.content_length = response.contentLength() orelse 0; + switch (response.inner) { + .transfer => |t| { + const status = t.response_header.?.status; + if (status == 401 or status == 407) { + self.auth_challenge = Transfer.detectAuthChallenge(t._conn.?); + + if (self.auth_challenge != null and self.tries < 10) { + var wait_for_interception = false; + + self.request.params.notification.dispatch(.http_request_auth_required, &.{ + .request = &self.request, + .intercept_ctx = self, + .wait_for_interception = &wait_for_interception, + }); + + if (wait_for_interception) { + log.debug(.http, "intercept auth required", .{ + .url = self.request.params.url, + .status = status, + .intercepted = self.layer.intercepted, + }); + self.layer.intercepted += 1; + return false; + } + } + } + }, + else => {}, + } + self.request.params.notification.dispatch(.http_response_header_done, &.{ .request = &self.request, .response = &response, @@ -117,6 +160,10 @@ pub const InterceptContext = struct { fn dataCallback(response: Response, chunk: []const u8) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + log.debug(.http, "intercept data", .{ + .url = self.request.params.url, + .len = chunk.len, + }); self.request.params.notification.dispatch(.http_response_data, &.{ .data = chunk, @@ -128,6 +175,10 @@ pub const InterceptContext = struct { fn doneCallback(ctx: *anyopaque) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + log.debug(.http, "intercept done", .{ + .url = self.request.params.url, + .content_length = self.content_length, + }); self.request.params.notification.dispatch(.http_request_done, &.{ .request = &self.request, .content_length = self.content_length, @@ -137,6 +188,10 @@ pub const InterceptContext = struct { fn errorCallback(ctx: *anyopaque, err: anyerror) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + log.debug(.http, "intercept error", .{ + .url = self.request.params.url, + .err = err, + }); self.request.params.notification.dispatch(.http_request_fail, &.{ .request = &self.request, .err = err, @@ -146,6 +201,7 @@ pub const InterceptContext = struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + log.debug(.http, "intercept shutdown", .{ .url = self.request.params.url }); self.forward.forwardShutdown(); } }; From dddd0dfb90db95274f37def99dcafa7e686c631c Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 21:11:13 -0700 Subject: [PATCH 10/31] fix request id mismatch on cdp --- src/network/layer/CacheLayer.zig | 1 - src/network/layer/Forward.zig | 2 - src/network/layer/InterceptionLayer.zig | 65 ++++++++++++++----------- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 6c5c72c6..317457b5 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -80,7 +80,6 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const wrapped = cache_ctx.forward.wrapRequest( req, cache_ctx, - client.incrReqId(), .{ .start = CacheContext.startCallback, .header = CacheContext.headerCallback, diff --git a/src/network/layer/Forward.zig b/src/network/layer/Forward.zig index 2a81ec14..b11ff23f 100644 --- a/src/network/layer/Forward.zig +++ b/src/network/layer/Forward.zig @@ -54,14 +54,12 @@ pub fn wrapRequest( self: *Forward, req: Request, new_ctx: anytype, - new_id: u32, overrides: Overrides, ) Request { const T = @TypeOf(new_ctx.*); const PassthroughT = makePassthrough(T, "forward"); var wrapped = req; wrapped.ctx = new_ctx; - wrapped.params.request_id = new_id; wrapped.start_callback = overrides.start orelse if (self.start != null) PassthroughT.start else null; wrapped.header_callback = overrides.header orelse PassthroughT.header; wrapped.data_callback = overrides.data orelse PassthroughT.data; diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index f5bc6aaa..21225815 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -67,7 +67,6 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { var req = intercept_ctx.forward.wrapRequest( pre_wrap_req, intercept_ctx, - client.incrReqId(), .{ .start = InterceptContext.startCallback, .header = InterceptContext.headerCallback, @@ -86,6 +85,12 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { .wait_for_interception = &wait_for_interception, }); + log.debug(.http, "interception check", .{ + .wait_for_interception = wait_for_interception, + .intercepted = self.intercepted, + .url = req.params.url, + }); + if (!wait_for_interception) { return self.next.request(client, req); } @@ -121,35 +126,35 @@ pub const InterceptContext = struct { self.content_length = response.contentLength() orelse 0; - switch (response.inner) { - .transfer => |t| { - const status = t.response_header.?.status; - if (status == 401 or status == 407) { - self.auth_challenge = Transfer.detectAuthChallenge(t._conn.?); + // switch (response.inner) { + // .transfer => |t| { + // const status = t.response_header.?.status; + // if (status == 401 or status == 407) { + // self.auth_challenge = Transfer.detectAuthChallenge(t._conn.?); - if (self.auth_challenge != null and self.tries < 10) { - var wait_for_interception = false; + // if (self.auth_challenge != null and self.tries < 10) { + // var wait_for_interception = false; - self.request.params.notification.dispatch(.http_request_auth_required, &.{ - .request = &self.request, - .intercept_ctx = self, - .wait_for_interception = &wait_for_interception, - }); + // self.request.params.notification.dispatch(.http_request_auth_required, &.{ + // .request = &self.request, + // .intercept_ctx = self, + // .wait_for_interception = &wait_for_interception, + // }); - if (wait_for_interception) { - log.debug(.http, "intercept auth required", .{ - .url = self.request.params.url, - .status = status, - .intercepted = self.layer.intercepted, - }); - self.layer.intercepted += 1; - return false; - } - } - } - }, - else => {}, - } + // if (wait_for_interception) { + // log.debug(.http, "intercept auth required", .{ + // .url = self.request.params.url, + // .status = status, + // .intercepted = self.layer.intercepted, + // }); + // self.layer.intercepted += 1; + // return false; + // } + // } + // } + // }, + // else => {}, + // } self.request.params.notification.dispatch(.http_response_header_done, &.{ .request = &self.request, @@ -202,6 +207,10 @@ pub const InterceptContext = struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); log.debug(.http, "intercept shutdown", .{ .url = self.request.params.url }); + self.request.params.notification.dispatch(.http_request_fail, &.{ + .request = &self.request, + .err = error.Shutdown, + }); self.forward.forwardShutdown(); } }; @@ -269,7 +278,7 @@ pub fn fulfillRequest( body: ?[]const u8, ) !void { if (comptime IS_DEBUG) { - log.debug(.http, "filfull transfer", .{ .intercepted = self.intercepted }); + log.debug(.http, "fulfill transfer", .{ .intercepted = self.intercepted }); } self.intercepted -= 1; From d0b421b085c9f502e455dcd8e63f88ce71869f2b Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 21:41:59 -0700 Subject: [PATCH 11/31] partial auth challenge support --- src/cdp/domains/fetch.zig | 30 +++++------ src/network/layer/InterceptionLayer.zig | 71 +++++++++++++++---------- 2 files changed, 57 insertions(+), 44 deletions(-) diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 2de7ee3d..0f638b81 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -300,7 +300,7 @@ fn continueWithAuth(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const request = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = intercept_state.remove(request_id) orelse return error.RequestNotFound; log.debug(.cdp, "request intercept", .{ .state = "continue with auth", @@ -308,27 +308,27 @@ fn continueWithAuth(cmd: *CDP.Command) !void { .response = params.authChallengeResponse.response, }); + const client = bc.cdp.browser.http_client; + if (params.authChallengeResponse.response != .ProvideCredentials) { - // TODO: - // request.abortAuthChallenge(); + client.interception_layer.abortAuthChallenge(request); return cmd.sendResult(null, .{}); } - // TODO: // cancel the request, deinit the transfer on error. - // errdefer request.abortAuthChallenge(); + errdefer client.interception_layer.abortAuthChallenge(request); - // todo: - // restart the request with the provided credentials. - // const arena = request.params.arena.allocator(); - // request.updateCredentials( - // try std.fmt.allocPrintSentinel(arena, "{s}:{s}", .{ - // params.authChallengeResponse.username, - // params.authChallengeResponse.password, - // }, 0), - // ); + const arena = request.params.arena.allocator(); + request.params.credentials = try std.fmt.allocPrintSentinel( + arena, + "{s}:{s}", + .{ + params.authChallengeResponse.username, + params.authChallengeResponse.password, + }, + 0, + ); - const client = bc.cdp.browser.http_client; try client.interception_layer.continueRequest(client, request); return cmd.sendResult(null, .{}); } diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 21225815..823a5314 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -126,40 +126,21 @@ pub const InterceptContext = struct { self.content_length = response.contentLength() orelse 0; - // switch (response.inner) { - // .transfer => |t| { - // const status = t.response_header.?.status; - // if (status == 401 or status == 407) { - // self.auth_challenge = Transfer.detectAuthChallenge(t._conn.?); - - // if (self.auth_challenge != null and self.tries < 10) { - // var wait_for_interception = false; - - // self.request.params.notification.dispatch(.http_request_auth_required, &.{ - // .request = &self.request, - // .intercept_ctx = self, - // .wait_for_interception = &wait_for_interception, - // }); - - // if (wait_for_interception) { - // log.debug(.http, "intercept auth required", .{ - // .url = self.request.params.url, - // .status = status, - // .intercepted = self.layer.intercepted, - // }); - // self.layer.intercepted += 1; - // return false; - // } - // } - // } - // }, - // else => {}, - // } + switch (response.inner) { + .transfer => |t| { + const status = t.response_header.?.status; + if (status == 401 or status == 407) { + self.auth_challenge = Transfer.detectAuthChallenge(t._conn.?); + } + }, + else => {}, + } self.request.params.notification.dispatch(.http_response_header_done, &.{ .request = &self.request, .response = &response, }); + return self.forward.forwardHeader(response); } @@ -184,6 +165,27 @@ pub const InterceptContext = struct { .url = self.request.params.url, .content_length = self.content_length, }); + + // if (self.auth_challenge != null and self.tries < 10) { + // var wait_for_interception = false; + // self.request.params.notification.dispatch(.http_request_auth_required, &.{ + // .request = &self.request, + // .intercept_ctx = self, + // .wait_for_interception = &wait_for_interception, + // }); + + // if (wait_for_interception) { + // log.debug(.http, "intercept auth required", .{ + // .url = self.request.params.url, + // .intercepted = self.layer.intercepted, + // }); + // self.layer.intercepted += 1; + // self.tries += 1; + // // Don't forward done — CDP owns this now, will retry via continueWithAuth + // return; + // } + // } + self.request.params.notification.dispatch(.http_request_done, &.{ .request = &self.request, .content_length = self.content_length, @@ -289,3 +291,14 @@ pub fn fulfillRequest( return err; }; } + +pub fn abortAuthChallenge(self: *InterceptionLayer, req: Request) void { + if (comptime IS_DEBUG) { + log.debug(.http, "abort auth transfer", .{ .intercepted = self.intercepted }); + } + + self.intercepted -= 1; + defer req.deinit(); + req.error_callback(req.ctx, error.AbortAuthChallenge); + return; +} From 3db3281e8eff0415f9de95ce93b27c7e5396e37d Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 22:04:28 -0700 Subject: [PATCH 12/31] working authentication with InterceptionLayer --- src/Notification.zig | 3 +- src/browser/HttpClient.zig | 68 ++++++++--- src/browser/ScriptManager.zig | 4 - src/cdp/CDP.zig | 13 ++- src/cdp/domains/fetch.zig | 58 +++++---- src/cdp/domains/network.zig | 149 ------------------------ src/network/layer/InterceptionLayer.zig | 33 ------ 7 files changed, 98 insertions(+), 230 deletions(-) diff --git a/src/Notification.zig b/src/Notification.zig index 96b8f822..f429e7b9 100644 --- a/src/Notification.zig +++ b/src/Notification.zig @@ -174,8 +174,7 @@ pub const RequestIntercept = struct { }; pub const RequestAuthRequired = struct { - request: *Request, - intercept_ctx: *InterceptContext, + transfer: *Transfer, wait_for_interception: *bool, }; diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 042b5406..7600ad5a 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -639,6 +639,33 @@ fn perform(self: *Client, timeout_ms: c_int) anyerror!PerformStatus { } fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *Transfer) !bool { + if (msg.err == null or msg.err.? == error.RecvError) { + transfer.detectAuthChallenge(msg.conn); + } + + // In case of auth challenge + // TODO give a way to configure the number of auth retries. + if (transfer._auth_challenge != null and transfer._tries < 10) { + var wait_for_interception = false; + transfer.req.params.notification.dispatch( + .http_request_auth_required, + &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }, + ); + if (wait_for_interception) { + self.interception_layer.intercepted += 1; + if (comptime IS_DEBUG) { + log.debug(.http, "wait for auth interception", .{ .intercepted = self.interception_layer.intercepted }); + } + + // Whether or not this is a blocking request, we're not going + // to process it now. We can end the transfer, which will + // release the easy handle back into the pool. The transfer + // is still valid/alive (just has no handle). + transfer.releaseConn(); + return false; + } + } + // Handle redirects: reuse the same connection to preserve TCP state. if (msg.err == null) { const status = try msg.conn.getResponseCode(); @@ -1068,7 +1095,6 @@ pub const Transfer = struct { // for when a Transfer is queued in the client.queue _node: std.DoublyLinkedList.Node = .{}, - _intercept_state: InterceptState = .not_intercepted, const InterceptState = union(enum) { not_intercepted, @@ -1312,24 +1338,25 @@ pub const Transfer = struct { } } - pub fn detectAuthChallenge(conn: *const http.Connection) ?http.AuthChallenge { - const status = conn.getResponseCode() catch return null; - const connect_status = conn.getConnectCode() catch return null; + fn detectAuthChallenge(transfer: *Transfer, conn: *const http.Connection) void { + const status = conn.getResponseCode() catch return; + const connect_status = conn.getConnectCode() catch return; if (status != 401 and status != 407 and connect_status != 401 and connect_status != 407) { - return null; + transfer._auth_challenge = null; + return; } if (conn.getResponseHeader("WWW-Authenticate", 0)) |hdr| { - return http.AuthChallenge.parse(status, .server, hdr.value) catch null; + transfer._auth_challenge = http.AuthChallenge.parse(status, .server, hdr.value) catch null; } else if (conn.getConnectHeader("WWW-Authenticate", 0)) |hdr| { - return http.AuthChallenge.parse(status, .server, hdr.value) catch null; + transfer._auth_challenge = http.AuthChallenge.parse(status, .server, hdr.value) catch null; } else if (conn.getResponseHeader("Proxy-Authenticate", 0)) |hdr| { - return http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; + transfer._auth_challenge = http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; } else if (conn.getConnectHeader("Proxy-Authenticate", 0)) |hdr| { - return http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; + transfer._auth_challenge = http.AuthChallenge.parse(status, .proxy, hdr.value) catch null; } else { - return .{ .status = status, .source = null, .scheme = null, .realm = null }; + transfer._auth_challenge = .{ .status = status, .source = null, .scheme = null, .realm = null }; } } @@ -1358,16 +1385,12 @@ pub const Transfer = struct { // before interception process. pub fn abortAuthChallenge(self: *Transfer) void { if (comptime IS_DEBUG) { - std.debug.assert(self._intercept_state != .not_intercepted); - log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.intercepted }); + log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.interception_layer.intercepted }); } - self.client.intercepted -= 1; - if (!self.req.params.blocking) { - self.abort(error.AbortAuthChallenge); - return; - } - self._intercept_state = .{ .abort = error.AbortAuthChallenge }; + self.client.interception_layer.intercepted -= 1; + self.abort(error.AbortAuthChallenge); + return; } // headerDoneCallback is called once the headers have been read. @@ -1551,6 +1574,15 @@ pub const Transfer = struct { } }; +pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { + if (comptime IS_DEBUG) { + log.debug(.http, "continue transfer", .{ .intercepted = self.interception_layer.intercepted }); + } + + self.interception_layer.intercepted -= 1; + return self.process(transfer); +} + const Noop = struct { fn headerCallback(_: Response) !bool { return true; diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 28a3aefe..f707f390 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -680,7 +680,6 @@ pub const Script = struct { debug_transfer_aborted: bool = false, debug_transfer_bytes_received: usize = 0, debug_transfer_notified_fail: bool = false, - debug_transfer_intercept_state: u8 = 0, debug_transfer_auth_challenge: bool = false, debug_transfer_easy_id: usize = 0, @@ -756,7 +755,6 @@ pub const Script = struct { .a3 = self.debug_transfer_aborted, .a4 = self.debug_transfer_bytes_received, .a5 = self.debug_transfer_notified_fail, - .a7 = self.debug_transfer_intercept_state, .a8 = self.debug_transfer_auth_challenge, .a9 = self.debug_transfer_easy_id, .b1 = transfer.id, @@ -764,7 +762,6 @@ pub const Script = struct { .b3 = transfer.aborted, .b4 = transfer.bytes_received, .b5 = transfer._notified_fail, - .b7 = @intFromEnum(transfer._intercept_state), .b8 = transfer._auth_challenge != null, .b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0, }); @@ -774,7 +771,6 @@ pub const Script = struct { self.debug_transfer_aborted = transfer.aborted; self.debug_transfer_bytes_received = transfer.bytes_received; self.debug_transfer_notified_fail = transfer._notified_fail; - self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state); self.debug_transfer_auth_challenge = transfer._auth_challenge != null; self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0; }, diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 023f8bf2..326e93c5 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -450,9 +450,16 @@ pub const BrowserContext = struct { // abort all intercepted requests before closing the session/page // since some of these might callback into the page/scriptmanager - for (self.intercept_state.pendingRequests()) |request| { - defer request.deinit(); - request.error_callback(request.ctx, error.ClientDisconnect); + for (self.intercept_state.pendingIntercepts()) |intercept| { + switch (intercept) { + .transfer => |t| { + t.abort(error.ClientDisconnect); + }, + .request => |r| { + defer r.deinit(); + r.error_callback(r.ctx, error.ClientDisconnect); + }, + } } for (self.isolated_worlds.items) |world| { diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 0f638b81..7eff321e 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -54,7 +54,12 @@ pub fn processMessage(cmd: *CDP.Command) !void { // Stored in CDP pub const InterceptState = struct { allocator: Allocator, - waiting: std.AutoArrayHashMapUnmanaged(u32, HttpClient.Request), + waiting: std.AutoArrayHashMapUnmanaged(u32, Pending), + + const Pending = union(enum) { + transfer: *HttpClient.Transfer, + request: HttpClient.Request, + }; pub fn init(allocator: Allocator) !InterceptState { return .{ @@ -67,11 +72,15 @@ pub const InterceptState = struct { return self.waiting.count() == 0; } - pub fn put(self: *InterceptState, request: HttpClient.Request) !void { - return self.waiting.put(self.allocator, request.params.request_id, request); + pub fn putRequest(self: *InterceptState, request: HttpClient.Request) !void { + return self.waiting.put(self.allocator, request.params.request_id, .{ .request = request }); } - pub fn remove(self: *InterceptState, request_id: u32) ?HttpClient.Request { + pub fn putTransfer(self: *InterceptState, transfer: *HttpClient.Transfer) !void { + return self.waiting.put(self.allocator, transfer.id, .{ .transfer = transfer }); + } + + pub fn remove(self: *InterceptState, request_id: u32) ?Pending { const entry = self.waiting.fetchSwapRemove(request_id) orelse return null; return entry.value; } @@ -80,7 +89,7 @@ pub const InterceptState = struct { self.waiting.deinit(self.allocator); } - pub fn pendingRequests(self: *const InterceptState) []HttpClient.Request { + pub fn pendingIntercepts(self: *const InterceptState) []Pending { return self.waiting.values(); } }; @@ -193,7 +202,7 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification. // TODO: What to do when receiving replies for a previous frame's requests? const request = intercept.request; - try bc.intercept_state.put(request.*); + try bc.intercept_state.putRequest(request.*); try bc.cdp.sendEvent("Fetch.requestPaused", .{ .requestId = &id.toInterceptId(request.params.request_id), @@ -235,7 +244,9 @@ fn continueRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - var request = intercept_state.remove(request_id) orelse return error.RequestNotFound; + + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = pending.request; log.debug(.cdp, "request intercept", .{ .state = "continue", @@ -300,7 +311,9 @@ fn continueWithAuth(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - var request = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const transfer = pending.transfer; + var request = transfer.req; log.debug(.cdp, "request intercept", .{ .state = "continue with auth", @@ -311,15 +324,15 @@ fn continueWithAuth(cmd: *CDP.Command) !void { const client = bc.cdp.browser.http_client; if (params.authChallengeResponse.response != .ProvideCredentials) { - client.interception_layer.abortAuthChallenge(request); + transfer.abortAuthChallenge(); return cmd.sendResult(null, .{}); } // cancel the request, deinit the transfer on error. - errdefer client.interception_layer.abortAuthChallenge(request); + errdefer transfer.abortAuthChallenge(); const arena = request.params.arena.allocator(); - request.params.credentials = try std.fmt.allocPrintSentinel( + transfer.updateCredentials(try std.fmt.allocPrintSentinel( arena, "{s}:{s}", .{ @@ -327,9 +340,9 @@ fn continueWithAuth(cmd: *CDP.Command) !void { params.authChallengeResponse.password, }, 0, - ); + )); - try client.interception_layer.continueRequest(client, request); + try client.continueTransfer(transfer); return cmd.sendResult(null, .{}); } @@ -352,7 +365,9 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - var request = intercept_state.remove(request_id) orelse return error.RequestNotFound; + + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = pending.request; log.debug(.cdp, "request intercept", .{ .state = "fulfilled", @@ -385,7 +400,8 @@ fn failRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const request = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const request = pending.request; const client = bc.cdp.browser.http_client; defer client.interception_layer.abortRequest(client, request); @@ -408,16 +424,16 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati // NOTE: we assume whomever created the request created it with a lifetime of the Page. // TODO: What to do when receiving replies for a previous frame's requests? - const intercept_ctx = intercept.intercept_ctx; - const request = intercept.request; - try bc.intercept_state.put(request.*); + const transfer = intercept.transfer; + try bc.intercept_state.putTransfer(transfer); + var request = transfer.req; - const challenge = intercept_ctx.auth_challenge orelse return error.NullAuthChallenge; + const challenge = transfer._auth_challenge orelse return error.NullAuthChallenge; try bc.cdp.sendEvent("Fetch.authRequired", .{ .requestId = &id.toInterceptId(request.params.request_id), .frameId = &id.toFrameId(request.params.frame_id), - .request = network.RequestWriter.init(request), + .request = network.RequestWriter.init(&request), .resourceType = switch (request.params.resource_type) { .script => "Script", .xhr => "XHR", @@ -430,7 +446,7 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", .realm = challenge.realm orelse "", }, - .networkId = &id.toRequestId2(request), + .networkId = &id.toRequestId2(&request), }, .{ .session_id = session_id }); log.debug(.cdp, "request auth required", .{ diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 06581c15..1458bb0c 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -386,63 +386,6 @@ pub const RequestWriter = struct { } }; -pub const TransferAsRequestWriter = struct { - transfer: *Transfer, - - pub fn init(transfer: *Transfer) TransferAsRequestWriter { - return .{ - .transfer = transfer, - }; - } - - pub fn jsonStringify(self: *const TransferAsRequestWriter, jws: anytype) !void { - self._jsonStringify(jws) catch return error.WriteFailed; - } - fn _jsonStringify(self: *const TransferAsRequestWriter, jws: anytype) !void { - const transfer = self.transfer; - - try jws.beginObject(); - { - try jws.objectField("url"); - try jws.write(transfer.url); - } - - { - const frag = URL.getHash(transfer.url); - if (frag.len > 0) { - try jws.objectField("urlFragment"); - try jws.write(frag); - } - } - - { - try jws.objectField("method"); - try jws.write(@tagName(transfer.req.params.method)); - } - - { - try jws.objectField("hasPostData"); - try jws.write(transfer.req.params.body != null); - } - - { - try jws.objectField("headers"); - try jws.beginObject(); - var it = transfer.req.params.headers.iterator(); - while (it.next()) |hdr| { - try jws.objectField(hdr.name); - try jws.write(hdr.value); - } - if (try transfer.req.getCookieString()) |cookies| { - try jws.objectField("Cookie"); - try jws.write(cookies[0 .. cookies.len - 1]); - } - try jws.endObject(); - } - try jws.endObject(); - } -}; - const ResponseWriter = struct { arena: Allocator, response: *const Response, @@ -533,98 +476,6 @@ const ResponseWriter = struct { } }; -const TransferAsResponseWriter = struct { - arena: Allocator, - transfer: *Transfer, - - fn init(arena: Allocator, transfer: *Transfer) TransferAsResponseWriter { - return .{ - .arena = arena, - .transfer = transfer, - }; - } - - pub fn jsonStringify(self: *const TransferAsResponseWriter, jws: anytype) !void { - self._jsonStringify(jws) catch return error.WriteFailed; - } - - fn _jsonStringify(self: *const TransferAsResponseWriter, jws: anytype) !void { - const transfer = self.transfer; - - try jws.beginObject(); - { - try jws.objectField("url"); - try jws.write(transfer.url); - } - - if (transfer.response_header) |*rh| { - // it should not be possible for this to be false, but I'm not - // feeling brave today. - const status = rh.status; - try jws.objectField("status"); - try jws.write(status); - - try jws.objectField("statusText"); - try jws.write(@as(std.http.Status, @enumFromInt(status)).phrase() orelse "Unknown"); - } - - { - const mime: Mime = blk: { - if (transfer.response_header.?.contentType()) |ct| { - break :blk try Mime.parse(ct); - } - break :blk .unknown; - }; - - try jws.objectField("mimeType"); - try jws.write(mime.contentTypeString()); - try jws.objectField("charset"); - try jws.write(mime.charsetString()); - } - - { - try jws.objectField("timing"); - try jws.write(.{ - .requestTime = transfer.start_time, - .connectEnd = -1, - .connectStart = -1, - .dnsEnd = -1, - .dnsStart = -1, - .proxyEnd = -1, - .proxyStart = -1, - .receiveHeadersEnd = -1, - .receiveHeadersStart = -1, - .sendEnd = -1, - .sendStart = -1, - .sslEnd = -1, - .sslStart = -1, - }); - } - - { - // chromedp doesn't like having duplicate header names. It's pretty - // common to get these from a server (e.g. for Cache-Control), but - // Chrome joins these. So we have to too. - const arena = self.arena; - var it = transfer.responseHeaderIterator(); - var map: std.StringArrayHashMapUnmanaged([]const u8) = .empty; - while (it.next()) |hdr| { - const gop = try map.getOrPut(arena, hdr.name); - if (gop.found_existing) { - // yes, chrome joins multi-value headers with a \n - gop.value_ptr.* = try std.mem.join(arena, "\n", &.{ gop.value_ptr.*, hdr.value }); - } else { - gop.value_ptr.* = hdr.value; - } - } - - try jws.objectField("headers"); - try jws.write(std.json.ArrayHashMap([]const u8){ .map = map }); - } - try jws.endObject(); - } -}; - fn idFromRequestId(request_id: []const u8) !u64 { // The requesIid for the original document is its loaderId. if (!std.mem.startsWith(u8, request_id, "REQ-") and !std.mem.startsWith(u8, request_id, "LID-")) { diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 823a5314..fd868af1 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -107,9 +107,6 @@ pub const InterceptContext = struct { request: Request, content_length: usize = 0, - auth_challenge: ?http.AuthChallenge = null, - tries: usize = 0, - fn startCallback(response: Response) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); log.debug(.http, "intercept start", .{ .url = self.request.params.url }); @@ -126,16 +123,6 @@ pub const InterceptContext = struct { self.content_length = response.contentLength() orelse 0; - switch (response.inner) { - .transfer => |t| { - const status = t.response_header.?.status; - if (status == 401 or status == 407) { - self.auth_challenge = Transfer.detectAuthChallenge(t._conn.?); - } - }, - else => {}, - } - self.request.params.notification.dispatch(.http_response_header_done, &.{ .request = &self.request, .response = &response, @@ -166,26 +153,6 @@ pub const InterceptContext = struct { .content_length = self.content_length, }); - // if (self.auth_challenge != null and self.tries < 10) { - // var wait_for_interception = false; - // self.request.params.notification.dispatch(.http_request_auth_required, &.{ - // .request = &self.request, - // .intercept_ctx = self, - // .wait_for_interception = &wait_for_interception, - // }); - - // if (wait_for_interception) { - // log.debug(.http, "intercept auth required", .{ - // .url = self.request.params.url, - // .intercepted = self.layer.intercepted, - // }); - // self.layer.intercepted += 1; - // self.tries += 1; - // // Don't forward done — CDP owns this now, will retry via continueWithAuth - // return; - // } - // } - self.request.params.notification.dispatch(.http_request_done, &.{ .request = &self.request, .content_length = self.content_length, From ca08f0c56dc2dad363c742e2f67007c39bcf4888 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 22:10:33 -0700 Subject: [PATCH 13/31] remove blocking from RequestParams --- src/browser/HttpClient.zig | 7 ------- src/browser/ScriptManager.zig | 2 -- src/network/layer/RobotsLayer.zig | 1 - 3 files changed, 10 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 7600ad5a..db3b98b9 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -871,13 +871,6 @@ pub const RequestParams = struct { notification: *Notification, timeout_ms: u32 = 0, - // This is only relevant for intercepted requests. If a request is flagged - // as blocking AND is intercepted, then it'll be up to us to wait until - // we receive a response to the interception. This probably isn't ideal, - // but it's harder for our caller (ScriptManager) to deal with this. One - // reason for that is the Http Client is already a bit CDP-aware. - blocking: bool = false, - const ResourceType = enum { document, xhr, diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index f707f390..12cc6cb5 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -292,7 +292,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e .frame_id = frame._frame_id, .loader_id = frame._loader_id, .headers = headers, - .blocking = true, .cookie_jar = &frame._session.cookie_jar, .cookie_origin = frame.url, .resource_type = .script, @@ -316,7 +315,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e .frame_id = frame._frame_id, .loader_id = frame._loader_id, .headers = headers, - .blocking = false, .cookie_jar = &frame._session.cookie_jar, .cookie_origin = frame.url, .resource_type = .script, diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index eb38b720..aa3d0e5f 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -114,7 +114,6 @@ fn fetchRobotsThenRequest( .url = robots_url, .method = .GET, .headers = headers, - .blocking = false, .frame_id = req.params.frame_id, .loader_id = req.params.loader_id, .cookie_jar = req.params.cookie_jar, From 13cc122e26d4d86d3ee08f0b9238907d837379d1 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 22:10:40 -0700 Subject: [PATCH 14/31] remove InterceptState --- src/browser/HttpClient.zig | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index db3b98b9..c61c457a 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -1089,14 +1089,6 @@ pub const Transfer = struct { // for when a Transfer is queued in the client.queue _node: std.DoublyLinkedList.Node = .{}, - const InterceptState = union(enum) { - not_intercepted, - pending, - @"continue", - abort: anyerror, - fulfilled, - }; - fn releaseConn(self: *Transfer) void { if (self._conn) |conn| { self.client.removeConn(conn); From 87eec578aaeddf8977eff08a8276b6eae3c86fee Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 22:40:43 -0700 Subject: [PATCH 15/31] use arena pool in InterceptionLayer --- src/network/layer/InterceptionLayer.zig | 31 ++++++++++++++++++------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index fd868af1..041af5f1 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -23,9 +23,7 @@ const log = @import("../../log.zig"); const IS_DEBUG = builtin.mode == .Debug; const http = @import("../http.zig"); -const URL = @import("../../browser/URL.zig"); const Client = @import("../../browser/HttpClient.zig").Client; -const Transfer = @import("../../browser/HttpClient.zig").Transfer; const Request = @import("../../browser/HttpClient.zig").Request; const Response = @import("../../browser/HttpClient.zig").Response; const FulfilledResponse = @import("../../browser/HttpClient.zig").FulfilledResponse; @@ -54,18 +52,21 @@ pub fn layer(self: *InterceptionLayer) Layer { fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { const self: *InterceptionLayer = @ptrCast(@alignCast(ptr)); - var pre_wrap_req = in_req; - // Wrap callbacks to intercept notifications - const intercept_ctx = try pre_wrap_req.params.arena.allocator().create(InterceptContext); + const arena = try client.network.app.arena_pool.acquire(.small, "InterceptionLayer"); + errdefer client.network.app.arena_pool.release(arena); + + const intercept_ctx = try arena.create(InterceptContext); intercept_ctx.* = .{ - .forward = Forward.fromRequest(pre_wrap_req), + .arena = arena, + .client = client, + .forward = Forward.fromRequest(in_req), .layer = self, - .request = pre_wrap_req, + .request = in_req, }; var req = intercept_ctx.forward.wrapRequest( - pre_wrap_req, + in_req, intercept_ctx, .{ .start = InterceptContext.startCallback, @@ -102,6 +103,8 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { } pub const InterceptContext = struct { + arena: std.mem.Allocator, + client: *Client, forward: Forward, layer: *InterceptionLayer, request: Request, @@ -148,6 +151,8 @@ pub const InterceptContext = struct { fn doneCallback(ctx: *anyopaque) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + defer self.client.network.app.arena_pool.release(self.arena); + log.debug(.http, "intercept done", .{ .url = self.request.params.url, .content_length = self.content_length, @@ -162,6 +167,8 @@ pub const InterceptContext = struct { fn errorCallback(ctx: *anyopaque, err: anyerror) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + defer self.client.network.app.arena_pool.release(self.arena); + log.debug(.http, "intercept error", .{ .url = self.request.params.url, .err = err, @@ -175,6 +182,8 @@ pub const InterceptContext = struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + defer self.client.network.app.arena_pool.release(self.arena); + log.debug(.http, "intercept shutdown", .{ .url = self.request.params.url }); self.request.params.notification.dispatch(.http_request_fail, &.{ .request = &self.request, @@ -192,7 +201,11 @@ pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) } self.intercepted -= 1; - return self.next.request(client, req); + self.next.request(client, req) catch |err| { + const ctx: *InterceptContext = @ptrCast(@alignCast(req.ctx)); + client.network.app.arena_pool.release(ctx.arena); + return err; + }; } pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) void { From 175c2cc288b35dfb996f7542d520d9d90bddc1e7 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 22:48:21 -0700 Subject: [PATCH 16/31] ensure robots params have arena and request id --- src/network/layer/RobotsLayer.zig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index aa3d0e5f..11b7c8f3 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -111,6 +111,9 @@ fn fetchRobotsThenRequest( try self.next.request(client, .{ .ctx = robots_ctx, .params = .{ + // We have to do this ourselves because we are not going through the top level `request`. + .arena = std.heap.ArenaAllocator.init(client.allocator), + .request_id = client.incrReqId(), .url = robots_url, .method = .GET, .headers = headers, From bb9e238f6c223bbc33528be3713cebb1624ed8dd Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 22:59:11 -0700 Subject: [PATCH 17/31] Requests now use arenas from the arena pool --- src/browser/HttpClient.zig | 23 +++++++++++++++-------- src/cdp/domains/fetch.zig | 8 ++++---- src/network/layer/InterceptionLayer.zig | 19 +++---------------- src/network/layer/RobotsLayer.zig | 17 ++++++++++------- 4 files changed, 32 insertions(+), 35 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index c61c457a..90d6b8ab 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -400,7 +400,10 @@ pub fn request(self: *Client, req: Request) !void { // Assign Request Id. var our_req = req; our_req.params.request_id = self.incrReqId(); - our_req.params.arena = ArenaAllocator.init(self.allocator); + + const arena = try self.network.app.arena_pool.acquire(.small, "Request.arena"); + errdefer self.network.app.arena_pool.release(arena); + our_req.params.arena = arena; return self.entry_layer.request(self, our_req); } @@ -854,7 +857,7 @@ fn ensureNoActiveConnection(self: *const Client) !void { pub const RequestParams = struct { /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. - arena: ArenaAllocator = undefined, + arena: Allocator = undefined, /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. request_id: u32 = undefined, @@ -893,7 +896,6 @@ pub const RequestParams = struct { pub fn deinit(self: *const RequestParams) void { self.headers.deinit(); - self.arena.deinit(); } }; @@ -918,7 +920,7 @@ pub const Request = struct { pub fn getCookieString(self: *Request) !?[:0]const u8 { const jar = self.params.cookie_jar orelse return null; - var aw: std.Io.Writer.Allocating = .init(self.params.arena.allocator()); + var aw: std.Io.Writer.Allocating = .init(self.params.arena); try jar.forRequest(self.params.url, &aw.writer, .{ .is_http = true, .origin_url = self.params.cookie_origin, @@ -1102,7 +1104,7 @@ pub const Transfer = struct { self._conn = null; } - self.req.deinit(); + self.client.deinitRequest(self.req); self.client.transfer_pool.destroy(self); } @@ -1266,7 +1268,7 @@ pub const Transfer = struct { fn handleRedirect(transfer: *Transfer) !void { const req = &transfer.req; const conn = transfer._conn.?; - const arena = transfer.req.params.arena.allocator(); + const arena = transfer.req.params.arena; transfer._redirect_count += 1; if (transfer._redirect_count > transfer.client.network.config.httpMaxRedirects()) { @@ -1443,7 +1445,7 @@ pub const Transfer = struct { transfer._callback_error = error.ResponseTooLarge; return http.writefunc_error; } - transfer._stream_buffer.ensureTotalCapacity(transfer.req.params.arena.allocator(), cl) catch {}; + transfer._stream_buffer.ensureTotalCapacity(transfer.req.params.arena, cl) catch {}; } } @@ -1456,7 +1458,7 @@ pub const Transfer = struct { } const chunk = buffer[0..chunk_len]; - transfer._stream_buffer.appendSlice(transfer.req.params.arena.allocator(), chunk) catch |err| { + transfer._stream_buffer.appendSlice(transfer.req.params.arena, chunk) catch |err| { transfer._callback_error = err; return http.writefunc_error; }; @@ -1568,6 +1570,11 @@ pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { return self.process(transfer); } +pub fn deinitRequest(self: *Client, req: Request) void { + req.deinit(); + self.network.app.arena_pool.release(req.params.arena); +} + const Noop = struct { fn headerCallback(_: Response) !bool { return true; diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 7eff321e..672e4b92 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -255,7 +255,7 @@ fn continueRequest(cmd: *CDP.Command) !void { .new_url = params.url, }); - const arena = request.params.arena.allocator(); + const arena = request.params.arena; // Update the request with the new parameters if (params.url) |url| { request.params.url = try arena.dupeZ(u8, url); @@ -313,7 +313,7 @@ fn continueWithAuth(cmd: *CDP.Command) !void { const request_id = try idFromRequestId(params.requestId); const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; const transfer = pending.transfer; - var request = transfer.req; + const request = transfer.req; log.debug(.cdp, "request intercept", .{ .state = "continue with auth", @@ -331,7 +331,7 @@ fn continueWithAuth(cmd: *CDP.Command) !void { // cancel the request, deinit the transfer on error. errdefer transfer.abortAuthChallenge(); - const arena = request.params.arena.allocator(); + const arena = request.params.arena; transfer.updateCredentials(try std.fmt.allocPrintSentinel( arena, "{s}:{s}", @@ -380,7 +380,7 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var body: ?[]const u8 = null; if (params.body) |b| { const decoder = std.base64.standard.Decoder; - const buf = try request.params.arena.allocator().alloc(u8, try decoder.calcSizeForSlice(b)); + const buf = try request.params.arena.alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(buf, b); body = buf; } diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 041af5f1..696ad5de 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -209,14 +209,12 @@ pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) } pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) void { - _ = client; - if (comptime IS_DEBUG) { log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); } self.intercepted -= 1; - defer req.deinit(); + defer client.deinitRequest(req); req.error_callback(req.ctx, error.Abort); } @@ -253,7 +251,7 @@ fn fulfillInner( pub fn fulfillRequest( self: *InterceptionLayer, - _: *Client, + client: *Client, req: Request, status: u16, headers: []const http.Header, @@ -264,21 +262,10 @@ pub fn fulfillRequest( } self.intercepted -= 1; - defer req.deinit(); + defer client.deinitRequest(req); fulfillInner(req, status, headers, body) catch |err| { req.error_callback(req.ctx, err); return err; }; } - -pub fn abortAuthChallenge(self: *InterceptionLayer, req: Request) void { - if (comptime IS_DEBUG) { - log.debug(.http, "abort auth transfer", .{ .intercepted = self.intercepted }); - } - - self.intercepted -= 1; - defer req.deinit(); - req.error_callback(req.ctx, error.AbortAuthChallenge); - return; -} diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 11b7c8f3..92303a68 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -66,7 +66,7 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const path = URL.getPathname(req.params.url); if (!robots.isAllowed(path)) { - defer req.deinit(); + defer client.deinitRequest(req); log.warn(.http, "blocked by robots", .{ .url = req.params.url }); req.error_callback(req.ctx, error.RobotsBlocked); @@ -108,11 +108,14 @@ fn fetchRobotsThenRequest( const headers = try client.newHeaders(); log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); + const new_arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer.fetchRobots"); + errdefer client.network.app.arena_pool.release(new_arena); + try self.next.request(client, .{ .ctx = robots_ctx, .params = .{ // We have to do this ourselves because we are not going through the top level `request`. - .arena = std.heap.ArenaAllocator.init(client.allocator), + .arena = new_arena, .request_id = client.incrReqId(), .url = robots_url, .method = .GET, @@ -145,24 +148,24 @@ fn flushPending(self: *RobotsLayer, client: *Client, robots_url: [:0]const u8, a for (queued.value.items) |queued_req| { if (!allowed) { log.warn(.http, "blocked by robots", .{ .url = queued_req.params.url }); - defer queued_req.deinit(); + defer client.deinitRequest(queued_req); queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); } else { self.next.request(client, queued_req) catch |e| { - defer queued_req.deinit(); + defer client.deinitRequest(queued_req); queued_req.error_callback(queued_req.ctx, e); }; } } } -fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8) void { +fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8, client: *Client) void { var queued = self.pending.fetchRemove(robots_url) orelse @panic("RobotsLayer.flushPendingShutdown: missing queue"); defer queued.value.deinit(self.allocator); for (queued.value.items) |queued_req| { - defer queued_req.deinit(); + defer client.deinitRequest(queued_req); if (queued_req.shutdown_callback) |cb| cb(queued_req.ctx); } } @@ -265,6 +268,6 @@ const RobotsContext = struct { defer client.network.app.arena_pool.release(self.arena); log.debug(.http, "robots fetch shutdown", .{}); - l.flushPendingShutdown(robots_url); + l.flushPendingShutdown(robots_url, client); } }; From d14b75d93bb39d14e284790271014f9e4dba4022 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 23:09:08 -0700 Subject: [PATCH 18/31] use Request arnea in InterceptionLayer --- src/network/layer/InterceptionLayer.zig | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 696ad5de..81b231f4 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -53,12 +53,8 @@ pub fn layer(self: *InterceptionLayer) Layer { fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { const self: *InterceptionLayer = @ptrCast(@alignCast(ptr)); - const arena = try client.network.app.arena_pool.acquire(.small, "InterceptionLayer"); - errdefer client.network.app.arena_pool.release(arena); - - const intercept_ctx = try arena.create(InterceptContext); + const intercept_ctx = try in_req.params.arena.create(InterceptContext); intercept_ctx.* = .{ - .arena = arena, .client = client, .forward = Forward.fromRequest(in_req), .layer = self, @@ -103,7 +99,6 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { } pub const InterceptContext = struct { - arena: std.mem.Allocator, client: *Client, forward: Forward, layer: *InterceptionLayer, @@ -151,7 +146,6 @@ pub const InterceptContext = struct { fn doneCallback(ctx: *anyopaque) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); - defer self.client.network.app.arena_pool.release(self.arena); log.debug(.http, "intercept done", .{ .url = self.request.params.url, @@ -167,7 +161,6 @@ pub const InterceptContext = struct { fn errorCallback(ctx: *anyopaque, err: anyerror) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); - defer self.client.network.app.arena_pool.release(self.arena); log.debug(.http, "intercept error", .{ .url = self.request.params.url, @@ -182,7 +175,6 @@ pub const InterceptContext = struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); - defer self.client.network.app.arena_pool.release(self.arena); log.debug(.http, "intercept shutdown", .{ .url = self.request.params.url }); self.request.params.notification.dispatch(.http_request_fail, &.{ @@ -203,7 +195,7 @@ pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) self.intercepted -= 1; self.next.request(client, req) catch |err| { const ctx: *InterceptContext = @ptrCast(@alignCast(req.ctx)); - client.network.app.arena_pool.release(ctx.arena); + ctx.client.deinitRequest(req); return err; }; } From fc702794c2ea55e84345f7d5748f1fc3e73ff8cc Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 23:09:53 -0700 Subject: [PATCH 19/31] use Request Arena in WebBotAuthLayer --- src/network/layer/WebBotAuthLayer.zig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig index 25260872..6367e7e8 100644 --- a/src/network/layer/WebBotAuthLayer.zig +++ b/src/network/layer/WebBotAuthLayer.zig @@ -42,9 +42,7 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const wba = client.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth"); - const arena = try client.network.app.arena_pool.acquire(.small, "WebBotAuthLayer"); - defer client.network.app.arena_pool.release(arena); - + const arena = req.params.arena; const authority = URL.getHost(req.params.url); try wba.signRequest(arena, &our_req.params.headers, authority); From e56036fb50dfe28d10461b0e93e7a0e15a72164a Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 23:11:02 -0700 Subject: [PATCH 20/31] use Request Arena in CacheLayer --- src/network/layer/CacheLayer.zig | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 317457b5..cb5b3c77 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -52,8 +52,7 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { return self.next.request(client, req); } - const arena = try network.app.arena_pool.acquire(.small, "CacheLayer"); - errdefer network.app.arena_pool.release(arena); + const arena = req.params.arena; var iter = req.params.headers.iterator(); const req_header_list = try iter.collect(arena); @@ -63,8 +62,7 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { .timestamp = std.time.timestamp(), .request_headers = req_header_list.items, })) |cached| { - defer req.deinit(); - defer network.app.arena_pool.release(arena); + defer client.deinitRequest(req); return serveFromCache(req, &cached); } @@ -208,8 +206,6 @@ const CacheContext = struct { fn doneCallback(ctx: *anyopaque) anyerror!void { const self: *CacheContext = @ptrCast(@alignCast(ctx)); - defer self.client.network.app.arena_pool.release(self.arena); - const transfer = self.transfer orelse @panic("Start Callback didn't set CacheLayer.transfer"); if (self.pending_metadata) |metadata| { @@ -227,13 +223,11 @@ const CacheContext = struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *CacheContext = @ptrCast(@alignCast(ctx)); - defer self.client.network.app.arena_pool.release(self.arena); self.forward.forwardShutdown(); } fn errorCallback(ctx: *anyopaque, e: anyerror) void { const self: *CacheContext = @ptrCast(@alignCast(ctx)); - defer self.client.network.app.arena_pool.release(self.arena); self.forward.forwardErr(e); } }; From 152a792c180ba8a8a1717a51579c2bf10c56f942 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sun, 26 Apr 2026 23:12:20 -0700 Subject: [PATCH 21/31] use Request Arena in RobotsLayer --- src/network/layer/RobotsLayer.zig | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 92303a68..6ac36890 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -53,14 +53,10 @@ pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void { fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const self: *RobotsLayer = @ptrCast(@alignCast(ptr)); - const arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer"); - errdefer client.network.app.arena_pool.release(arena); - + const arena = req.params.arena; const robots_url = try URL.getRobotsUrl(arena, req.params.url); if (client.network.robot_store.get(robots_url)) |robot_entry| { - defer client.network.app.arena_pool.release(arena); - switch (robot_entry) { .present => |robots| { const path = URL.getPathname(req.params.url); From c719a522b8ff50eb1f682d3c0c41832fc09bfdf4 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 06:54:46 -0700 Subject: [PATCH 22/31] use lightpanda module log in layers --- src/network/layer/CacheLayer.zig | 3 ++- src/network/layer/InterceptionLayer.zig | 3 ++- src/network/layer/RobotsLayer.zig | 3 ++- src/network/layer/WebBotAuthLayer.zig | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index cb5b3c77..3db0a3c1 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -17,7 +17,8 @@ // along with this program. If not, see . const std = @import("std"); -const log = @import("../../log.zig"); +const lp = @import("lightpanda"); +const log = lp.log; const http = @import("../http.zig"); const Client = @import("../../browser/HttpClient.zig").Client; diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 81b231f4..c75ef1a1 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -18,7 +18,8 @@ const std = @import("std"); const builtin = @import("builtin"); -const log = @import("../../log.zig"); +const lp = @import("lightpanda"); +const log = lp.log; const IS_DEBUG = builtin.mode == .Debug; diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 6ac36890..d4d5783a 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -17,7 +17,8 @@ // along with this program. If not, see . const std = @import("std"); -const log = @import("../../log.zig"); +const lp = @import("lightpanda"); +const log = lp.log; const URL = @import("../../browser/URL.zig"); const Robots = @import("../Robots.zig"); diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig index 6367e7e8..7e67af49 100644 --- a/src/network/layer/WebBotAuthLayer.zig +++ b/src/network/layer/WebBotAuthLayer.zig @@ -17,7 +17,8 @@ // along with this program. If not, see . const std = @import("std"); -const log = @import("../../log.zig"); +const lp = @import("lightpanda"); +const log = lp.log; const URL = @import("../../browser/URL.zig"); const WebBotAuth = @import("../WebBotAuth.zig"); From 83b047e66a0a686b5956ab2e1fae5114b518f9e6 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 06:59:16 -0700 Subject: [PATCH 23/31] assert that intercepted isn't 0 before decrementing --- src/browser/HttpClient.zig | 1 + src/network/layer/InterceptionLayer.zig | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 90d6b8ab..b8985fb4 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -1563,6 +1563,7 @@ pub const Transfer = struct { pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { if (comptime IS_DEBUG) { + lp.assert(self.interception_layer.intercepted > 0, "HttpClient.continueTransfer", .{ .value = self.interception_layer.intercepted }); log.debug(.http, "continue transfer", .{ .intercepted = self.interception_layer.intercepted }); } diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index c75ef1a1..79779427 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -190,6 +190,7 @@ pub const InterceptContext = struct { pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) anyerror!void { if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.continueRequest", .{ .value = self.intercepted }); log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); } @@ -203,6 +204,7 @@ pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) void { if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.abortRequest", .{ .value = self.intercepted }); log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); } self.intercepted -= 1; @@ -251,6 +253,7 @@ pub fn fulfillRequest( body: ?[]const u8, ) !void { if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.fulfillRequest", .{ .value = self.intercepted }); log.debug(.http, "fulfill transfer", .{ .intercepted = self.intercepted }); } From 4de1dc54248cdf8ca6bbb7d1680a87ab3dd6a9a2 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:03:00 -0700 Subject: [PATCH 24/31] properly call error callback in InterceptionLayer --- src/network/layer/InterceptionLayer.zig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 79779427..23b34217 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -90,7 +90,11 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { }); if (!wait_for_interception) { - return self.next.request(client, req); + return self.next.request(client, req) catch |err| { + req.error_callback(req.ctx, err); + client.deinitRequest(req); + return err; + }; } self.intercepted += 1; @@ -197,6 +201,7 @@ pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) self.intercepted -= 1; self.next.request(client, req) catch |err| { const ctx: *InterceptContext = @ptrCast(@alignCast(req.ctx)); + req.error_callback(req.ctx, err); ctx.client.deinitRequest(req); return err; }; From 3fe774fbfb388bb5f44792afd353fceb747a9e42 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:20:06 -0700 Subject: [PATCH 25/31] pass error all the way up to Layer chain to clean --- src/browser/HttpClient.zig | 7 +++++-- src/network/layer/CacheLayer.zig | 4 +--- src/network/layer/InterceptionLayer.zig | 8 ++------ src/network/layer/RobotsLayer.zig | 7 +------ 4 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index b8985fb4..12b90388 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -402,10 +402,13 @@ pub fn request(self: *Client, req: Request) !void { our_req.params.request_id = self.incrReqId(); const arena = try self.network.app.arena_pool.acquire(.small, "Request.arena"); - errdefer self.network.app.arena_pool.release(arena); our_req.params.arena = arena; - return self.entry_layer.request(self, our_req); + return self.entry_layer.request(self, our_req) catch |err| { + our_req.error_callback(our_req.ctx, err); + self.deinitRequest(our_req); + return err; + }; } const SyncContext = struct { diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 3db0a3c1..fd8a5e9f 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -63,7 +63,6 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { .timestamp = std.time.timestamp(), .request_headers = req_header_list.items, })) |cached| { - defer client.deinitRequest(req); return serveFromCache(req, &cached); } @@ -104,8 +103,7 @@ fn serveFromCache(req: Request, cached: *const CachedResponse) !void { const proceed = try req.header_callback(response); if (!proceed) { - req.error_callback(req.ctx, error.Abort); - return; + return error.Abort; } switch (cached.data) { diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 23b34217..ce399294 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -90,11 +90,7 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { }); if (!wait_for_interception) { - return self.next.request(client, req) catch |err| { - req.error_callback(req.ctx, err); - client.deinitRequest(req); - return err; - }; + return self.next.request(client, req); } self.intercepted += 1; @@ -214,8 +210,8 @@ pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) voi } self.intercepted -= 1; - defer client.deinitRequest(req); req.error_callback(req.ctx, error.Abort); + client.deinitRequest(req); } fn fulfillInner( diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index d4d5783a..96ca1b18 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -63,11 +63,8 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { const path = URL.getPathname(req.params.url); if (!robots.isAllowed(path)) { - defer client.deinitRequest(req); - log.warn(.http, "blocked by robots", .{ .url = req.params.url }); - req.error_callback(req.ctx, error.RobotsBlocked); - return; + return error.RobotsBlocked; } }, .absent => {}, @@ -85,8 +82,6 @@ fn fetchRobotsThenRequest( robots_url: [:0]const u8, req: Request, ) !void { - errdefer client.network.app.arena_pool.release(arena); - const entry = try self.pending.getOrPut(self.allocator, robots_url); if (!entry.found_existing) { From 1370f6805b0dbce871670770be2d9ae79c827f6f Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:21:57 -0700 Subject: [PATCH 26/31] add a note about cdp callback cb --- src/network/layer/InterceptionLayer.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index ce399294..165cd254 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -187,6 +187,8 @@ pub const InterceptContext = struct { }; // CDP Callbacks +// These handle their own clean up on errors with `self.next.request`. +// This is because they don't pass their error up the chain as they are async callbacks. pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) anyerror!void { if (comptime IS_DEBUG) { From 0ecf981f7e34584bc18c31dbc513cf8bcdce88c5 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:24:52 -0700 Subject: [PATCH 27/31] add assert to SyncRequest headerCallback --- src/browser/HttpClient.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 12b90388..dfafa683 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -425,6 +425,7 @@ const SyncContext = struct { fn headerCallback(response: Response) anyerror!bool { const self: *SyncContext = @ptrCast(@alignCast(response.ctx)); + lp.assert(response.status() != null, "HttpClient.SyncRequest.headerCallback", .{ .value = response.status() }); self.status = response.status().?; if (response.contentLength()) |cl| { try self.body.ensureTotalCapacity(self.allocator, cl); From 24ece021e1fb36889cbc6ead43cd7a1d874f8746 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:29:03 -0700 Subject: [PATCH 28/31] remove interception stuff in Transfer --- src/browser/HttpClient.zig | 63 ++++---------------------------------- 1 file changed, 6 insertions(+), 57 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index dfafa683..a810c535 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -1475,64 +1475,13 @@ pub const Transfer = struct { } pub fn responseHeaderIterator(self: *Transfer) HeaderIterator { - if (self._conn) |conn| { - // If we have a connection, than this is a real curl request and we - // iterate through the header that curl maintains. - return .{ .curl = .{ .conn = conn } }; - } - // If there's no handle, it either means this is being called before - // the request is even being made (which would be a bug in the code) - // or when a response was injected via transfer.fulfill. The injected - // header should be iterated, since there is no handle/easy. - return .{ .list = .{ .list = self.response_header.?._injected_headers } }; - } + // We always have a real curl request here. We handle injection up in InterceptionLayer. + lp.assert(self._conn != null, "Transfer.responseHeaderIterator", .{ .value = self._conn != null }); + const conn = self._conn.?; - pub fn fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { - if (transfer._conn != null) { - // should never happen, should have been intercepted/paused, and then - // either continued, aborted or fulfilled once. - @branchHint(.unlikely); - return error.RequestInProgress; - } - - transfer._fulfill(status, headers, body) catch |err| { - transfer.req.error_callback(transfer.req.ctx, err); - return err; - }; - } - - fn _fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { - const req = &transfer.req; - if (req.start_callback) |cb| { - try cb(Response.fromTransfer(transfer)); - } - - transfer.response_header = .{ - .status = status, - .url = req.params.url, - .redirect_count = 0, - ._injected_headers = headers, - }; - for (headers) |hdr| { - if (std.ascii.eqlIgnoreCase(hdr.name, "content-type")) { - const len = @min(hdr.value.len, ResponseHead.MAX_CONTENT_TYPE_LEN); - @memcpy(transfer.response_header.?._content_type[0..len], hdr.value[0..len]); - transfer.response_header.?._content_type_len = len; - break; - } - } - - lp.assert(transfer._header_done_called == false, "Transfer.fulfill header_done_called", .{}); - if (try req.header_callback(Response.fromTransfer(transfer)) == false) { - transfer.abort(error.Abort); - return; - } - - if (body) |b| { - try req.data_callback(Response.fromTransfer(transfer), b); - } - - try req.done_callback(req.ctx); + // If we have a connection, than this is a real curl request and we + // iterate through the header that curl maintains. + return .{ .curl = .{ .conn = conn } }; } // This function should be called during the dataCallback. Calling it after From 1ab445843c5c5c03314fcba54f703ac67a9c67db Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:40:14 -0700 Subject: [PATCH 29/31] better arena management in Robots Layer and Context --- src/network/layer/RobotsLayer.zig | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 96ca1b18..1bfae1b6 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -72,13 +72,12 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { return self.next.request(client, req); } - return self.fetchRobotsThenRequest(client, arena, robots_url, req); + return self.fetchRobotsThenRequest(client, robots_url, req); } fn fetchRobotsThenRequest( self: *RobotsLayer, client: *Client, - arena: std.mem.Allocator, robots_url: [:0]const u8, req: Request, ) !void { @@ -88,11 +87,16 @@ fn fetchRobotsThenRequest( errdefer std.debug.assert(self.pending.remove(robots_url)); entry.value_ptr.* = .empty; - const robots_ctx = try arena.create(RobotsContext); + // This arena is later owned by the Request. It does not need to be cleaned up by us because + // it will be cleaned up by the `Transfer.deinit()` or any `Request.deinit()` called on any sublayers. + const new_arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer.RobotsContext"); + errdefer client.network.app.arena_pool.release(new_arena); + + const robots_ctx = try new_arena.create(RobotsContext); robots_ctx.* = .{ .layer = self, .client = client, - .arena = arena, + .arena = new_arena, .robots_url = robots_url, .buffer = .empty, }; @@ -100,9 +104,6 @@ fn fetchRobotsThenRequest( const headers = try client.newHeaders(); log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); - const new_arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer.fetchRobots"); - errdefer client.network.app.arena_pool.release(new_arena); - try self.next.request(client, .{ .ctx = robots_ctx, .params = .{ @@ -125,8 +126,6 @@ fn fetchRobotsThenRequest( .error_callback = RobotsContext.errorCallback, .shutdown_callback = RobotsContext.shutdownCallback, }); - } else { - client.network.app.arena_pool.release(arena); } try entry.value_ptr.append(self.allocator, req); @@ -202,7 +201,6 @@ const RobotsContext = struct { const l = self.layer; const client = self.client; const robots_url = self.robots_url; - defer client.network.app.arena_pool.release(self.arena); var allowed = true; const network = client.network; @@ -246,7 +244,6 @@ const RobotsContext = struct { const l = self.layer; const client = self.client; const robots_url = self.robots_url; - defer client.network.app.arena_pool.release(self.arena); log.warn(.http, "robots fetch failed", .{ .err = err }); l.flushPending(client, robots_url, true); @@ -257,7 +254,6 @@ const RobotsContext = struct { const l = self.layer; const client = self.client; const robots_url = self.robots_url; - defer client.network.app.arena_pool.release(self.arena); log.debug(.http, "robots fetch shutdown", .{}); l.flushPendingShutdown(robots_url, client); From 85a5c0f927c5837b4a1f056e5190f94e8c0291e5 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 27 Apr 2026 07:50:57 -0700 Subject: [PATCH 30/31] decrement intercepted and properly deinit on BrowserContext deinit --- src/cdp/CDP.zig | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 326e93c5..e646f4b8 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -450,13 +450,22 @@ pub const BrowserContext = struct { // abort all intercepted requests before closing the session/page // since some of these might callback into the page/scriptmanager + const http_client = browser.http_client; for (self.intercept_state.pendingIntercepts()) |intercept| { + defer { + lp.assert( + http_client.interception_layer.intercepted > 0, + "BrowserContext.deinit.intercepted", + .{ .value = http_client.interception_layer.intercepted }, + ); + http_client.interception_layer.intercepted -= 1; + } switch (intercept) { .transfer => |t| { t.abort(error.ClientDisconnect); }, .request => |r| { - defer r.deinit(); + defer http_client.deinitRequest(r); r.error_callback(r.ctx, error.ClientDisconnect); }, } From 1057b9de8d59abae399e8b4c466d6464d2cdcf34 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Tue, 28 Apr 2026 06:24:13 -0700 Subject: [PATCH 31/31] toRequestId2 -> toRequestId on CDP --- src/cdp/domains/fetch.zig | 4 ++-- src/cdp/domains/network.zig | 8 ++++---- src/cdp/id.zig | 14 +------------- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 672e4b92..edff1761 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -214,7 +214,7 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification. .document => "Document", .fetch => "Fetch", }, - .networkId = &id.toRequestId2(request), // matches the Network REQ-ID + .networkId = &id.toRequestId(request), // matches the Network REQ-ID }, .{ .session_id = session_id }); log.debug(.cdp, "request intercept", .{ @@ -446,7 +446,7 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", .realm = challenge.realm orelse "", }, - .networkId = &id.toRequestId2(&request), + .networkId = &id.toRequestId(&request), }, .{ .session_id = session_id }); log.debug(.cdp, "request auth required", .{ diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 1458bb0c..0554681a 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -262,7 +262,7 @@ pub fn httpRequestFail(bc: *CDP.BrowserContext, msg: *const Notification.Request // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.loadingFailed", .{ - .requestId = &id.toRequestId2(msg.request), + .requestId = &id.toRequestId(msg.request), // Seems to be what chrome answers with. I assume it depends on the type of error? .type = "Ping", .errorText = msg.err, @@ -287,7 +287,7 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques // We're missing a bunch of fields, but, for now, this eems like enough try bc.cdp.sendEvent("Network.requestWillBeSent", .{ .frameId = &id.toFrameId(frame_id), - .requestId = &id.toRequestId2(req), + .requestId = &id.toRequestId(req), .loaderId = &id.toLoaderId(req.params.loader_id), .type = req.params.resource_type.string(), .documentURL = frame.url, @@ -310,7 +310,7 @@ pub fn httpResponseHeaderDone(arena: Allocator, bc: *CDP.BrowserContext, msg: *c // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.responseReceived", .{ .frameId = &id.toFrameId(req.params.frame_id), - .requestId = &id.toRequestId2(req), + .requestId = &id.toRequestId(req), .loaderId = &id.toLoaderId(req.params.loader_id), .response = ResponseWriter.init(arena, msg.response), .hasExtraInfo = false, // TODO change after adding Network.responseReceivedExtraInfo @@ -323,7 +323,7 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request const session_id = bc.session_id orelse return; const req = msg.request; try bc.cdp.sendEvent("Network.loadingFinished", .{ - .requestId = &id.toRequestId2(req), + .requestId = &id.toRequestId(req), .encodedDataLength = msg.content_length, }, .{ .session_id = session_id }); } diff --git a/src/cdp/id.zig b/src/cdp/id.zig index cb304f6c..f6889d24 100644 --- a/src/cdp/id.zig +++ b/src/cdp/id.zig @@ -40,20 +40,8 @@ pub fn toLoaderId(id: u32) [14]u8 { // requestId has special requirements. If it's the main document navigation, // then it should match the loader id. -const Transfer = @import("../browser/HttpClient.zig").Transfer; -pub fn toRequestId(transfer: *const Transfer) [14]u8 { - const req = transfer.req; - if (req.params.resource_type == .document) { - return toLoaderId(req.params.loader_id); - } - - var buf: [14]u8 = undefined; - _ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{transfer.id}) catch unreachable; - return buf; -} - const Request = @import("../browser/HttpClient.zig").Request; -pub fn toRequestId2(req: *const Request) [14]u8 { +pub fn toRequestId(req: *const Request) [14]u8 { if (req.params.resource_type == .document) { return toLoaderId(req.params.loader_id); }