diff --git a/src/Notification.zig b/src/Notification.zig index 7079c658..6d65eb3d 100644 --- a/src/Notification.zig +++ b/src/Notification.zig @@ -21,9 +21,7 @@ const lp = @import("lightpanda"); const Frame = @import("browser/Frame.zig"); const Transfer = @import("browser/HttpClient.zig").Transfer; -const Request = @import("browser/HttpClient.zig").Request; const Response = @import("browser/HttpClient.zig").Response; -const InterceptContext = @import("network/layer/InterceptionLayer.zig").InterceptContext; const log = lp.log; const List = std.DoublyLinkedList; @@ -170,11 +168,11 @@ pub const FrameLoaded = struct { }; pub const RequestStart = struct { - request: *Request, + transfer: *Transfer, }; pub const RequestIntercept = struct { - request: *Request, + transfer: *Transfer, wait_for_interception: *bool, }; @@ -185,21 +183,21 @@ pub const RequestAuthRequired = struct { pub const ResponseData = struct { data: []const u8, - request: *Request, + transfer: *Transfer, }; pub const ResponseHeaderDone = struct { - request: *Request, + transfer: *Transfer, response: *const Response, }; pub const RequestDone = struct { - request: *Request, + transfer: *Transfer, content_length: usize, }; pub const RequestFail = struct { - request: *Request, + transfer: *Transfer, err: anyerror, }; diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig index e5ac2d9e..1c7f0bfd 100644 --- a/src/browser/Frame.zig +++ b/src/browser/Frame.zig @@ -154,6 +154,8 @@ _to_load: *std.ArrayList(*Element.Html) = undefined, _style_manager: StyleManager, _script_manager: ScriptManager, +_http_owner: HttpClient.Owner = .{}, + // List of active live ranges (for mutation updates per DOM spec) _live_ranges: std.DoublyLinkedList = .{}, @@ -403,8 +405,7 @@ pub fn deinit(self: *Frame) void { const browser = page.session.browser; - // don't abort pending frames. - browser.http_client.abortFrame(self._frame_id, .{}); + browser.http_client.abortOwner(&self._http_owner); browser.env.destroyContext(self.js); @@ -632,7 +633,7 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo // Session.initiateRootNavigation) flags both the notification and the // HTTP request itself: CDP skips its node-registry reset until commit, // and the in-flight transfer survives the OLD page's frame.deinit which - // calls http_client.abortFrame(frame_id) on the shared frame_id during + // calls http_client.abortList() on the shared frame_id during // commitPendingPage. const is_pending_root = self._page._state == .pending; @@ -656,7 +657,7 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo session.navigation._current_navigation_kind = opts.kind; - http_client.request(.{ + self.makeRequest(.{ .ctx = self, .params = .{ .url = self.url, @@ -669,7 +670,6 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo .cookie_origin = self.url, .resource_type = .document, .notification = self._session.notification, - .protect_from_abort = is_pending_root, }, .header_callback = frameHeaderDoneCallback, .data_callback = frameDataCallback, @@ -760,7 +760,9 @@ fn scheduleNavigationWithArena(originator: *Frame, arena: Allocator, request_url .type = target._type, }); - session.browser.http_client.abortFrame(target._frame_id, .{}); + // Navigation: kill in-flight HTTP transfers, but leave WebSockets + // alive — they're cross-document by spec. + session.browser.http_client.abortRequests(&target._http_owner); // Capture the originating frame's URL as the Referer for this // navigation. The originator's frame may be torn down before navigate() @@ -821,6 +823,19 @@ fn canScheduleNavigation(self: *Frame, new_target_type: NavigationType) bool { }; } +pub fn makeRequest(self: *Frame, req: HttpClient.Request) !void { + return self._session.browser.http_client.request(req, &self._http_owner); +} + +// Synchronously abort every transfer and WebSocket owned by this frame +// and all of its descendants. +pub fn abortTransfers(self: *Frame) void { + for (self.child_frames.items) |child| { + child.abortTransfers(); + } + self._session.browser.http_client.abortOwner(&self._http_owner); +} + pub fn documentIsLoaded(self: *Frame) void { if (self._load_state != .parsing) { // Ideally, documentIsLoaded would only be called once, but if a @@ -971,20 +986,8 @@ fn frameHeaderDoneCallback(response: HttpClient.Response) !bool { // frame_remove (clears OLD V8 context group + CDP node_registry), // tears down the OLD page, flips the pointer, and dispatches // frame_created against the new (now active) frame. - // - // The OLD page's frame.deinit calls http_client.abortFrame(frame_id) on - // the frame_id it shares with the (now-active) pending page; our transfer - // survives because Session.initiateRootNavigation flagged the request - // protect_from_abort, which abortFrame's default .normal scope honors. - // Once we are past commit, that protection is no longer needed and may - // interfere with subsequent aborts (e.g. another navigation while we are - // still streaming the body), so clear it. if (self._page._state == .pending) { try self._session.commitPendingPage(); - switch (response.inner) { - .transfer => |t| t.req.params.protect_from_abort = false, - .fulfilled, .cached => {}, - } } const response_url = response.url(); diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 7e1c5abc..8b78cf4c 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -20,14 +20,16 @@ const std = @import("std"); const lp = @import("lightpanda"); const builtin = @import("builtin"); -const URL = @import("URL.zig"); +const ArenaPool = @import("../ArenaPool.zig"); const Notification = @import("../Notification.zig"); +const timestamp = @import("../datetime.zig").timestamp; + +const URL = @import("URL.zig"); const CookieJar = @import("webapi/storage/Cookie.zig").Jar; const http = @import("../network/http.zig"); -const Network = @import("../network/Network.zig"); const Robots = @import("../network/Robots.zig"); -const timestamp = @import("../datetime.zig").timestamp; +const Network = @import("../network/Network.zig"); const log = lp.log; const posix = std.posix; @@ -46,7 +48,7 @@ pub const RobotsLayer = @import("../network/layer/RobotsLayer.zig"); pub const WebBotAuthLayer = @import("../network/layer/WebBotAuthLayer.zig"); pub const InterceptionLayer = @import("../network/layer/InterceptionLayer.zig"); -// This is loosely tied to a browser Page. Loading all the , doing +// This is loosely tied to a browser Frame. Loading all the , doing // XHR requests, and loading imports all happens through here. Sine the app // currently supports 1 browser and 1 frame at-a-time, we only have 1 Client and // re-use it from frame to frame. This allows us better re-use of the various @@ -55,7 +57,7 @@ pub const InterceptionLayer = @import("../network/layer/InterceptionLayer.zig"); // The app has other secondary http needs, like telemetry. While we want to // share some things (namely the ca blob, and maybe some configuration // (TODO: ??? should proxy settings be global ???)), we're able to call -// client.abortFrame() to abort the transfers being made by a frame, without +// client.abortList() to abort the transfers being made by a frame, without // impacting those other http requests. pub const Client = @This(); @@ -80,6 +82,13 @@ performing: bool = false, // Use to generate the next request ID next_request_id: u32 = 0, +// Every currently-alive Transfer indexed by its id. Maintained so cross- +// component code (CDP intercept state, future scheduling/debugging) can +// look up a transfer by id without holding a *Transfer that might dangle. +// Inserted in Client.request, removed in Transfer.deinit. The pointer is +// only valid for the lifetime of the entry. +transfers: std.AutoHashMapUnmanaged(u32, *Transfer) = .empty, + // When handles has no more available easys, requests get queued. queue: std.DoublyLinkedList = .{}, @@ -95,10 +104,7 @@ allocator: Allocator, network: *Network, -// Once we have a handle/easy to process a request with, we create a Transfer -// which contains the Request as well as any state we need to process the -// request. These will come and go with each request. -transfer_pool: std.heap.MemoryPool(Transfer), +arena_pool: *ArenaPool, // The current proxy. CDP can change it, changeProxy(null) restores // from config. @@ -135,11 +141,11 @@ pub const Layer = struct { vtable: *const VTable, pub const VTable = struct { - request: *const fn (*anyopaque, *Client, Request) anyerror!void, + request: *const fn (*anyopaque, *Transfer) anyerror!void, }; - pub fn request(self: Layer, client: *Client, req: Request) !void { - return self.vtable.request(self.ptr, client, req); + pub fn request(self: Layer, transfer: *Transfer) !void { + return self.vtable.request(self.ptr, transfer); } }; @@ -167,9 +173,6 @@ pub const CDPClient = struct { }; pub fn init(self: *Client, allocator: Allocator, network: *Network, cdp_client: ?CDPClient) !void { - var transfer_pool = std.heap.MemoryPool(Transfer).init(allocator); - errdefer transfer_pool.deinit(); - var handles = try http.Handles.init(network.config); errdefer handles.deinit(); @@ -179,7 +182,6 @@ pub fn init(self: *Client, allocator: Allocator, network: *Network, cdp_client: .handles = handles, .network = network, .allocator = allocator, - .transfer_pool = transfer_pool, .cdp_client = cdp_client, .use_proxy = http_proxy != null, @@ -189,10 +191,11 @@ pub fn init(self: *Client, allocator: Allocator, network: *Network, cdp_client: .max_response_size = network.config.httpMaxResponseSize() orelse std.math.maxInt(u32), .cache_layer = .{}, - .robots_layer = .{ .allocator = allocator }, + .robots_layer = .{ .allocator = allocator, .network = network }, .web_bot_auth_layer = .{}, .interception_layer = .{}, .entry_layer = undefined, + .arena_pool = &network.app.arena_pool, }; var next = self.layer(); @@ -220,10 +223,18 @@ pub fn deinit(self: *Client) void { self.abort(); self.handles.deinit(); - self.transfer_pool.deinit(); self.clearUserAgentOverride(); self.robots_layer.deinit(self.allocator); + self.transfers.deinit(self.allocator); +} + +// Look up a live transfer by its id. Returns null if the transfer has been +// destroyed. Use this — rather than holding *Transfer across yields — for +// any code path that's interleaved with the request lifecycle (CDP +// continueRequest/fulfill/abort, async cleanups). +pub fn findTransfer(self: *Client, id: u32) ?*Transfer { + return self.transfers.get(id); } pub fn layer(self: *Client) Layer { @@ -300,121 +311,152 @@ pub fn getUserAgent(self: *const Client) [:0]const u8 { return self.user_agent_override orelse self.network.config.http_headers.user_agent; } -const AbortOpts = struct { - scope: enum { normal, full } = .normal, -}; - pub fn abort(self: *Client) void { - self._abort(true, 0, .{ .scope = .full }); -} - -// abortFrame with .normal doesn't abort protect_from_abort requests. -// .full abort all relqtive requests. -pub fn abortFrame(self: *Client, frame_id: u32, opts: AbortOpts) void { - self._abort(false, frame_id, opts); -} - -// Written this way so that both abort and abortFrame can share the same code -// but abort can avoid the frame_id check at comptime. -fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32, opts: AbortOpts) void { - abortConnections(self.in_use, abort_all, frame_id, opts); - abortConnections(self.ready_queue, abort_all, frame_id, opts); - - { - var q = &self.queue; - var n = q.first; - while (n) |node| { - n = node.next; - const transfer: *Transfer = @fieldParentPtr("_node", node); - const params = transfer.req.params; - if (comptime abort_all) { - transfer.kill(); - } else if (params.frame_id == frame_id) { - if (opts.scope == .full or !params.protect_from_abort) { - q.remove(node); - transfer.kill(); - } - } - } + // Snapshot before killing: kill() -> deinit removes entries from + // self.transfers, which would invalidate a live iterator. + var snapshot = std.ArrayList(*Transfer).initCapacity(self.allocator, self.transfers.count()) catch @panic("OOM"); + defer snapshot.deinit(self.allocator); + var it = self.transfers.valueIterator(); + while (it.next()) |t| { + snapshot.appendAssumeCapacity(t.*); } - if (comptime abort_all) { - self.queue = .{}; - self.ready_queue = .{}; + for (snapshot.items) |t| { + t.kill(); } - if (comptime IS_DEBUG and abort_all) { - var it = self.in_use.first; - var leftover: usize = 0; - while (it) |node| : (it = node.next) { - const conn: *http.Connection = @fieldParentPtr("node", node); - switch (conn.transport) { - .http => |transfer| std.debug.assert(transfer.aborted), - .websocket => {}, - .none => {}, - } - leftover += 1; - } - std.debug.assert(self.http_active == leftover); + // After the kill loop, every internal list should drain itself via + // each transfer's deinit: + // - self.transfers : transfers.remove(self.id) + // - self.queue : unlinked if _queued is set + // - self.in_use / self.ready_queue : via removeConn + // - self.dirty : drained at end of each perform; nothing left here + // Any non-empty list means a transfer escaped cleanup — assert so we + // catch the regression rather than silently leaking on next use. + if (comptime IS_DEBUG) { + std.debug.assert(self.transfers.size == 0); + std.debug.assert(self.queue.first == null); + std.debug.assert(self.in_use.first == null); + std.debug.assert(self.ready_queue.first == null); + std.debug.assert(self.dirty.first == null); } } -fn abortConnections(list: std.DoublyLinkedList, comptime abort_all: bool, frame_id: u32, opts: AbortOpts) void { - var n = list.first; +// Kill every transfer + websocket owned by `owner`. Used when the owner +// (Frame / WorkerGlobalScope) is being torn down. After this returns, +// every WebSocket is fully gone; HTTP transfers that were mid-perform may +// still be on `owner.transfers` (Transfer.kill defers their deinit), but +// they've been unlinked from the owner list via kill()'s deferred branch +// so the owner is free to die. +pub fn abortOwner(self: *Client, owner: *Owner) void { + self.abortRequests(owner); + var n = owner.websockets.first; while (n) |node| { n = node.next; - const conn: *http.Connection = @fieldParentPtr("node", node); - switch (conn.transport) { - .http => |transfer| { - const params = transfer.req.params; - if (comptime abort_all) { - transfer.kill(); - } else if (params.frame_id == frame_id) { - if (opts.scope == .full or !params.protect_from_abort) { - transfer.kill(); - } - } - }, - .websocket => |ws| { - if ((comptime abort_all) or ws._frame._frame_id == frame_id) { - ws.kill(); - } - }, - .none => unreachable, - } + const ws: *@import("webapi/net/WebSocket.zig") = @fieldParentPtr("_owner_node", node); + ws.kill(); } + if (comptime IS_DEBUG) { + std.debug.assert(owner.websockets.first == null); + } +} + +// HTTP-only variant. WebSockets survive (they're cross-document by +// design). Used by the navigation path that aborts in-flight resource +// loads for a frame but lets its WebSockets keep running. +pub fn abortRequests(_: *Client, owner: *Owner) void { + var n = owner.transfers.first; + while (n) |node| { + n = node.next; + const t: *Transfer = @fieldParentPtr("owner_node", node); + t.kill(); + } + // owner.transfers may still have entries: Transfer.kill defers + // (flags `aborted` + noops callbacks) when called mid-perform and + // only fully deinits later via processOneMessage. The deferred-branch + // unlinks the node and clears Transfer.owner, so by the time the + // owner itself is freed, no orphan transfer points at it. } pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { while (self.queue.popFirst()) |queue_node| { + const transfer: *Transfer = @fieldParentPtr("_node", queue_node); const conn = self.network.getConnection() orelse { self.queue.prepend(queue_node); break; }; - - try self.makeRequest(conn, @fieldParentPtr("_node", queue_node)); + // Cleared only after we've successfully obtained a connection; + // if we put the node back, _queued stays true. + transfer._queued = false; + try self.makeRequest(conn, transfer); } return self.perform(@intCast(timeout_ms)); } -pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { - const self: *Client = @ptrCast(@alignCast(ptr)); - const transfer = try self.makeTransfer(req); - return self.process(transfer); +// last layer +pub fn _request(_: *anyopaque, transfer: *Transfer) !void { + return transfer.client.process(transfer); } -pub fn request(self: *Client, req: Request) !void { - // Assign Request Id. - var our_req = req; - our_req.params.request_id = self.incrReqId(); +// Ownership contract: from the moment this function is entered, the +// HttpClient owns `req` — specifically `req.params.headers` (a curl_slist). +// On success, transfer.deinit eventually frees it. On any failure path +// inside this function, we free it before returning the error. Callers +// must NOT pair `request()` with their own `errdefer headers.deinit()` +// — that's a double-free. +pub fn request(self: *Client, req: Request, owner: ?*Owner) !void { + const arena = self.arena_pool.acquire(.small, "Request.arena") catch |err| { + req.params.headers.deinit(); + return err; + }; - const arena = try self.network.app.arena_pool.acquire(.small, "Request.arena"); - our_req.params.arena = arena; + const transfer = arena.create(Transfer) catch |err| { + req.params.headers.deinit(); + self.arena_pool.release(arena); + return err; + }; - return self.entry_layer.request(self, our_req) catch |err| { - our_req.error_callback(our_req.ctx, err); - self.deinitRequest(our_req); + transfer.* = .{ + .req = req, + .url = req.params.url, + .client = self, + // owner is set AFTER we've actually appended to the owner list, + // so transfer.deinit's `if (self.owner)` branch only fires when + // we're truly linked. Otherwise we'd try to remove a node from + // a list it was never in. + .owner = null, + .arena = arena, + .id = self.incrReqId(), + .start_time = timestamp(.monotonic), + .owner_node = .{}, + }; + + // From here, transfer owns req+arena. Any subsequent failure flows + // through transfer.deinit (or transfer.abort), which handles headers + // via req.deinit. Do NOT free headers directly past this point. + + // Register for id-based lookup. putNoClobber would fail if request_id + // collides (i.e. we've wrapped through 2^32 requests and the old + // transfer is still alive — practically never). + self.transfers.putNoClobber(self.allocator, transfer.id, transfer) catch |err| { + transfer.deinit(); + return err; + }; + + if (owner) |o| { + o.addTransfer(transfer); + transfer.owner = o; + } + + // From this point forward, the transfer owns `req` and `arena`. If the + // layer chain fails before any layer commits the transfer to an external + // owner (queue / multi handle / pending interception), we clean up here + // via transfer.abort which fires error_callback and deinits. + self.entry_layer.request(transfer) catch |err| { + if (!transfer.loop_owned) { + transfer.abort(err); + } return err; }; } @@ -474,7 +516,7 @@ pub fn syncRequest(self: *Client, allocator: Allocator, params: RequestParams) ! .done_callback = SyncContext.doneCallback, .error_callback = SyncContext.errorCallback, .shutdown_callback = SyncContext.shutdownCallback, - }); + }, null); while (sync_ctx.completion == .in_progress) { const status = try self.tick(200); @@ -511,6 +553,8 @@ fn process(self: *Client, transfer: *Transfer) !void { } self.queue.append(&transfer._node); + transfer._queued = true; + transfer.loop_owned = true; } pub fn nextReqId(self: *Client) u32 { @@ -523,37 +567,6 @@ pub fn incrReqId(self: *Client) u32 { return id; } -fn makeTransfer(self: *Client, req: Request) !*Transfer { - const transfer = try self.transfer_pool.create(); - errdefer self.transfer_pool.destroy(transfer); - - transfer.* = .{ - .start_time = timestamp(.monotonic), - .id = req.params.request_id, - .url = req.params.url, - .req = req, - .client = self, - }; - return transfer; -} - -fn requestFailed(transfer: *Transfer, err: anyerror, comptime execute_callback: bool) void { - if (transfer._notified_fail) { - // we can force a failed request within a callback, which will eventually - // result in this being called again in the more general loop. We do this - // because we can raise a more specific error inside a callback in some cases - return; - } - - transfer._notified_fail = true; - - if (execute_callback) { - transfer.req.error_callback(transfer.req.ctx, err); - } else if (transfer.req.shutdown_callback) |cb| { - cb(transfer.req.ctx); - } -} - // Same restriction as changeProxy. Should be ok since this is only called on // BrowserContext deinit. pub fn restoreOriginalProxy(self: *Client) !void { @@ -573,27 +586,28 @@ fn makeRequest(self: *Client, conn: *http.Connection, transfer: *Transfer) anyer transfer._conn = conn; errdefer { transfer._conn = null; - transfer.deinit(); self.releaseConn(conn); } try transfer.configureConn(conn); } - // As soon as this is called, our "perform" loop is responsible for - // cleaning things up. That's why the above code is in a block. If anything - // fails BEFORE `curl_multi_add_handle` succeeds, the we still need to do - // cleanup. But if things fail after `curl_multi_add_handle`, we expect - // perform to pickup the failure and cleanup. + // As soon as trackConn succeeds, the multi handle owns the transfer's + // lifecycle. perform/processMessages will eventually invoke completion + // callbacks and call transfer.deinit. We flag loop_owned so Client.request + // (or anyone else holding the transfer pointer) knows not to deinit it. self.trackConn(conn) catch |err| { transfer._conn = null; - transfer.deinit(); return err; }; + transfer.loop_owned = true; if (transfer.req.start_callback) |cb| { cb(Response.fromTransfer(transfer)) catch |err| { - transfer.deinit(); + // We're now committed to the multi. transfer.abort fires the + // error_callback and tears down (removeConn handles the + // already-in-multi case via the dirty queue). + transfer.abort(err); return err; }; } @@ -868,11 +882,6 @@ fn ensureNoActiveConnection(self: *const Client) !void { } pub const RequestParams = struct { - /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. - arena: Allocator = undefined, - /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. - request_id: u32 = undefined, - frame_id: u32, loader_id: u32, method: Method, @@ -885,15 +894,7 @@ pub const RequestParams = struct { credentials: ?[:0]const u8 = null, notification: *Notification, timeout_ms: u32 = 0, - - // Set on an in-flight root-navigation transfer that was issued against a - // pending Page. The old Page's frame.deinit (called from Session.commit - // PendingPage when response headers arrive) calls abortFrame() on the - // shared frame_id; abortFrame's default .normal scope skips transfers - // with this flag so the callback chain we are sitting inside isn't killed - // mid-flight. Session.discardPendingPage uses .full scope to override - // the flag in failure paths. - protect_from_abort: bool = false, + skip_robots: bool = false, const ResourceType = enum { document, @@ -939,9 +940,9 @@ pub const Request = struct { error_callback: ErrorCallback, shutdown_callback: ?ShutdownCallback = null, - pub fn getCookieString(self: *Request) !?[:0]const u8 { + pub fn getCookieString(self: *Request, arena: Allocator) !?[:0]const u8 { const jar = self.params.cookie_jar orelse return null; - var aw: std.Io.Writer.Allocating = .init(self.params.arena); + var aw: std.Io.Writer.Allocating = .init(arena); try jar.forRequest(self.params.url, &aw.writer, .{ .is_http = true, .origin_url = self.params.cookie_origin, @@ -1069,6 +1070,26 @@ pub const SyncResponse = struct { pub const Transfer = struct { id: u32 = 0, + arena: Allocator, + + owner: ?*Owner, + owner_node: std.DoublyLinkedList.Node = .{}, + + // Latched true by the first commit point that hands the transfer off to + // an external owner: client.queue.append, successful trackConn, or + // InterceptionLayer pausing for a CDP response. Once set, Client.request's + // errdefer skips cleanup — whoever now owns the transfer will deinit it. + loop_owned: bool = false, + + // True iff `_node` is currently linked in `client.queue` (waiting for a + // libcurl handle). Set in `Client.process` on enqueue, cleared in + // `Client.tick` on popFirst, and used by `Transfer.deinit` to safely + // unlink — `deinit` has no other way to detect queue membership, and + // a transfer aborted while queued (e.g. via owner-list abort) would + // otherwise leave a dangling `_node` in `client.queue` that the next + // `tick` would dereference and hand to libcurl. + _queued: bool = false, + req: Request, url: [:0]const u8, client: *Client, @@ -1119,65 +1140,126 @@ pub const Transfer = struct { } } - fn deinit(self: *Transfer) void { + pub fn deinit(self: *Transfer) void { if (self._conn) |conn| { self.client.removeConn(conn); self._conn = null; } - self.client.deinitRequest(self.req); - self.client.transfer_pool.destroy(self); - } - - pub fn abort(self: *Transfer, err: anyerror) void { - self.requestFailed(err, true); - - if (self._performing or self.client.performing) { - // We're currently in a curl_multi_perform. We cannot call - // curl_multi_remove_handle from a curl callback. Instead, we flag - // this transfer and our callbacks will check for this flag. - self.aborted = true; - return; + // Unlink from client.queue if we were waiting for a handle. + // Without this, deinit'ing a queued transfer (e.g. via owner-list + // abort during navigation) leaves a dangling _node in the queue + // that the next tick would pop and hand to libcurl → UAF. + if (self._queued) { + self.client.queue.remove(&self._node); + self._queued = false; } - self.deinit(); + // Drop the id→*Transfer index entry before freeing the memory. + // Any concurrent CDP lookup by id will now see this transfer as gone. + _ = self.client.transfers.remove(self.id); + + self.req.deinit(); + if (self.owner) |o| { + o.removeTransfer(self); + } + // The Transfer itself lives on this arena, so this must be last — + // `self` is invalid memory after release. + const arena_pool = self.client.arena_pool; + const arena = self.arena; + arena_pool.release(arena); } - pub fn terminate(self: *Transfer) void { - self.requestFailed(error.Shutdown, false); - self.deinit(); + // Cancel this transfer with `err`. Fires error_callback once (latched + // via _notified_fail), then either deinits synchronously or, if we're + // mid-perform with a libcurl handle still in the multi, detaches and + // lets the natural processOneMessage flow deinit later. + // + // This is the ONE entry point external callers should use to cancel + // a transfer. Don't reach for kill() or requestFailed() directly — + // they're internal helpers. + pub fn abort(self: *Transfer, err: anyerror) void { + self.requestFailed(err, true); + self.detachOrDeinit(); } - // internal, when the frame is shutting down. Doesn't have the same ceremony - // as abort (doesn't send a notification, doesn't invoke an error callback) + // Owner-driven teardown: fires shutdown_callback (not error_callback) + // and otherwise behaves like abort. Called by Client.abortOwner / + // abortRequests when a Frame / WGS is being torn down. fn kill(self: *Transfer) void { if (self.req.shutdown_callback) |cb| { cb(self.req.ctx); } - - if (self._performing or self.client.performing) { - // We're currently inside of a callback. This client, and libcurl - // generally don't expect a transfer to become deinitialized during - // a callback. We can flag the transfer as aborted (which is what - // we do when transfer.abort() is called in this condition) AND, - // since this "kill()"should prevent any future callbacks, the best - // we can do is null/noop them. - self.aborted = true; - self.req.start_callback = null; - self.req.shutdown_callback = null; - self.req.header_callback = Noop.headerCallback; - self.req.data_callback = Noop.dataCallback; - self.req.done_callback = Noop.doneCallback; - self.req.error_callback = Noop.errorCallback; - return; - } - - self.deinit(); + self.detachOrDeinit(); } - // We can force a failed request within a callback, which will eventually - // result in this being called again in the more general loop. We do this - // because we can raise a more specific error inside a callback in some cases. + // Decide whether to tear down now or defer until processOneMessage + // eventually drains the in-flight curl handle. + // + // Two cases force deferral: + // * `_performing` — processOneMessage is currently processing THIS + // transfer (set/cleared around the callback chain). It will call + // `transfer.deinit` itself after the chain returns; deiniting + // here would double-free. Note that `_conn` is cleared partway + // through this window (the "release conn ASAP" step before + // done_callback fires), so we cannot rely on `_conn != null`. + // * `client.performing` + we have a libcurl handle — libcurl could + // still fire callbacks for us. Releasing the arena now would UAF + // from inside curl. + // + // Otherwise (parked / queued / never-trackConn'd / fully drained), + // there is nothing left referencing this transfer and we can safely + // deinit inline even from inside a perform callback. + fn detachOrDeinit(self: *Transfer) void { + const must_defer = self._performing or + (self.client.performing and self._conn != null); + if (must_defer) { + self.detachInPerform(); + } else { + self.deinit(); + } + } + + // Deferred-cleanup path when we can't synchronously deinit. + // + // We: + // - flag `aborted` so processOneMessage's normal-completion paths + // short-circuit when they next see this transfer, + // - noop every user callback so libcurl naturally draining the + // in-flight response can't re-enter user code, + // - unlink from owner.transfers and clear `owner` so the owning + // Frame/WGS can be freed while this transfer is still draining. + // transfer.deinit (called later by processOneMessage) sees + // `owner == null` and skips the list-remove that would otherwise + // UAF against a freed list. + fn detachInPerform(self: *Transfer) void { + self.aborted = true; + self.req.start_callback = null; + self.req.shutdown_callback = null; + self.req.header_callback = Noop.headerCallback; + self.req.data_callback = Noop.dataCallback; + self.req.done_callback = Noop.doneCallback; + self.req.error_callback = Noop.errorCallback; + if (self.owner) |o| { + o.removeTransfer(self); + self.owner = null; + } + } + + // Internal failure-notification helper. Latches via _notified_fail so + // multiple paths racing to report the same failure only fire one + // notification. Goes through transfer.req — so layer wrappers + // (InterceptContext, CacheContext) see the failure and can propagate + // it up the chain. + // + // Not part of the external API: callers cancelling a transfer should + // use transfer.abort(err) instead, which goes through this and also + // handles the deinit / detach side. The internal HttpClient flow uses + // this directly (from processOneMessage) because it's already paired + // with the natural processMessages → transfer.deinit handoff. + // + // execute_callback=true → fires error_callback. false → fires + // shutdown_callback (used by Frame shutdown / WGS teardown). fn requestFailed(self: *Transfer, err: anyerror, comptime execute_callback: bool) void { if (self._notified_fail) return; self._notified_fail = true; @@ -1212,7 +1294,7 @@ pub const Transfer = struct { try conn.setHeaders(&header_list); // Add cookies from cookie jar. - if (try self.req.getCookieString()) |cookies| { + if (try self.req.getCookieString(self.arena)) |cookies| { try conn.setCookies(@ptrCast(cookies.ptr)); } @@ -1289,7 +1371,7 @@ pub const Transfer = struct { fn handleRedirect(transfer: *Transfer) !void { const req = &transfer.req; const conn = transfer._conn.?; - const arena = transfer.req.params.arena; + const arena = transfer.arena; transfer._redirect_count += 1; if (transfer._redirect_count > transfer.client.network.config.httpMaxRedirects()) { @@ -1466,7 +1548,7 @@ pub const Transfer = struct { transfer._callback_error = error.ResponseTooLarge; return http.writefunc_error; } - transfer._stream_buffer.ensureTotalCapacity(transfer.req.params.arena, cl) catch {}; + transfer._stream_buffer.ensureTotalCapacity(transfer.arena, cl) catch {}; } } @@ -1479,7 +1561,7 @@ pub const Transfer = struct { } const chunk = buffer[0..chunk_len]; - transfer._stream_buffer.appendSlice(transfer.req.params.arena, chunk) catch |err| { + transfer._stream_buffer.appendSlice(transfer.arena, chunk) catch |err| { transfer._callback_error = err; return http.writefunc_error; }; @@ -1541,11 +1623,6 @@ pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { return self.process(transfer); } -pub fn deinitRequest(self: *Client, req: Request) void { - req.deinit(); - self.network.app.arena_pool.release(req.params.arena); -} - const Noop = struct { fn headerCallback(_: Response) !bool { return true; @@ -1554,3 +1631,28 @@ const Noop = struct { fn doneCallback(_: *anyopaque) !void {} fn errorCallback(_: *anyopaque, _: anyerror) void {} }; + +// An opaque-from-the-outside handle that Frame / WorkerGlobalScope embed +// to track the HTTP transfers + WebSockets they own. +pub const Owner = struct { + transfers: std.DoublyLinkedList = .{}, + websockets: std.DoublyLinkedList = .{}, + + const WebSocket = @import("webapi/net/WebSocket.zig"); + + pub fn addTransfer(self: *Owner, t: *Transfer) void { + self.transfers.append(&t.owner_node); + } + + pub fn removeTransfer(self: *Owner, t: *Transfer) void { + self.transfers.remove(&t.owner_node); + } + + pub fn addWS(self: *Owner, ws: *WebSocket) void { + self.websockets.append(&ws._owner_node); + } + + pub fn removeWS(self: *Owner, ws: *WebSocket) void { + self.websockets.remove(&ws._owner_node); + } +}; diff --git a/src/browser/Page.zig b/src/browser/Page.zig index 0205f8da..f7d0bebc 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -119,10 +119,8 @@ queued_close: std.ArrayList(*Frame) = .empty, // destination of a root navigation — its V8 context exists but is not yet the // session's active context. Flipped to `.active` by Session.commitPendingPage // when response headers arrive. Frame.navigate / frameHeaderDoneCallback -// branch on this to: (a) stamp `is_pending_root` on the frame_navigate +// branch on this to stamp `is_pending_root` on the frame_navigate // notification (so CDP doesn't reset its node registry yet) and -// (b) flag the HTTP request `protect_from_abort` (so the old page's deinit -// can't kill the transfer we're sitting inside). _state: enum { active, pending } = .active, // Initialize a Page and its root Frame. diff --git a/src/browser/Runner.zig b/src/browser/Runner.zig index 009235bb..c6da4d86 100644 --- a/src/browser/Runner.zig +++ b/src/browser/Runner.zig @@ -66,6 +66,9 @@ pub fn waitCDP(self: *Runner, opts: WaitOpts) !CDPWaitResult { } fn _wait(self: *Runner, comptime is_cdp: bool, opts: WaitOpts) !CDPWaitResult { + const session = self.session; + const browser = session.browser; + var timer = try std.time.Timer.start(); const tick_opts = TickOpts{ @@ -85,8 +88,9 @@ fn _wait(self: *Runner, comptime is_cdp: bool, opts: WaitOpts) !CDPWaitResult { if (gc_hint_timer.read() >= gc_hint_period_ns) { gc_hint_timer.reset(); self.frame._page.cleanupClosedPopups(); - self.session.browser.env.memoryPressureNotification(.moderate); + browser.env.memoryPressureNotification(.moderate); } + session.processQueuedDestroyed(); const tick_result = self._tick(is_cdp, tick_opts) catch |err| { switch (err) { diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index cc0880b4..bda3c6c9 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -256,7 +256,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e // Let the outer errdefer handle releasing the arena if client.request fails } - try self.base.client.request(.{ + try frame.makeRequest(.{ .ctx = script, .params = .{ .url = url, diff --git a/src/browser/ScriptManagerBase.zig b/src/browser/ScriptManagerBase.zig index b73121a6..cf9556c8 100644 --- a/src/browser/ScriptManagerBase.zig +++ b/src/browser/ScriptManagerBase.zig @@ -47,8 +47,7 @@ pub const Owner = union(enum) { pub fn url(self: Owner) [:0]const u8 { return switch (self) { - .frame => |f| f.url, - .worker => |w| w.url, + inline else => |g| g.url, }; } @@ -68,23 +67,26 @@ pub const Owner = union(enum) { pub fn session(self: Owner) *Session { return switch (self) { - .frame => |f| f._session, - .worker => |w| w._session, + inline else => |g| g._session, }; } pub fn jsContext(self: Owner) *js.Context { return switch (self) { - .frame => |f| f.js, - .worker => |w| w.js, + inline else => |g| g.js, }; } pub fn addHeaders(self: Owner, headers: *HttpClient.Headers) !void { - switch (self) { - .frame => |f| try f.headersForRequest(headers), - .worker => {}, - } + return switch (self) { + inline else => |g| g.headersForRequest(headers), + }; + } + + pub fn makeRequest(self: Owner, req: HttpClient.Request) !void { + return switch (self) { + inline else => |g| g.makeRequest(req), + }; } }; @@ -259,17 +261,18 @@ pub fn preloadImport(self: *ScriptManagerBase, url: [:0]const u8, referrer: []co // called). self.async_scripts.append(&script.node); - const session = self.owner.session(); - self.client.request(.{ + const owner = self.owner; + const session = owner.session(); + owner.makeRequest(.{ .ctx = script, .params = .{ .url = url, .method = .GET, - .frame_id = self.owner.frameId(), - .loader_id = self.owner.loaderId(), + .frame_id = owner.frameId(), + .loader_id = owner.loaderId(), .headers = try self.getHeaders(), .cookie_jar = &session.cookie_jar, - .cookie_origin = self.owner.url(), + .cookie_origin = owner.url(), .resource_type = .script, .notification = session.notification, }, @@ -364,19 +367,20 @@ pub fn getAsyncImport(self: *ScriptManagerBase, url: [:0]const u8, cb: ImportAsy self.is_evaluating = true; defer self.is_evaluating = was_evaluating; + const owner = self.owner; const session = self.owner.session(); self.async_scripts.append(&script.node); - self.client.request(.{ + owner.makeRequest(.{ .ctx = script, .params = .{ .url = url, .method = .GET, - .frame_id = self.owner.frameId(), - .loader_id = self.owner.loaderId(), + .frame_id = owner.frameId(), + .loader_id = owner.loaderId(), .headers = try self.getHeaders(), .resource_type = .script, .cookie_jar = &session.cookie_jar, - .cookie_origin = self.owner.url(), + .cookie_origin = owner.url(), .notification = session.notification, }, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, diff --git a/src/browser/Session.zig b/src/browser/Session.zig index 0f83b95d..06dd8e42 100644 --- a/src/browser/Session.zig +++ b/src/browser/Session.zig @@ -74,6 +74,8 @@ _active: ?*Page = null, // In-flight root navigation _pending: ?*Page = null, +_queued_destroy: std.ArrayList(*Page) = .{}, + // IDs. Kept at Session level so IDs can remain unique across Page replacements. frame_id_gen: u32 = 0, loader_id_gen: u32 = 0, @@ -111,6 +113,8 @@ pub fn deinit(self: *Session) void { if (self._active != null) { self.removePage(); } + self.processQueuedDestroyed(); + self.cookie_jar.deinit(); // Force V8 to flush any remaining weak callbacks while @@ -124,6 +128,14 @@ pub fn deinit(self: *Session) void { self.arena_pool.release(self.arena); } +pub fn processQueuedDestroyed(self: *Session) void { + for (self._queued_destroy.items) |page| { + page.deinit(); + self.browser.page_pool.destroy(page); + } + self._queued_destroy.clearRetainingCapacity(); +} + // True iff there is an active Page. CDP / external callers should use this // (or `currentPage()`) rather than poking at the underlying field. pub fn hasPage(self: *const Session) bool { @@ -141,8 +153,7 @@ fn allocatePage(self: *Session, frame_id: u32) !*Page { // Tear down and free a Page allocated via allocatePage. fn destroyPage(self: *Session, page: *Page) void { - page.deinit(); - self.browser.page_pool.destroy(page); + self._queued_destroy.append(self.arena, page) catch @panic("OOM"); } // Tear down the currently-active Page. Dispatches `frame_remove` first @@ -163,6 +174,8 @@ fn tearDownActivePage(self: *Session) void { } return; }; + + page.frame.abortTransfers(); self.destroyPage(page); self._active = null; self.navigation.onRemoveFrame(); @@ -532,12 +545,6 @@ pub fn initiateRootNavigation(self: *Session, frame_id: u32, url: [:0]const u8, // response for the request we are committing was just inserted by // onHttpResponseHeadersDone moments earlier and must survive). // 4. pending_page = null. Order matters: step 3 reads it. -// 5. OLD Page.deinit + free LAST. Its frame.deinit calls -// http_client.abortFrame(frame_id) on the frame_id that the OLD -// page shares with the now-active pending page; the in-flight -// navigation transfer (whose callback we are inside) is shielded -// by protect_from_abort, which abortFrame's default .normal scope -// honors. The caller clears the flag AFTER we return. pub fn commitPendingPage(self: *Session) !void { const pending = self._pending orelse { lp.assert(false, "Session.commitPendingPage - no pending page", .{}); @@ -574,10 +581,12 @@ pub fn commitPendingPage(self: *Session) !void { // Step 5: tear down the OLD page LAST. Anything in steps 1-4 that // needed to walk the OLD page's state (CDP node_registry, inspector - // context group, isolated worlds) has already done so. The OLD page's - // frame.deinit calls http_client.abortFrame(frame_id) on the frame_id - // shared with the pending page; the in-flight transfer survives via - // protect_from_abort. + // context group, isolated worlds) has already done so. Kill any + // remaining transfers/websockets synchronously before queuing for + // deferred destroy — otherwise a still-inflight transfer firing its + // done_callback after this point would re-enter against the new + // _active and trip the half-torn-down session. + old_active.frame.abortTransfers(); self.destroyPage(old_active); } @@ -591,8 +600,9 @@ pub fn discardPendingPage(self: *Session) void { log.debug(.browser, "discard pending page", .{}); } - // Force abort all inflight queries. - self.browser.http_client.abortFrame(page.frame._frame_id, .{ .scope = .full }); + // Force abort all inflight queries (HTTP + WS) before queuing for + // deferred destroy. + page.frame.abortTransfers(); self._pending = null; self.destroyPage(page); diff --git a/src/browser/js/Env.zig b/src/browser/js/Env.zig index 76bd4391..7635e9a2 100644 --- a/src/browser/js/Env.zig +++ b/src/browser/js/Env.zig @@ -587,7 +587,7 @@ test "Env: Worker context " { const frame = try session.createPage(); defer session.removePage(); - const worker = try @import("../webapi/Worker.zig").init("http://localhost:9582/src/browser/tests/testing.js", &frame.js.execution); + const worker = try @import("../webapi/Worker.zig").init("http://localhost:9582/src/browser/tests/testing.js", frame); var ls: js.Local.Scope = undefined; worker._worker_scope.js.localScope(&ls); diff --git a/src/browser/js/Execution.zig b/src/browser/js/Execution.zig index 15de46ce..c625f473 100644 --- a/src/browser/js/Execution.zig +++ b/src/browser/js/Execution.zig @@ -96,6 +96,22 @@ pub fn lookupBlobUrl(self: *const Execution, url: []const u8) ?*Blob { }; } +pub fn makeRequest(self: *const Execution, req: HttpClient.Request) !void { + return switch (self.context.global) { + inline else => |g| g.makeRequest(req), + }; +} + +// HttpClient.Owner of the current global (Frame or WGS). Used by code +// that needs to register an in-flight network operation against the +// owning scope without caring whether it's a Frame or a Worker — e.g. +// WebSocket.init appending to `.websockets`. +pub fn httpOwner(self: *const Execution) *HttpClient.Owner { + return switch (self.context.global) { + inline else => |g| &g._http_owner, + }; +} + pub fn dispatch( self: *const Execution, target: *EventTarget, diff --git a/src/browser/webapi/Worker.zig b/src/browser/webapi/Worker.zig index 5d571fa6..a993a532 100644 --- a/src/browser/webapi/Worker.zig +++ b/src/browser/webapi/Worker.zig @@ -58,17 +58,13 @@ _on_error: ?js.Function.Global = null, _on_message: ?js.Function.Global = null, _on_messageerror: ?js.Function.Global = null, -pub fn init(url: []const u8, exec: *Execution) !*Worker { - const frame = switch (exec.context.global) { - .frame => |f| f, - .worker => return error.WorkerCannotCreateWorker, - }; +pub fn init(url: []const u8, frame: *Frame) !*Worker { const session = frame._session; const arena = try session.getArena(.large, "Worker"); errdefer session.releaseArena(arena); - const resolved_url = try URL.resolve(arena, exec.url.*, url, .{ .encoding = frame.charset }); + const resolved_url = try URL.resolve(arena, frame.base(), url, .{ .encoding = frame.charset }); const self = try frame._page.factory.eventTargetWithAllocator(arena, Worker{ ._arena = arena, ._proto = undefined, @@ -92,13 +88,13 @@ pub fn init(url: []const u8, exec: *Execution) !*Worker { return self; } - const http_client = &session.browser.http_client; - http_client.request(.{ + const headers = try session.browser.http_client.newHeaders(); + frame.makeRequest(.{ .ctx = self, .params = .{ - .url = resolved_url, .method = .GET, - .headers = try http_client.newHeaders(), + .headers = headers, + .url = resolved_url, .frame_id = self._frame_id, .loader_id = self._loader_id, .resource_type = .script, @@ -122,7 +118,6 @@ pub fn init(url: []const u8, exec: *Execution) !*Worker { // remove from the frame's worker list. pub fn deinit(self: *Worker) void { // No pending frame for workers, so we can abort all frames. - self._frame._session.browser.http_client.abortFrame(self._frame_id, .{ .scope = .full }); if (self._http_response) |res| { res.abort(error.Abort); self._http_response = null; diff --git a/src/browser/webapi/WorkerGlobalScope.zig b/src/browser/webapi/WorkerGlobalScope.zig index ea194382..1df74439 100644 --- a/src/browser/webapi/WorkerGlobalScope.zig +++ b/src/browser/webapi/WorkerGlobalScope.zig @@ -59,6 +59,8 @@ _page: *Page, _session: *Session, _factory: *Factory, _identity: JS.Identity = .{}, +_http_owner: HttpClient.Owner = .{}, + arena: Allocator, call_arena: Allocator, url: [:0]const u8, @@ -148,16 +150,21 @@ pub fn init(worker: *Worker, url: [:0]const u8) !*WorkerGlobalScope { } pub fn deinit(self: *WorkerGlobalScope) void { + const page = self._page; + const session = page.session; + const browser = session.browser; + + browser.http_client.abortOwner(&self._http_owner); + self._identity.deinit(); self._script_manager.deinit(); - const page = self._page; var it = self._blob_urls.valueIterator(); while (it.next()) |blob| { blob.*.releaseRef(page); } - page.session.browser.env.destroyContext(self.js); - page.releaseArena(self.call_arena); + browser.env.destroyContext(self.js); + session.releaseArena(self.call_arena); } pub fn base(self: *const WorkerGlobalScope) [:0]const u8 { @@ -210,6 +217,10 @@ pub fn lookupBlobUrl(self: *WorkerGlobalScope, url: []const u8) ?*Blob { return self._blob_urls.get(url); } +pub fn makeRequest(self: *WorkerGlobalScope, req: HttpClient.Request) !void { + return self._session.browser.http_client.request(req, &self._http_owner); +} + pub fn getSelf(self: *WorkerGlobalScope) *WorkerGlobalScope { return self; } diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig index bb9e97a7..5d58a1a5 100644 --- a/src/browser/webapi/net/Fetch.zig +++ b/src/browser/webapi/net/Fetch.zig @@ -99,7 +99,7 @@ pub fn init(input: Input, options: ?InitOpts, exec: *const Execution) !js.Promis // httpErrorCallback by Client.request, which rejects the promise and // releases response._arena. Propagating the error from here would also // fire the `errdefer response.deinit` above and double-free the arena. - http_client.request(.{ + exec.makeRequest(.{ .ctx = fetch, .params = .{ .url = request._url, diff --git a/src/browser/webapi/net/WebSocket.zig b/src/browser/webapi/net/WebSocket.zig index 4677aed8..1d2d6cad 100644 --- a/src/browser/webapi/net/WebSocket.zig +++ b/src/browser/webapi/net/WebSocket.zig @@ -58,6 +58,8 @@ _conn: ?*http.Connection, _http_client: *HttpClient, _req_headers: http.Headers, +_owner_node: std.DoublyLinkedList.Node = .{}, + // buffered outgoing messages _send_queue: std.ArrayList(Message) = .empty, _send_offset: usize = 0, @@ -148,6 +150,7 @@ pub fn init(url: []const u8, protocols: [][]const u8, frame: *Frame) !*WebSocket }); conn.transport = .{ .websocket = self }; try http_client.trackConn(conn); + frame._http_owner.addWS(self); if (comptime IS_DEBUG) { log.info(.websocket, "connecting", .{ .url = url }); @@ -233,6 +236,7 @@ pub fn disconnected(self: *WebSocket, err_: ?anyerror) void { fn cleanup(self: *WebSocket) void { if (self._conn) |conn| { + self._frame._http_owner.removeWS(self); self._http_client.removeConn(conn); self._req_headers.deinit(); self._conn = null; diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 2e456353..3a930412 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -268,7 +268,7 @@ pub fn send(self: *XMLHttpRequest, body_: ?BodyInit, exec_: *const Execution) !v self.acquireRef(); self._active_request = true; - http_client.request(.{ + exec.makeRequest(.{ .ctx = self, .params = .{ .url = self._url, diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 38f7c5ea..4242f089 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -28,7 +28,7 @@ const Frame = @import("../browser/Frame.zig"); const Mime = @import("../browser/Mime.zig"); const Element = @import("../browser/webapi/Element.zig"); const Label = @import("../browser/webapi/element/html/Label.zig"); -const Request = @import("../browser/HttpClient.zig").Request; +const Transfer = @import("../browser/HttpClient.zig").Transfer; const CDPClient = @import("../browser/HttpClient.zig").CDPClient; const WsConnection = @import("../network/WsConnection.zig"); @@ -552,25 +552,21 @@ pub const BrowserContext = struct { env.inspector.?.stopSession(); // abort all intercepted requests before closing the session/page - // since some of these might callback into the page/scriptmanager + // since some of these might callback into the page/scriptmanager. + // intercept_state stores ids — look each one up; if it's already + // gone (out-of-band destroy), there's nothing to abort, but the + // intercepted counter still needs decrementing because we + // incremented it on pause. const http_client = &browser.http_client; - for (self.intercept_state.pendingIntercepts()) |intercept| { - defer { - lp.assert( - http_client.interception_layer.intercepted > 0, - "BrowserContext.deinit.intercepted", - .{ .value = http_client.interception_layer.intercepted }, - ); - http_client.interception_layer.intercepted -= 1; - } - switch (intercept) { - .transfer => |t| { - t.abort(error.ClientDisconnect); - }, - .request => |r| { - defer http_client.deinitRequest(r); - r.error_callback(r.ctx, error.ClientDisconnect); - }, + for (self.intercept_state.pendingIntercepts()) |transfer_id| { + lp.assert( + http_client.interception_layer.intercepted > 0, + "BrowserContext.deinit.intercepted", + .{ .value = http_client.interception_layer.intercepted }, + ); + http_client.interception_layer.intercepted -= 1; + if (http_client.findTransfer(transfer_id)) |transfer| { + transfer.abort(error.ClientDisconnect); } } @@ -781,11 +777,11 @@ pub const BrowserContext = struct { return @import("domains/page.zig").javascriptDialogOpening(self, msg); } - fn keyFromRequestReq(req: *const Request) CDP.BrowserContext.CapturedResponseKey { - return if (req.params.resource_type == .document) - .{ .kind = .loader, .id = req.params.loader_id } + fn keyFromTransfer(transfer: *const Transfer) CDP.BrowserContext.CapturedResponseKey { + return if (transfer.req.params.resource_type == .document) + .{ .kind = .loader, .id = transfer.req.params.loader_id } else - .{ .kind = .request, .id = req.params.request_id }; + .{ .kind = .request, .id = transfer.id }; } pub fn onHttpResponseHeadersDone(ctx: *anyopaque, msg: *const Notification.ResponseHeaderDone) !void { @@ -795,7 +791,7 @@ pub const BrowserContext = struct { const arena = self.frame_arena; // Prepare the captured response value. - const key = keyFromRequestReq(msg.request); + const key = keyFromTransfer(msg.transfer); const gop = try self.captured_responses.getOrPut(arena, key); if (!gop.found_existing) { gop.value_ptr.* = .{ @@ -832,7 +828,7 @@ pub const BrowserContext = struct { const self: *BrowserContext = @ptrCast(@alignCast(ctx)); const arena = self.frame_arena; - const key = keyFromRequestReq(msg.request); + const key = keyFromTransfer(msg.transfer); const resp = self.captured_responses.getPtr(key) orelse lp.assert(false, "onHttpResponseData missing captured response", .{}); return resp.data.appendSlice(arena, msg.data); diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 8eb3ab25..5031fe57 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -51,15 +51,15 @@ pub fn processMessage(cmd: *CDP.Command) !void { } } -// Stored in CDP +// Stored in CDP. Holds *transfer ids* (not *Transfer pointers) of paused +// transfers waiting for CDP continueRequest/fulfillRequest/failRequest/ +// continueWithAuth. Anyone resolving an entry must look the transfer up via +// `Client.findTransfer(id)` — if the transfer has been destroyed out-of-band +// (e.g. frame shutdown), the lookup returns null and the CDP command should +// no-op rather than UAF. pub const InterceptState = struct { allocator: Allocator, - waiting: std.AutoArrayHashMapUnmanaged(u32, Pending), - - const Pending = union(enum) { - transfer: *HttpClient.Transfer, - request: HttpClient.Request, - }; + waiting: std.AutoArrayHashMapUnmanaged(u32, void), pub fn init(allocator: Allocator) !InterceptState { return .{ @@ -72,25 +72,21 @@ pub const InterceptState = struct { return self.waiting.count() == 0; } - pub fn putRequest(self: *InterceptState, request: HttpClient.Request) !void { - return self.waiting.put(self.allocator, request.params.request_id, .{ .request = request }); + pub fn put(self: *InterceptState, transfer_id: u32) !void { + return self.waiting.put(self.allocator, transfer_id, {}); } - pub fn putTransfer(self: *InterceptState, transfer: *HttpClient.Transfer) !void { - return self.waiting.put(self.allocator, transfer.id, .{ .transfer = transfer }); - } - - pub fn remove(self: *InterceptState, request_id: u32) ?Pending { - const entry = self.waiting.fetchSwapRemove(request_id) orelse return null; - return entry.value; + // Returns true if the id was present and removed, false otherwise. + pub fn remove(self: *InterceptState, transfer_id: u32) bool { + return self.waiting.swapRemove(transfer_id); } pub fn deinit(self: *InterceptState) void { self.waiting.deinit(self.allocator); } - pub fn pendingIntercepts(self: *const InterceptState) []Pending { - return self.waiting.values(); + pub fn pendingIntercepts(self: *const InterceptState) []u32 { + return self.waiting.keys(); } }; @@ -201,26 +197,26 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification. // We keep it around to wait for modifications to the request. // TODO: What to do when receiving replies for a previous frame's requests? - const request = intercept.request; - try bc.intercept_state.putRequest(request.*); + const transfer = intercept.transfer; + try bc.intercept_state.put(transfer.id); try bc.cdp.sendEvent("Fetch.requestPaused", .{ - .requestId = &id.toInterceptId(request.params.request_id), - .frameId = &id.toFrameId(request.params.frame_id), - .request = network.RequestWriter.init(request), - .resourceType = switch (request.params.resource_type) { + .requestId = &id.toInterceptId(transfer.id), + .frameId = &id.toFrameId(transfer.req.params.frame_id), + .request = network.RequestWriter.init(transfer), + .resourceType = switch (transfer.req.params.resource_type) { .script => "Script", .xhr => "XHR", .document => "Document", .fetch => "Fetch", }, - .networkId = &id.toRequestId(request), // matches the Network REQ-ID + .networkId = &id.toRequestId(transfer), // matches the Network REQ-ID }, .{ .session_id = session_id }); log.debug(.cdp, "request intercept", .{ .state = "paused", - .id = request.params.request_id, - .url = request.params.url, + .id = transfer.id, + .url = transfer.url, }); // Await either continueRequest, failRequest or fulfillRequest @@ -242,20 +238,28 @@ fn continueRequest(cmd: *CDP.Command) !void { return error.NotImplemented; } + const client = &bc.cdp.browser.http_client; var intercept_state = &bc.intercept_state; - const request_id = try idFromRequestId(params.requestId); + const transfer_id = try idFromRequestId(params.requestId); - const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; - var request = pending.request; + if (!intercept_state.remove(transfer_id)) return error.RequestNotFound; + // Transfer may have been destroyed out-of-band between pause and now + // (e.g. frame shutdown). Treat as a no-op rather than an error — the CDP + // client's view of "this request still exists" is just stale. + const transfer = client.findTransfer(transfer_id) orelse { + log.debug(.cdp, "intercept lookup miss", .{ .id = transfer_id, .op = "continue" }); + return cmd.sendResult(null, .{}); + }; log.debug(.cdp, "request intercept", .{ .state = "continue", - .id = request.params.request_id, - .url = request.params.url, + .id = transfer.id, + .url = transfer.url, .new_url = params.url, }); - const arena = request.params.arena; + const arena = transfer.arena; + const request = &transfer.req; // Update the request with the new parameters if (params.url) |url| { request.params.url = try arena.dupeZ(u8, url); @@ -285,9 +289,7 @@ fn continueRequest(cmd: *CDP.Command) !void { request.params.body = body; } - // todo: replace. - const client = &bc.cdp.browser.http_client; - try client.interception_layer.continueRequest(client, request); + try client.interception_layer.continueRequest(transfer); return cmd.sendResult(null, .{}); } @@ -309,31 +311,37 @@ fn continueWithAuth(cmd: *CDP.Command) !void { }, })) orelse return error.InvalidParams; + const client = &bc.cdp.browser.http_client; var intercept_state = &bc.intercept_state; - const request_id = try idFromRequestId(params.requestId); - const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; - const transfer = pending.transfer; - const request = transfer.req; + const transfer_id = try idFromRequestId(params.requestId); + + if (!intercept_state.remove(transfer_id)) return error.RequestNotFound; + const transfer = client.findTransfer(transfer_id) orelse { + log.debug(.cdp, "intercept lookup miss", .{ .id = transfer_id, .op = "auth" }); + return cmd.sendResult(null, .{}); + }; log.debug(.cdp, "request intercept", .{ .state = "continue with auth", - .id = request.params.request_id, + .id = transfer.id, .response = params.authChallengeResponse.response, }); - const client = &bc.cdp.browser.http_client; - if (params.authChallengeResponse.response != .ProvideCredentials) { transfer.abortAuthChallenge(); return cmd.sendResult(null, .{}); } - // cancel the request, deinit the transfer on error. + // TODO: double-decrement of interception_layer.intercepted if + // continueTransfer fails: continueTransfer decrements unconditionally, + // and the errdefer below decrements again via abortAuthChallenge. + // Worse: if continueTransfer's failure path destroys the transfer + // (start_callback fail in makeRequest), this errdefer hits a freed + // transfer. Pre-existing; needs makeRequest failure-semantics cleanup. errdefer transfer.abortAuthChallenge(); - const arena = request.params.arena; transfer.updateCredentials(try std.fmt.allocPrintSentinel( - arena, + transfer.arena, "{s}:{s}", .{ params.authChallengeResponse.username, @@ -363,16 +371,20 @@ fn fulfillRequest(cmd: *CDP.Command) !void { return error.NotImplemented; } + const client = &bc.cdp.browser.http_client; var intercept_state = &bc.intercept_state; - const request_id = try idFromRequestId(params.requestId); + const transfer_id = try idFromRequestId(params.requestId); - const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; - var request = pending.request; + if (!intercept_state.remove(transfer_id)) return error.RequestNotFound; + const transfer = client.findTransfer(transfer_id) orelse { + log.debug(.cdp, "intercept lookup miss", .{ .id = transfer_id, .op = "fulfill" }); + return cmd.sendResult(null, .{}); + }; log.debug(.cdp, "request intercept", .{ .state = "fulfilled", - .id = request.params.request_id, - .url = request.params.url, + .id = transfer.id, + .url = transfer.url, .status = params.responseCode, .body = params.body != null, }); @@ -380,13 +392,12 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var body: ?[]const u8 = null; if (params.body) |b| { const decoder = std.base64.standard.Decoder; - const buf = try request.params.arena.alloc(u8, try decoder.calcSizeForSlice(b)); + const buf = try transfer.arena.alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(buf, b); body = buf; } - const client = &bc.cdp.browser.http_client; - try client.interception_layer.fulfillRequest(client, request, params.responseCode, params.responseHeaders orelse &.{}, body); + try client.interception_layer.fulfillRequest(transfer, params.responseCode, params.responseHeaders orelse &.{}, body); return cmd.sendResult(null, .{}); } @@ -397,19 +408,22 @@ fn failRequest(cmd: *CDP.Command) !void { errorReason: ErrorReason, })) orelse return error.InvalidParams; - var intercept_state = &bc.intercept_state; - const request_id = try idFromRequestId(params.requestId); - - const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; - const request = pending.request; - const client = &bc.cdp.browser.http_client; - defer client.interception_layer.abortRequest(client, request); + var intercept_state = &bc.intercept_state; + const transfer_id = try idFromRequestId(params.requestId); + + if (!intercept_state.remove(transfer_id)) return error.RequestNotFound; + const transfer = client.findTransfer(transfer_id) orelse { + log.debug(.cdp, "intercept lookup miss", .{ .id = transfer_id, .op = "fail" }); + return cmd.sendResult(null, .{}); + }; + + defer client.interception_layer.abortRequest(transfer); log.info(.cdp, "request intercept", .{ .state = "fail", - .id = request_id, - .url = request.params.url, + .id = transfer.id, + .url = transfer.url, .reason = params.errorReason, }); return cmd.sendResult(null, .{}); @@ -425,15 +439,15 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati // TODO: What to do when receiving replies for a previous frame's requests? const transfer = intercept.transfer; - try bc.intercept_state.putTransfer(transfer); - var request = transfer.req; + try bc.intercept_state.put(transfer.id); + const request = &transfer.req; const challenge = transfer._auth_challenge orelse return error.NullAuthChallenge; try bc.cdp.sendEvent("Fetch.authRequired", .{ - .requestId = &id.toInterceptId(request.params.request_id), + .requestId = &id.toInterceptId(transfer.id), .frameId = &id.toFrameId(request.params.frame_id), - .request = network.RequestWriter.init(&request), + .request = network.RequestWriter.init(transfer), .resourceType = switch (request.params.resource_type) { .script => "Script", .xhr => "XHR", @@ -446,13 +460,13 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", .realm = challenge.realm orelse "", }, - .networkId = &id.toRequestId(&request), + .networkId = &id.toRequestId(transfer), }, .{ .session_id = session_id }); log.debug(.cdp, "request auth required", .{ .state = "paused", - .id = request.params.request_id, - .url = request.params.url, + .id = transfer.id, + .url = transfer.url, }); // Await continueWithAuth diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 76fb5ae3..a92eabbe 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -27,7 +27,6 @@ const Mime = @import("../../browser/Mime.zig"); const Notification = @import("../../Notification.zig"); const timestamp = @import("../../datetime.zig").timestamp; const Transfer = @import("../../browser/HttpClient.zig").Transfer; -const Request = @import("../../browser/HttpClient.zig").Request; const Response = @import("../../browser/HttpClient.zig").Response; const CdpStorage = @import("storage.zig"); @@ -262,7 +261,7 @@ pub fn httpRequestFail(bc: *CDP.BrowserContext, msg: *const Notification.Request // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.loadingFailed", .{ - .requestId = &id.toRequestId(msg.request), + .requestId = &id.toRequestId(msg.transfer), // Seems to be what chrome answers with. I assume it depends on the type of error? .type = "Ping", .errorText = msg.err, @@ -275,7 +274,8 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques // things, but no session. const session_id = bc.session_id orelse return; - const req = msg.request; + const transfer = msg.transfer; + const req = &transfer.req; const frame_id = req.params.frame_id; const frame = bc.session.findFrameByFrameId(frame_id) orelse return; @@ -287,11 +287,11 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques // We're missing a bunch of fields, but, for now, this eems like enough try bc.cdp.sendEvent("Network.requestWillBeSent", .{ .frameId = &id.toFrameId(frame_id), - .requestId = &id.toRequestId(req), + .requestId = &id.toRequestId(transfer), .loaderId = &id.toLoaderId(req.params.loader_id), .type = req.params.resource_type.string(), .documentURL = frame.url, - .request = RequestWriter.init(req), + .request = RequestWriter.init(transfer), .initiator = .{ .type = "other" }, .redirectHasExtraInfo = false, // TODO change after adding Network.requestWillBeSentExtraInfo .hasUserGesture = false, @@ -305,12 +305,13 @@ pub fn httpResponseHeaderDone(arena: Allocator, bc: *CDP.BrowserContext, msg: *c // things, but no session. const session_id = bc.session_id orelse return; - const req = msg.request; + const transfer = msg.transfer; + const req = &transfer.req; // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.responseReceived", .{ .frameId = &id.toFrameId(req.params.frame_id), - .requestId = &id.toRequestId(req), + .requestId = &id.toRequestId(transfer), .loaderId = &id.toLoaderId(req.params.loader_id), .response = ResponseWriter.init(arena, msg.response), .hasExtraInfo = false, // TODO change after adding Network.responseReceivedExtraInfo @@ -321,19 +322,18 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request // detachTarget could be called, in which case, we still have a frame doing // things, but no session. const session_id = bc.session_id orelse return; - const req = msg.request; try bc.cdp.sendEvent("Network.loadingFinished", .{ - .requestId = &id.toRequestId(req), + .requestId = &id.toRequestId(msg.transfer), .encodedDataLength = msg.content_length, }, .{ .session_id = session_id }); } pub const RequestWriter = struct { - request: *Request, + transfer: *Transfer, - pub fn init(request: *Request) RequestWriter { + pub fn init(transfer: *Transfer) RequestWriter { return .{ - .request = request, + .transfer = transfer, }; } @@ -342,7 +342,8 @@ pub const RequestWriter = struct { } fn _jsonStringify(self: *const RequestWriter, jws: anytype) !void { - const request = self.request; + const transfer = self.transfer; + const request = &transfer.req; try jws.beginObject(); { @@ -376,7 +377,7 @@ pub const RequestWriter = struct { try jws.objectField(hdr.name); try jws.write(hdr.value); } - if (try request.getCookieString()) |cookies| { + if (try request.getCookieString(transfer.arena)) |cookies| { try jws.objectField("Cookie"); try jws.write(cookies[0 .. cookies.len - 1]); } diff --git a/src/cdp/id.zig b/src/cdp/id.zig index f6889d24..a2e01786 100644 --- a/src/cdp/id.zig +++ b/src/cdp/id.zig @@ -40,14 +40,14 @@ pub fn toLoaderId(id: u32) [14]u8 { // requestId has special requirements. If it's the main document navigation, // then it should match the loader id. -const Request = @import("../browser/HttpClient.zig").Request; -pub fn toRequestId(req: *const Request) [14]u8 { - if (req.params.resource_type == .document) { - return toLoaderId(req.params.loader_id); +const Transfer = @import("../browser/HttpClient.zig").Transfer; +pub fn toRequestId(transfer: *const Transfer) [14]u8 { + if (transfer.req.params.resource_type == .document) { + return toLoaderId(transfer.req.params.loader_id); } var buf: [14]u8 = undefined; - _ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{req.params.request_id}) catch unreachable; + _ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{transfer.id}) catch unreachable; return buf; } diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig index 7febff24..82902196 100644 --- a/src/network/layer/CacheLayer.zig +++ b/src/network/layer/CacheLayer.zig @@ -18,20 +18,21 @@ const std = @import("std"); const lp = @import("lightpanda"); -const log = lp.log; -const http = @import("../http.zig"); -const Client = @import("../../browser/HttpClient.zig").Client; -const Transfer = @import("../../browser/HttpClient.zig").Transfer; -const Request = @import("../../browser/HttpClient.zig").Request; -const Response = @import("../../browser/HttpClient.zig").Response; const Layer = @import("../../browser/HttpClient.zig").Layer; +const Client = @import("../../browser/HttpClient.zig").Client; +const Request = @import("../../browser/HttpClient.zig").Request; +const Transfer = @import("../../browser/HttpClient.zig").Transfer; +const Response = @import("../../browser/HttpClient.zig").Response; const Cache = @import("../cache/Cache.zig"); const CachedMetadata = @import("../cache/Cache.zig").CachedMetadata; const CachedResponse = @import("../cache/Cache.zig").CachedResponse; + const Forward = @import("Forward.zig"); +const log = lp.log; + const CacheLayer = @This(); next: Layer = undefined, @@ -45,54 +46,62 @@ pub fn layer(self: *CacheLayer) Layer { }; } -fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { +fn request(ptr: *anyopaque, transfer: *Transfer) anyerror!void { const self: *CacheLayer = @ptrCast(@alignCast(ptr)); - const network = client.network; + const req = &transfer.req; if (req.params.method != .GET) { - return self.next.request(client, req); + return self.next.request(transfer); } - const arena = req.params.arena; + const arena = transfer.arena; var iter = req.params.headers.iterator(); const req_header_list = try iter.collect(arena); - if (network.cache.?.get(arena, .{ - .url = req.params.url, + if (transfer.client.network.cache.?.get(arena, .{ + .url = transfer.url, .timestamp = std.time.timestamp(), .request_headers = req_header_list.items, })) |cached| { + // Cache hit: serve synchronously from the original callbacks, then + // tear down. On error, the transfer is still alive and Client.request's + // errdefer will handle cleanup (loop_owned is still false). try serveFromCache(req, &cached); - client.deinitRequest(req); + transfer.deinit(); return; } - const cache_ctx = try arena.create(CacheContext); - cache_ctx.* = .{ + // Cache miss: install wrappers so we can inspect the response and decide + // whether to write the body into the cache when it's done. + const ctx = try arena.create(CacheContext); + ctx.* = .{ .arena = arena, - .client = client, - .forward = Forward.fromRequest(req), - .req_url = req.params.url, + .transfer = transfer, + .forward = Forward.capture(req), + .req_url = transfer.url, .req_headers = req.params.headers, }; - const wrapped = cache_ctx.forward.wrapRequest( - req, - cache_ctx, - .{ - .start = CacheContext.startCallback, - .header = CacheContext.headerCallback, - .done = CacheContext.doneCallback, - .shutdown = CacheContext.shutdownCallback, - .err = CacheContext.errorCallback, - }, - ); + req.ctx = ctx; + req.header_callback = CacheContext.headerCallback; + req.data_callback = CacheContext.dataCallback; + req.done_callback = CacheContext.doneCallback; + req.error_callback = CacheContext.errorCallback; - return self.next.request(client, wrapped); + if (ctx.forward.start != null) { + // req.ctx was changed, need to ovewrite this + req.start_callback = CacheContext.startCallback; + } + if (ctx.forward.shutdown != null) { + // req.ctx was changed, need to ovewrite this + req.shutdown_callback = CacheContext.shutdownCallback; + } + + return self.next.request(transfer); } -fn serveFromCache(req: Request, cached: *const CachedResponse) !void { +fn serveFromCache(req: *Request, cached: *const CachedResponse) !void { const response = Response.fromCached(req.ctx, cached); defer switch (cached.data) { .buffer => |_| {}, @@ -137,32 +146,41 @@ fn serveFromCache(req: Request, cached: *const CachedResponse) !void { const CacheContext = struct { arena: std.mem.Allocator, - client: *Client, - transfer: ?*Transfer = null, + transfer: *Transfer, forward: Forward, req_url: [:0]const u8, - req_headers: http.Headers, + req_headers: @import("../http.zig").Headers, pending_metadata: ?*CachedMetadata = null, fn startCallback(response: Response) anyerror!void { const self: *CacheContext = @ptrCast(@alignCast(response.ctx)); - self.transfer = response.inner.transfer; return self.forward.forwardStart(response); } + fn dataCallback(response: Response, chunk: []const u8) anyerror!void { + const self: *CacheContext = @ptrCast(@alignCast(response.ctx)); + return self.forward.forwardData(response, chunk); + } + fn headerCallback(response: Response) anyerror!bool { const self: *CacheContext = @ptrCast(@alignCast(response.ctx)); - const allocator = self.arena; - const transfer = response.inner.transfer; - var rh = &transfer.response_header.?; + // For non-transfer responses (fulfilled by interception, or future + // cached-while-cached cases), there's nothing to inspect for caching + // decisions — just forward. + const transfer = switch (response.inner) { + .transfer => |t| t, + else => return self.forward.forwardHeader(response), + }; + + const arena = self.arena; const conn = transfer._conn.?; - const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null; + var rh = &transfer.response_header.?; const maybe_cm = try Cache.tryCache( - allocator, + arena, std.time.timestamp(), transfer.url, rh.status, @@ -176,7 +194,7 @@ const CacheContext = struct { if (maybe_cm) |cm| { var iter = transfer.responseHeaderIterator(); - var header_list = try iter.collect(allocator); + var header_list = try iter.collect(arena); const end_of_response = header_list.items.len; if (vary) |vary_str| { @@ -186,16 +204,16 @@ const CacheContext = struct { while (vary_iter.next()) |part| { const name = std.mem.trim(u8, part, &std.ascii.whitespace); if (std.ascii.eqlIgnoreCase(hdr.name, name)) { - try header_list.append(allocator, .{ - .name = try allocator.dupe(u8, hdr.name), - .value = try allocator.dupe(u8, hdr.value), + try header_list.append(arena, .{ + .name = try arena.dupe(u8, hdr.name), + .value = try arena.dupe(u8, hdr.value), }); } } } } - const metadata = try allocator.create(CachedMetadata); + const metadata = try arena.create(CachedMetadata); metadata.* = cm; metadata.headers = header_list.items[0..end_of_response]; metadata.vary_headers = header_list.items[end_of_response..]; @@ -207,10 +225,10 @@ const CacheContext = struct { fn doneCallback(ctx: *anyopaque) anyerror!void { const self: *CacheContext = @ptrCast(@alignCast(ctx)); - const transfer = self.transfer orelse @panic("Start Callback didn't set CacheLayer.transfer"); + const transfer = self.transfer; if (self.pending_metadata) |metadata| { - const cache = &self.client.network.cache.?; + const cache = &transfer.client.network.cache.?; log.debug(.browser, "http cache", .{ .key = self.req_url, .metadata = metadata }); cache.put(metadata.*, transfer._stream_buffer.items) catch |err| { diff --git a/src/network/layer/Forward.zig b/src/network/layer/Forward.zig index b11ff23f..140823a4 100644 --- a/src/network/layer/Forward.zig +++ b/src/network/layer/Forward.zig @@ -16,6 +16,9 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . +// A snapshot of the original ctx + callbacks from a Request, taken before a +// layer overwrites them with its own wrappers. The layer's wrapper callbacks +// call forwardX(...) to invoke the captured originals with the original ctx. const Request = @import("../../browser/HttpClient.zig").Request; const Response = @import("../../browser/HttpClient.zig").Response; @@ -29,7 +32,7 @@ done: Request.DoneCallback, err: Request.ErrorCallback, shutdown: ?Request.ShutdownCallback, -pub fn fromRequest(req: Request) Forward { +pub fn capture(req: *const Request) Forward { return .{ .ctx = req.ctx, .start = req.start_callback, @@ -41,68 +44,6 @@ pub fn fromRequest(req: Request) Forward { }; } -pub const Overrides = struct { - start: ?Request.StartCallback = null, - header: ?Request.HeaderCallback = null, - data: ?Request.DataCallback = null, - done: ?Request.DoneCallback = null, - err: ?Request.ErrorCallback = null, - shutdown: ?Request.ShutdownCallback = null, -}; - -pub fn wrapRequest( - self: *Forward, - req: Request, - new_ctx: anytype, - overrides: Overrides, -) Request { - const T = @TypeOf(new_ctx.*); - const PassthroughT = makePassthrough(T, "forward"); - var wrapped = req; - wrapped.ctx = new_ctx; - wrapped.start_callback = overrides.start orelse if (self.start != null) PassthroughT.start else null; - wrapped.header_callback = overrides.header orelse PassthroughT.header; - wrapped.data_callback = overrides.data orelse PassthroughT.data; - wrapped.done_callback = overrides.done orelse PassthroughT.done; - wrapped.error_callback = overrides.err orelse PassthroughT.err; - wrapped.shutdown_callback = overrides.shutdown orelse if (self.shutdown != null) PassthroughT.shutdown else null; - return wrapped; -} - -fn makePassthrough(comptime T: type, comptime field: []const u8) type { - return struct { - pub fn start(response: Response) anyerror!void { - const self: *T = @ptrCast(@alignCast(response.ctx)); - return @field(self, field).forwardStart(response); - } - - pub fn header(response: Response) anyerror!bool { - const self: *T = @ptrCast(@alignCast(response.ctx)); - return @field(self, field).forwardHeader(response); - } - - pub fn data(response: Response, chunk: []const u8) anyerror!void { - const self: *T = @ptrCast(@alignCast(response.ctx)); - return @field(self, field).forwardData(response, chunk); - } - - pub fn done(ctx_ptr: *anyopaque) anyerror!void { - const self: *T = @ptrCast(@alignCast(ctx_ptr)); - return @field(self, field).forwardDone(); - } - - pub fn err(ctx_ptr: *anyopaque, e: anyerror) void { - const self: *T = @ptrCast(@alignCast(ctx_ptr)); - @field(self, field).forwardErr(e); - } - - pub fn shutdown(ctx_ptr: *anyopaque) void { - const self: *T = @ptrCast(@alignCast(ctx_ptr)); - @field(self, field).forwardShutdown(); - } - }; -} - pub fn forwardStart(self: Forward, response: Response) anyerror!void { var fwd = response; fwd.ctx = self.ctx; diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 165cd254..6fc5c9bc 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -26,6 +26,7 @@ const IS_DEBUG = builtin.mode == .Debug; const http = @import("../http.zig"); const Client = @import("../../browser/HttpClient.zig").Client; const Request = @import("../../browser/HttpClient.zig").Request; +const Transfer = @import("../../browser/HttpClient.zig").Transfer; const Response = @import("../../browser/HttpClient.zig").Response; const FulfilledResponse = @import("../../browser/HttpClient.zig").FulfilledResponse; const Layer = @import("../../browser/HttpClient.zig").Layer; @@ -33,13 +34,11 @@ const Forward = @import("Forward.zig"); const InterceptionLayer = @This(); -// Count of intercepted requests. This is to help deal with intercepted requests. -// The client doesn't track intercepted transfers. If a request is intercepted, -// the client forgets about it and requires the interceptor to continue or abort -// it. That works well, except if we only rely on active, we might think there's -// no more network activity when, with interecepted requests, there might be more -// in the future. (We really only need this to properly emit a 'networkIdle' and -// 'networkAlmostIdle' Page.lifecycleEvent in CDP). +// Count of intercepted requests. The client doesn't track intercepted transfers +// on its own active counters: once intercepted, a transfer leaves the layer +// chain and waits for the interceptor (CDP) to call continue/abort/fulfill. +// We track them here so the network-idle / network-almost-idle CDP lifecycle +// events don't fire prematurely. intercepted: usize = 0, next: Layer = undefined, @@ -51,35 +50,33 @@ pub fn layer(self: *InterceptionLayer) Layer { }; } -fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { +fn request(ptr: *anyopaque, transfer: *Transfer) anyerror!void { const self: *InterceptionLayer = @ptrCast(@alignCast(ptr)); + const req = &transfer.req; - const intercept_ctx = try in_req.params.arena.create(InterceptContext); - intercept_ctx.* = .{ - .client = client, - .forward = Forward.fromRequest(in_req), + const ctx = try transfer.arena.create(InterceptContext); + ctx.* = .{ .layer = self, - .request = in_req, + .transfer = transfer, + .forward = Forward.capture(req), }; - var req = intercept_ctx.forward.wrapRequest( - in_req, - intercept_ctx, - .{ - .start = InterceptContext.startCallback, - .header = InterceptContext.headerCallback, - .data = InterceptContext.dataCallback, - .done = InterceptContext.doneCallback, - .err = InterceptContext.errorCallback, - .shutdown = InterceptContext.shutdownCallback, - }, - ); + // Install our wrappers on the transfer's request. The interceptor wants to + // observe every callback (start/header/data/done/err/shutdown) so it can + // mirror the Network.* CDP events. + req.ctx = ctx; + if (ctx.forward.start != null) req.start_callback = InterceptContext.startCallback; + req.header_callback = InterceptContext.headerCallback; + req.data_callback = InterceptContext.dataCallback; + req.done_callback = InterceptContext.doneCallback; + req.error_callback = InterceptContext.errorCallback; + if (ctx.forward.shutdown != null) req.shutdown_callback = InterceptContext.shutdownCallback; - req.params.notification.dispatch(.http_request_start, &.{ .request = &req }); + req.params.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); var wait_for_interception = false; req.params.notification.dispatch(.http_request_intercept, &.{ - .request = &req, + .transfer = transfer, .wait_for_interception = &wait_for_interception, }); @@ -90,40 +87,44 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { }); if (!wait_for_interception) { - return self.next.request(client, req); + return self.next.request(transfer); } + // Paused: the CDP listener stashed `transfer` and will eventually call + // continueRequest / abortRequest / fulfillRequest. Until then, CDP owns + // the transfer's lifecycle, so flag it loop_owned to keep the outer + // Client.request errdefer from tearing it down. self.intercepted += 1; + transfer.loop_owned = true; if (comptime IS_DEBUG) { log.debug(.http, "wait for interception", .{ .intercepted = self.intercepted }); } } pub const InterceptContext = struct { - client: *Client, - forward: Forward, layer: *InterceptionLayer, - request: Request, + transfer: *Transfer, + forward: Forward, content_length: usize = 0, fn startCallback(response: Response) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); - log.debug(.http, "intercept start", .{ .url = self.request.params.url }); + log.debug(.http, "intercept start", .{ .url = self.transfer.url }); return self.forward.forwardStart(response); } fn headerCallback(response: Response) anyerror!bool { const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); log.debug(.http, "intercept header", .{ - .url = self.request.params.url, + .url = self.transfer.url, .status = response.status(), .content_length = response.contentLength(), }); self.content_length = response.contentLength() orelse 0; - self.request.params.notification.dispatch(.http_response_header_done, &.{ - .request = &self.request, + self.transfer.req.params.notification.dispatch(.http_response_header_done, &.{ + .transfer = self.transfer, .response = &response, }); @@ -133,13 +134,13 @@ pub const InterceptContext = struct { fn dataCallback(response: Response, chunk: []const u8) anyerror!void { const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); log.debug(.http, "intercept data", .{ - .url = self.request.params.url, + .url = self.transfer.url, .len = chunk.len, }); - self.request.params.notification.dispatch(.http_response_data, &.{ + self.transfer.req.params.notification.dispatch(.http_response_data, &.{ .data = chunk, - .request = &self.request, + .transfer = self.transfer, }); return self.forward.forwardData(response, chunk); @@ -149,12 +150,12 @@ pub const InterceptContext = struct { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); log.debug(.http, "intercept done", .{ - .url = self.request.params.url, + .url = self.transfer.url, .content_length = self.content_length, }); - self.request.params.notification.dispatch(.http_request_done, &.{ - .request = &self.request, + self.transfer.req.params.notification.dispatch(.http_request_done, &.{ + .transfer = self.transfer, .content_length = self.content_length, }); return self.forward.forwardDone(); @@ -164,11 +165,11 @@ pub const InterceptContext = struct { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); log.debug(.http, "intercept error", .{ - .url = self.request.params.url, + .url = self.transfer.url, .err = err, }); - self.request.params.notification.dispatch(.http_request_fail, &.{ - .request = &self.request, + self.transfer.req.params.notification.dispatch(.http_request_fail, &.{ + .transfer = self.transfer, .err = err, }); self.forward.forwardErr(err); @@ -177,50 +178,82 @@ pub const InterceptContext = struct { fn shutdownCallback(ctx: *anyopaque) void { const self: *InterceptContext = @ptrCast(@alignCast(ctx)); - log.debug(.http, "intercept shutdown", .{ .url = self.request.params.url }); - self.request.params.notification.dispatch(.http_request_fail, &.{ - .request = &self.request, + log.debug(.http, "intercept shutdown", .{ .url = self.transfer.url }); + self.transfer.req.params.notification.dispatch(.http_request_fail, &.{ + .transfer = self.transfer, .err = error.Shutdown, }); self.forward.forwardShutdown(); } }; -// CDP Callbacks -// These handle their own clean up on errors with `self.next.request`. -// This is because they don't pass their error up the chain as they are async callbacks. +// CDP-driven resolution entry points. The transfer was paused inside `request` +// (loop_owned = true). One of these three is called by CDP to resume / drop +// the transfer. -pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) anyerror!void { +pub fn continueRequest(self: *InterceptionLayer, transfer: *Transfer) anyerror!void { if (comptime IS_DEBUG) { lp.assert(self.intercepted > 0, "InterceptionLayer.continueRequest", .{ .value = self.intercepted }); log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); } - self.intercepted -= 1; - self.next.request(client, req) catch |err| { - const ctx: *InterceptContext = @ptrCast(@alignCast(req.ctx)); - req.error_callback(req.ctx, err); - ctx.client.deinitRequest(req); + + // Resume the layer chain. Ownership is re-handed to whichever subsequent + // layer commits the transfer (queue, multi, or another pause). If the + // chain fails before any commit, we clean up here. Mirror the errdefer + // pattern in Client.request. + transfer.loop_owned = false; + self.next.request(transfer) catch |err| { + if (!transfer.loop_owned) { + transfer.abort(err); + } return err; }; } -pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) void { +pub fn abortRequest(self: *InterceptionLayer, transfer: *Transfer) void { if (comptime IS_DEBUG) { lp.assert(self.intercepted > 0, "InterceptionLayer.abortRequest", .{ .value = self.intercepted }); log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); } self.intercepted -= 1; - - req.error_callback(req.ctx, error.Abort); - client.deinitRequest(req); + transfer.abort(error.Abort); } -fn fulfillInner( - req: Request, +pub fn fulfillRequest( + self: *InterceptionLayer, + transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8, +) !void { + if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.fulfillRequest", .{ .value = self.intercepted }); + log.debug(.http, "fulfill transfer", .{ .intercepted = self.intercepted }); + } + self.intercepted -= 1; + + // `done` flips true once we've called the user's done_callback. If + // done_callback itself throws, the user already saw their end-of-flow + // notification; suppress error_callback to avoid double-notify. + var done: bool = false; + fulfillInner(&transfer.req, status, headers, body, &done) catch |err| { + if (!done) { + transfer.abort(err); + } else { + transfer.deinit(); + } + return err; + }; + transfer.deinit(); +} + +fn fulfillInner( + req: *Request, + status: u16, + headers: []const http.Header, + body: ?[]const u8, + done: *bool, ) !void { const fulfilled = FulfilledResponse{ .status = status, @@ -244,27 +277,6 @@ fn fulfillInner( try req.data_callback(response, b); } + done.* = true; try req.done_callback(req.ctx); } - -pub fn fulfillRequest( - self: *InterceptionLayer, - client: *Client, - req: Request, - status: u16, - headers: []const http.Header, - body: ?[]const u8, -) !void { - if (comptime IS_DEBUG) { - lp.assert(self.intercepted > 0, "InterceptionLayer.fulfillRequest", .{ .value = self.intercepted }); - log.debug(.http, "fulfill transfer", .{ .intercepted = self.intercepted }); - } - - self.intercepted -= 1; - defer client.deinitRequest(req); - - fulfillInner(req, status, headers, body) catch |err| { - req.error_callback(req.ctx, err); - return err; - }; -} diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 7d15bade..710884e9 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -18,21 +18,27 @@ const std = @import("std"); const lp = @import("lightpanda"); -const log = lp.log; const URL = @import("../../browser/URL.zig"); -const Robots = @import("../Robots.zig"); -const Client = @import("../../browser/HttpClient.zig").Client; -const Request = @import("../../browser/HttpClient.zig").Request; -const Response = @import("../../browser/HttpClient.zig").Response; const Layer = @import("../../browser/HttpClient.zig").Layer; +const Client = @import("../../browser/HttpClient.zig").Client; +const Transfer = @import("../../browser/HttpClient.zig").Transfer; +const Response = @import("../../browser/HttpClient.zig").Response; + +const Robots = @import("../Robots.zig"); +const Network = @import("../Network.zig"); + const Forward = @import("Forward.zig"); +const log = lp.log; +const Allocator = std.mem.Allocator; + const RobotsLayer = @This(); next: Layer = undefined, -allocator: std.mem.Allocator, -pending: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty, +network: *Network, +allocator: Allocator, +pending: std.StringHashMapUnmanaged(std.ArrayList(*Transfer)) = .empty, pub fn layer(self: *RobotsLayer) Layer { return .{ @@ -43,7 +49,7 @@ pub fn layer(self: *RobotsLayer) Layer { }; } -pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void { +pub fn deinit(self: *RobotsLayer, allocator: Allocator) void { var it = self.pending.iterator(); while (it.next()) |entry| { entry.value_ptr.deinit(allocator); @@ -51,35 +57,38 @@ pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void { self.pending.deinit(allocator); } -fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { +fn request(ptr: *anyopaque, transfer: *Transfer) anyerror!void { const self: *RobotsLayer = @ptrCast(@alignCast(ptr)); - const arena = req.params.arena; - const robots_url = try URL.getRobotsUrl(arena, req.params.url); + if (transfer.req.params.skip_robots) { + return self.next.request(transfer); + } - if (client.network.robot_store.get(robots_url)) |robot_entry| { + const url = transfer.url; + const robots_url = try URL.getRobotsUrl(transfer.arena, url); + + if (self.network.robot_store.get(robots_url)) |robot_entry| { switch (robot_entry) { .present => |robots| { - const path = URL.getPathname(req.params.url); + const path = URL.getPathname(url); if (!robots.isAllowed(path)) { - log.warn(.http, "blocked by robots", .{ .url = req.params.url }); + log.warn(.http, "blocked by robots", .{ .url = url }); return error.RobotsBlocked; } }, .absent => {}, } - return self.next.request(client, req); + return self.next.request(transfer); } - return self.fetchRobotsThenRequest(client, robots_url, req); + return self.fetchRobotsThenRequest(robots_url, transfer); } fn fetchRobotsThenRequest( self: *RobotsLayer, - client: *Client, robots_url: [:0]const u8, - req: Request, + transfer: *Transfer, ) !void { const entry = try self.pending.getOrPut(self.allocator, robots_url); @@ -87,84 +96,95 @@ fn fetchRobotsThenRequest( errdefer std.debug.assert(self.pending.remove(robots_url)); entry.value_ptr.* = .empty; - // This arena is later owned by the Request. It does not need to be cleaned up by us because - // it will be cleaned up by the `Transfer.deinit()` or any `Request.deinit()` called on any sublayers. - const new_arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer.RobotsContext"); - errdefer client.network.app.arena_pool.release(new_arena); - - const robots_ctx = try new_arena.create(RobotsContext); + const robots_ctx = try transfer.arena.create(RobotsContext); robots_ctx.* = .{ .layer = self, - .client = client, - .arena = new_arena, - .robots_url = robots_url, .buffer = .empty, + .arena = transfer.arena, + .robots_url = robots_url, }; - const headers = try client.newHeaders(); - log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); + var params = transfer.req.params; + if (@typeInfo(@TypeOf(params)) != .@"struct") { + // protect against mutating the original request + @compileError("expected request.params to be a struct"); + } - try self.next.request(client, .{ + // CRITICAL: build a fresh Headers for the inner robots fetch. + // params is value-copied from the parent's req.params, but + // Headers is a struct wrapping a *curl_slist — value copy shares + // the pointer. Letting Client.request take ownership of a shared + // headers list means both transfers will free it at deinit time + // -> double-free. The robots.txt fetch is a system-level GET + // anyway, no need to inherit the parent's user headers. + params.headers = try transfer.client.newHeaders(); + errdefer params.headers.deinit(); + params.method = .GET; + params.url = robots_url; + params.skip_robots = true; + params.resource_type = .fetch; + params.body = null; + + log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); + try transfer.client.request(.{ .ctx = robots_ctx, - .params = .{ - // We have to do this ourselves because we are not going through the top level `request`. - .arena = new_arena, - .request_id = client.incrReqId(), - .url = robots_url, - .method = .GET, - .headers = headers, - .frame_id = req.params.frame_id, - .loader_id = req.params.loader_id, - .cookie_jar = req.params.cookie_jar, - .cookie_origin = req.params.cookie_origin, - .notification = req.params.notification, - .resource_type = .fetch, - }, + .params = params, .header_callback = RobotsContext.headerCallback, .data_callback = RobotsContext.dataCallback, .done_callback = RobotsContext.doneCallback, .error_callback = RobotsContext.errorCallback, .shutdown_callback = RobotsContext.shutdownCallback, - }); + }, transfer.owner); } - try entry.value_ptr.append(self.allocator, req); + try entry.value_ptr.append(self.allocator, transfer); + // Parked: RobotsLayer owns destruction via flushPending / flushPendingShutdown + // until robots.txt resolves. Without this, Client.request's errdefer (or + // any caller's cleanup) would deinit a transfer that's still on the + // pending list, leaving flushPending with a dangling pointer. + transfer.loop_owned = true; } -fn flushPending(self: *RobotsLayer, client: *Client, robots_url: [:0]const u8, allowed: bool) void { - var queued = self.pending.fetchRemove(robots_url) orelse - @panic("RobotsLayer.flushPending: missing queue"); +fn flushPending(self: *RobotsLayer, robots_url: [:0]const u8, allowed: bool) void { + var queued = self.pending.fetchRemove(robots_url) orelse @panic("RobotsLayer.flushPending: missing queue"); defer queued.value.deinit(self.allocator); - for (queued.value.items) |queued_req| { + for (queued.value.items) |transfer| { if (!allowed) { - log.warn(.http, "blocked by robots", .{ .url = queued_req.params.url }); - defer client.deinitRequest(queued_req); - queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); + log.warn(.http, "blocked by robots", .{ .url = transfer.url }); + transfer.abort(error.RobotsBlocked); } else { - self.next.request(client, queued_req) catch |e| { - defer client.deinitRequest(queued_req); - queued_req.error_callback(queued_req.ctx, e); + // Reset ownership: handing back to the layer chain. If a downstream + // layer commits (multi / queue / pause), it'll flip loop_owned back + // to true. If it fails before committing, we clean up here. + transfer.loop_owned = false; + self.next.request(transfer) catch |e| { + if (!transfer.loop_owned) { + transfer.abort(e); + } }; } } } -fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8, client: *Client) void { - var queued = self.pending.fetchRemove(robots_url) orelse +// Invariant: shutdown_callback fires on a Transfer only via Transfer.kill, +// and the only callers of kill are Client.abortOwner / .abortRequests +// (owner-driven teardown). So if THIS robots fetch's shutdown_callback +// fired, the owner is being torn down — every parked transfer in this +// pending queue is on the same owner list and is already being killed by +// the same walk. We just need to drop the pending entry; the owner walk +// handles the rest. (If a future code path adds per-transfer kill +// without owner teardown, this assumption breaks — see comment above +// detachOrDeinit in HttpClient.zig.) +fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8) void { + var pending = self.pending.fetchRemove(robots_url) orelse @panic("RobotsLayer.flushPendingShutdown: missing queue"); - defer queued.value.deinit(self.allocator); - - for (queued.value.items) |queued_req| { - defer client.deinitRequest(queued_req); - if (queued_req.shutdown_callback) |cb| cb(queued_req.ctx); - } + pending.value.deinit(self.allocator); } const RobotsContext = struct { layer: *RobotsLayer, - arena: std.mem.Allocator, - client: *Client, + arena: Allocator, robots_url: [:0]const u8, buffer: std.ArrayList(u8), status: u16 = 0, @@ -199,11 +219,10 @@ const RobotsContext = struct { fn doneCallback(ctx_ptr: *anyopaque) anyerror!void { const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); const l = self.layer; - const client = self.client; const robots_url = self.robots_url; var allowed = true; - const network = client.network; + const network = l.network; switch (self.status) { 200 => { @@ -218,7 +237,7 @@ const RobotsContext = struct { }; if (robots) |r| { try network.robot_store.put(robots_url, r); - const path = URL.getPathname(l.pending.get(robots_url).?.items[0].params.url); + const path = URL.getPathname(l.pending.get(robots_url).?.items[0].req.params.url); allowed = r.isAllowed(path); } } @@ -236,26 +255,24 @@ const RobotsContext = struct { }, } - l.flushPending(client, robots_url, allowed); + l.flushPending(robots_url, allowed); } fn errorCallback(ctx_ptr: *anyopaque, err: anyerror) void { const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); const l = self.layer; - const client = self.client; const robots_url = self.robots_url; log.warn(.http, "robots fetch failed", .{ .err = err }); - l.flushPending(client, robots_url, true); + l.flushPending(robots_url, true); } fn shutdownCallback(ctx_ptr: *anyopaque) void { const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); const l = self.layer; - const client = self.client; const robots_url = self.robots_url; log.debug(.http, "robots fetch shutdown", .{}); - l.flushPendingShutdown(robots_url, client); + l.flushPendingShutdown(robots_url); } }; diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig index 7e67af49..804ec586 100644 --- a/src/network/layer/WebBotAuthLayer.zig +++ b/src/network/layer/WebBotAuthLayer.zig @@ -18,13 +18,15 @@ const std = @import("std"); const lp = @import("lightpanda"); -const log = lp.log; + +const WebBotAuth = @import("../WebBotAuth.zig"); const URL = @import("../../browser/URL.zig"); -const WebBotAuth = @import("../WebBotAuth.zig"); -const Client = @import("../../browser/HttpClient.zig").Client; -const Request = @import("../../browser/HttpClient.zig").Request; const Layer = @import("../../browser/HttpClient.zig").Layer; +const Client = @import("../../browser/HttpClient.zig").Client; +const Transfer = @import("../../browser/HttpClient.zig").Transfer; + +const log = lp.log; const WebBotAuthLayer = @This(); @@ -37,15 +39,13 @@ pub fn layer(self: *WebBotAuthLayer) Layer { }; } -fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { +fn request(ptr: *anyopaque, transfer: *Transfer) anyerror!void { const self: *WebBotAuthLayer = @ptrCast(@alignCast(ptr)); - var our_req = req; - const wba = client.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth"); + const wba = transfer.client.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth"); - const arena = req.params.arena; - const authority = URL.getHost(req.params.url); - try wba.signRequest(arena, &our_req.params.headers, authority); + const authority = URL.getHost(transfer.url); + try wba.signRequest(transfer.arena, &transfer.req.params.headers, authority); - return self.next.request(client, our_req); + return self.next.request(transfer); }