mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 01:25:53 -04:00
Merge pull request #2420 from lightpanda-io/http_client
HttpClient Improvements
This commit is contained in:
@@ -21,9 +21,7 @@ const lp = @import("lightpanda");
|
||||
|
||||
const Frame = @import("browser/Frame.zig");
|
||||
const Transfer = @import("browser/HttpClient.zig").Transfer;
|
||||
const Request = @import("browser/HttpClient.zig").Request;
|
||||
const Response = @import("browser/HttpClient.zig").Response;
|
||||
const InterceptContext = @import("network/layer/InterceptionLayer.zig").InterceptContext;
|
||||
|
||||
const log = lp.log;
|
||||
const List = std.DoublyLinkedList;
|
||||
@@ -170,11 +168,11 @@ pub const FrameLoaded = struct {
|
||||
};
|
||||
|
||||
pub const RequestStart = struct {
|
||||
request: *Request,
|
||||
transfer: *Transfer,
|
||||
};
|
||||
|
||||
pub const RequestIntercept = struct {
|
||||
request: *Request,
|
||||
transfer: *Transfer,
|
||||
wait_for_interception: *bool,
|
||||
};
|
||||
|
||||
@@ -185,21 +183,21 @@ pub const RequestAuthRequired = struct {
|
||||
|
||||
pub const ResponseData = struct {
|
||||
data: []const u8,
|
||||
request: *Request,
|
||||
transfer: *Transfer,
|
||||
};
|
||||
|
||||
pub const ResponseHeaderDone = struct {
|
||||
request: *Request,
|
||||
transfer: *Transfer,
|
||||
response: *const Response,
|
||||
};
|
||||
|
||||
pub const RequestDone = struct {
|
||||
request: *Request,
|
||||
transfer: *Transfer,
|
||||
content_length: usize,
|
||||
};
|
||||
|
||||
pub const RequestFail = struct {
|
||||
request: *Request,
|
||||
transfer: *Transfer,
|
||||
err: anyerror,
|
||||
};
|
||||
|
||||
|
||||
@@ -154,6 +154,8 @@ _to_load: *std.ArrayList(*Element.Html) = undefined,
|
||||
_style_manager: StyleManager,
|
||||
_script_manager: ScriptManager,
|
||||
|
||||
_http_owner: HttpClient.Owner = .{},
|
||||
|
||||
// List of active live ranges (for mutation updates per DOM spec)
|
||||
_live_ranges: std.DoublyLinkedList = .{},
|
||||
|
||||
@@ -403,8 +405,7 @@ pub fn deinit(self: *Frame) void {
|
||||
|
||||
const browser = page.session.browser;
|
||||
|
||||
// don't abort pending frames.
|
||||
browser.http_client.abortFrame(self._frame_id, .{});
|
||||
browser.http_client.abortOwner(&self._http_owner);
|
||||
|
||||
browser.env.destroyContext(self.js);
|
||||
|
||||
@@ -632,7 +633,7 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo
|
||||
// Session.initiateRootNavigation) flags both the notification and the
|
||||
// HTTP request itself: CDP skips its node-registry reset until commit,
|
||||
// and the in-flight transfer survives the OLD page's frame.deinit which
|
||||
// calls http_client.abortFrame(frame_id) on the shared frame_id during
|
||||
// calls http_client.abortList() on the shared frame_id during
|
||||
// commitPendingPage.
|
||||
const is_pending_root = self._page._state == .pending;
|
||||
|
||||
@@ -656,7 +657,7 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo
|
||||
|
||||
session.navigation._current_navigation_kind = opts.kind;
|
||||
|
||||
http_client.request(.{
|
||||
self.makeRequest(.{
|
||||
.ctx = self,
|
||||
.params = .{
|
||||
.url = self.url,
|
||||
@@ -669,7 +670,6 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo
|
||||
.cookie_origin = self.url,
|
||||
.resource_type = .document,
|
||||
.notification = self._session.notification,
|
||||
.protect_from_abort = is_pending_root,
|
||||
},
|
||||
.header_callback = frameHeaderDoneCallback,
|
||||
.data_callback = frameDataCallback,
|
||||
@@ -760,7 +760,9 @@ fn scheduleNavigationWithArena(originator: *Frame, arena: Allocator, request_url
|
||||
.type = target._type,
|
||||
});
|
||||
|
||||
session.browser.http_client.abortFrame(target._frame_id, .{});
|
||||
// Navigation: kill in-flight HTTP transfers, but leave WebSockets
|
||||
// alive — they're cross-document by spec.
|
||||
session.browser.http_client.abortRequests(&target._http_owner);
|
||||
|
||||
// Capture the originating frame's URL as the Referer for this
|
||||
// navigation. The originator's frame may be torn down before navigate()
|
||||
@@ -821,6 +823,19 @@ fn canScheduleNavigation(self: *Frame, new_target_type: NavigationType) bool {
|
||||
};
|
||||
}
|
||||
|
||||
pub fn makeRequest(self: *Frame, req: HttpClient.Request) !void {
|
||||
return self._session.browser.http_client.request(req, &self._http_owner);
|
||||
}
|
||||
|
||||
// Synchronously abort every transfer and WebSocket owned by this frame
|
||||
// and all of its descendants.
|
||||
pub fn abortTransfers(self: *Frame) void {
|
||||
for (self.child_frames.items) |child| {
|
||||
child.abortTransfers();
|
||||
}
|
||||
self._session.browser.http_client.abortOwner(&self._http_owner);
|
||||
}
|
||||
|
||||
pub fn documentIsLoaded(self: *Frame) void {
|
||||
if (self._load_state != .parsing) {
|
||||
// Ideally, documentIsLoaded would only be called once, but if a
|
||||
@@ -971,20 +986,8 @@ fn frameHeaderDoneCallback(response: HttpClient.Response) !bool {
|
||||
// frame_remove (clears OLD V8 context group + CDP node_registry),
|
||||
// tears down the OLD page, flips the pointer, and dispatches
|
||||
// frame_created against the new (now active) frame.
|
||||
//
|
||||
// The OLD page's frame.deinit calls http_client.abortFrame(frame_id) on
|
||||
// the frame_id it shares with the (now-active) pending page; our transfer
|
||||
// survives because Session.initiateRootNavigation flagged the request
|
||||
// protect_from_abort, which abortFrame's default .normal scope honors.
|
||||
// Once we are past commit, that protection is no longer needed and may
|
||||
// interfere with subsequent aborts (e.g. another navigation while we are
|
||||
// still streaming the body), so clear it.
|
||||
if (self._page._state == .pending) {
|
||||
try self._session.commitPendingPage();
|
||||
switch (response.inner) {
|
||||
.transfer => |t| t.req.params.protect_from_abort = false,
|
||||
.fulfilled, .cached => {},
|
||||
}
|
||||
}
|
||||
|
||||
const response_url = response.url();
|
||||
|
||||
@@ -20,14 +20,16 @@ const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
const URL = @import("URL.zig");
|
||||
const ArenaPool = @import("../ArenaPool.zig");
|
||||
const Notification = @import("../Notification.zig");
|
||||
const timestamp = @import("../datetime.zig").timestamp;
|
||||
|
||||
const URL = @import("URL.zig");
|
||||
const CookieJar = @import("webapi/storage/Cookie.zig").Jar;
|
||||
|
||||
const http = @import("../network/http.zig");
|
||||
const Network = @import("../network/Network.zig");
|
||||
const Robots = @import("../network/Robots.zig");
|
||||
const timestamp = @import("../datetime.zig").timestamp;
|
||||
const Network = @import("../network/Network.zig");
|
||||
|
||||
const log = lp.log;
|
||||
const posix = std.posix;
|
||||
@@ -46,7 +48,7 @@ pub const RobotsLayer = @import("../network/layer/RobotsLayer.zig");
|
||||
pub const WebBotAuthLayer = @import("../network/layer/WebBotAuthLayer.zig");
|
||||
pub const InterceptionLayer = @import("../network/layer/InterceptionLayer.zig");
|
||||
|
||||
// This is loosely tied to a browser Page. Loading all the <scripts>, doing
|
||||
// This is loosely tied to a browser Frame. Loading all the <scripts>, doing
|
||||
// XHR requests, and loading imports all happens through here. Sine the app
|
||||
// currently supports 1 browser and 1 frame at-a-time, we only have 1 Client and
|
||||
// re-use it from frame to frame. This allows us better re-use of the various
|
||||
@@ -55,7 +57,7 @@ pub const InterceptionLayer = @import("../network/layer/InterceptionLayer.zig");
|
||||
// The app has other secondary http needs, like telemetry. While we want to
|
||||
// share some things (namely the ca blob, and maybe some configuration
|
||||
// (TODO: ??? should proxy settings be global ???)), we're able to call
|
||||
// client.abortFrame() to abort the transfers being made by a frame, without
|
||||
// client.abortList() to abort the transfers being made by a frame, without
|
||||
// impacting those other http requests.
|
||||
pub const Client = @This();
|
||||
|
||||
@@ -80,6 +82,13 @@ performing: bool = false,
|
||||
// Use to generate the next request ID
|
||||
next_request_id: u32 = 0,
|
||||
|
||||
// Every currently-alive Transfer indexed by its id. Maintained so cross-
|
||||
// component code (CDP intercept state, future scheduling/debugging) can
|
||||
// look up a transfer by id without holding a *Transfer that might dangle.
|
||||
// Inserted in Client.request, removed in Transfer.deinit. The pointer is
|
||||
// only valid for the lifetime of the entry.
|
||||
transfers: std.AutoHashMapUnmanaged(u32, *Transfer) = .empty,
|
||||
|
||||
// When handles has no more available easys, requests get queued.
|
||||
queue: std.DoublyLinkedList = .{},
|
||||
|
||||
@@ -95,10 +104,7 @@ allocator: Allocator,
|
||||
|
||||
network: *Network,
|
||||
|
||||
// Once we have a handle/easy to process a request with, we create a Transfer
|
||||
// which contains the Request as well as any state we need to process the
|
||||
// request. These will come and go with each request.
|
||||
transfer_pool: std.heap.MemoryPool(Transfer),
|
||||
arena_pool: *ArenaPool,
|
||||
|
||||
// The current proxy. CDP can change it, changeProxy(null) restores
|
||||
// from config.
|
||||
@@ -135,11 +141,11 @@ pub const Layer = struct {
|
||||
vtable: *const VTable,
|
||||
|
||||
pub const VTable = struct {
|
||||
request: *const fn (*anyopaque, *Client, Request) anyerror!void,
|
||||
request: *const fn (*anyopaque, *Transfer) anyerror!void,
|
||||
};
|
||||
|
||||
pub fn request(self: Layer, client: *Client, req: Request) !void {
|
||||
return self.vtable.request(self.ptr, client, req);
|
||||
pub fn request(self: Layer, transfer: *Transfer) !void {
|
||||
return self.vtable.request(self.ptr, transfer);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -167,9 +173,6 @@ pub const CDPClient = struct {
|
||||
};
|
||||
|
||||
pub fn init(self: *Client, allocator: Allocator, network: *Network, cdp_client: ?CDPClient) !void {
|
||||
var transfer_pool = std.heap.MemoryPool(Transfer).init(allocator);
|
||||
errdefer transfer_pool.deinit();
|
||||
|
||||
var handles = try http.Handles.init(network.config);
|
||||
errdefer handles.deinit();
|
||||
|
||||
@@ -179,7 +182,6 @@ pub fn init(self: *Client, allocator: Allocator, network: *Network, cdp_client:
|
||||
.handles = handles,
|
||||
.network = network,
|
||||
.allocator = allocator,
|
||||
.transfer_pool = transfer_pool,
|
||||
.cdp_client = cdp_client,
|
||||
|
||||
.use_proxy = http_proxy != null,
|
||||
@@ -189,10 +191,11 @@ pub fn init(self: *Client, allocator: Allocator, network: *Network, cdp_client:
|
||||
.max_response_size = network.config.httpMaxResponseSize() orelse std.math.maxInt(u32),
|
||||
|
||||
.cache_layer = .{},
|
||||
.robots_layer = .{ .allocator = allocator },
|
||||
.robots_layer = .{ .allocator = allocator, .network = network },
|
||||
.web_bot_auth_layer = .{},
|
||||
.interception_layer = .{},
|
||||
.entry_layer = undefined,
|
||||
.arena_pool = &network.app.arena_pool,
|
||||
};
|
||||
|
||||
var next = self.layer();
|
||||
@@ -220,10 +223,18 @@ pub fn deinit(self: *Client) void {
|
||||
self.abort();
|
||||
self.handles.deinit();
|
||||
|
||||
self.transfer_pool.deinit();
|
||||
self.clearUserAgentOverride();
|
||||
|
||||
self.robots_layer.deinit(self.allocator);
|
||||
self.transfers.deinit(self.allocator);
|
||||
}
|
||||
|
||||
// Look up a live transfer by its id. Returns null if the transfer has been
|
||||
// destroyed. Use this — rather than holding *Transfer across yields — for
|
||||
// any code path that's interleaved with the request lifecycle (CDP
|
||||
// continueRequest/fulfill/abort, async cleanups).
|
||||
pub fn findTransfer(self: *Client, id: u32) ?*Transfer {
|
||||
return self.transfers.get(id);
|
||||
}
|
||||
|
||||
pub fn layer(self: *Client) Layer {
|
||||
@@ -300,121 +311,152 @@ pub fn getUserAgent(self: *const Client) [:0]const u8 {
|
||||
return self.user_agent_override orelse self.network.config.http_headers.user_agent;
|
||||
}
|
||||
|
||||
const AbortOpts = struct {
|
||||
scope: enum { normal, full } = .normal,
|
||||
};
|
||||
|
||||
pub fn abort(self: *Client) void {
|
||||
self._abort(true, 0, .{ .scope = .full });
|
||||
}
|
||||
|
||||
// abortFrame with .normal doesn't abort protect_from_abort requests.
|
||||
// .full abort all relqtive requests.
|
||||
pub fn abortFrame(self: *Client, frame_id: u32, opts: AbortOpts) void {
|
||||
self._abort(false, frame_id, opts);
|
||||
}
|
||||
|
||||
// Written this way so that both abort and abortFrame can share the same code
|
||||
// but abort can avoid the frame_id check at comptime.
|
||||
fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32, opts: AbortOpts) void {
|
||||
abortConnections(self.in_use, abort_all, frame_id, opts);
|
||||
abortConnections(self.ready_queue, abort_all, frame_id, opts);
|
||||
|
||||
{
|
||||
var q = &self.queue;
|
||||
var n = q.first;
|
||||
while (n) |node| {
|
||||
n = node.next;
|
||||
const transfer: *Transfer = @fieldParentPtr("_node", node);
|
||||
const params = transfer.req.params;
|
||||
if (comptime abort_all) {
|
||||
transfer.kill();
|
||||
} else if (params.frame_id == frame_id) {
|
||||
if (opts.scope == .full or !params.protect_from_abort) {
|
||||
q.remove(node);
|
||||
transfer.kill();
|
||||
}
|
||||
}
|
||||
}
|
||||
// Snapshot before killing: kill() -> deinit removes entries from
|
||||
// self.transfers, which would invalidate a live iterator.
|
||||
var snapshot = std.ArrayList(*Transfer).initCapacity(self.allocator, self.transfers.count()) catch @panic("OOM");
|
||||
defer snapshot.deinit(self.allocator);
|
||||
var it = self.transfers.valueIterator();
|
||||
while (it.next()) |t| {
|
||||
snapshot.appendAssumeCapacity(t.*);
|
||||
}
|
||||
|
||||
if (comptime abort_all) {
|
||||
self.queue = .{};
|
||||
self.ready_queue = .{};
|
||||
for (snapshot.items) |t| {
|
||||
t.kill();
|
||||
}
|
||||
|
||||
if (comptime IS_DEBUG and abort_all) {
|
||||
var it = self.in_use.first;
|
||||
var leftover: usize = 0;
|
||||
while (it) |node| : (it = node.next) {
|
||||
const conn: *http.Connection = @fieldParentPtr("node", node);
|
||||
switch (conn.transport) {
|
||||
.http => |transfer| std.debug.assert(transfer.aborted),
|
||||
.websocket => {},
|
||||
.none => {},
|
||||
}
|
||||
leftover += 1;
|
||||
}
|
||||
std.debug.assert(self.http_active == leftover);
|
||||
// After the kill loop, every internal list should drain itself via
|
||||
// each transfer's deinit:
|
||||
// - self.transfers : transfers.remove(self.id)
|
||||
// - self.queue : unlinked if _queued is set
|
||||
// - self.in_use / self.ready_queue : via removeConn
|
||||
// - self.dirty : drained at end of each perform; nothing left here
|
||||
// Any non-empty list means a transfer escaped cleanup — assert so we
|
||||
// catch the regression rather than silently leaking on next use.
|
||||
if (comptime IS_DEBUG) {
|
||||
std.debug.assert(self.transfers.size == 0);
|
||||
std.debug.assert(self.queue.first == null);
|
||||
std.debug.assert(self.in_use.first == null);
|
||||
std.debug.assert(self.ready_queue.first == null);
|
||||
std.debug.assert(self.dirty.first == null);
|
||||
}
|
||||
}
|
||||
|
||||
fn abortConnections(list: std.DoublyLinkedList, comptime abort_all: bool, frame_id: u32, opts: AbortOpts) void {
|
||||
var n = list.first;
|
||||
// Kill every transfer + websocket owned by `owner`. Used when the owner
|
||||
// (Frame / WorkerGlobalScope) is being torn down. After this returns,
|
||||
// every WebSocket is fully gone; HTTP transfers that were mid-perform may
|
||||
// still be on `owner.transfers` (Transfer.kill defers their deinit), but
|
||||
// they've been unlinked from the owner list via kill()'s deferred branch
|
||||
// so the owner is free to die.
|
||||
pub fn abortOwner(self: *Client, owner: *Owner) void {
|
||||
self.abortRequests(owner);
|
||||
var n = owner.websockets.first;
|
||||
while (n) |node| {
|
||||
n = node.next;
|
||||
const conn: *http.Connection = @fieldParentPtr("node", node);
|
||||
switch (conn.transport) {
|
||||
.http => |transfer| {
|
||||
const params = transfer.req.params;
|
||||
if (comptime abort_all) {
|
||||
transfer.kill();
|
||||
} else if (params.frame_id == frame_id) {
|
||||
if (opts.scope == .full or !params.protect_from_abort) {
|
||||
transfer.kill();
|
||||
}
|
||||
}
|
||||
},
|
||||
.websocket => |ws| {
|
||||
if ((comptime abort_all) or ws._frame._frame_id == frame_id) {
|
||||
ws.kill();
|
||||
}
|
||||
},
|
||||
.none => unreachable,
|
||||
}
|
||||
const ws: *@import("webapi/net/WebSocket.zig") = @fieldParentPtr("_owner_node", node);
|
||||
ws.kill();
|
||||
}
|
||||
if (comptime IS_DEBUG) {
|
||||
std.debug.assert(owner.websockets.first == null);
|
||||
}
|
||||
}
|
||||
|
||||
// HTTP-only variant. WebSockets survive (they're cross-document by
|
||||
// design). Used by the navigation path that aborts in-flight resource
|
||||
// loads for a frame but lets its WebSockets keep running.
|
||||
pub fn abortRequests(_: *Client, owner: *Owner) void {
|
||||
var n = owner.transfers.first;
|
||||
while (n) |node| {
|
||||
n = node.next;
|
||||
const t: *Transfer = @fieldParentPtr("owner_node", node);
|
||||
t.kill();
|
||||
}
|
||||
// owner.transfers may still have entries: Transfer.kill defers
|
||||
// (flags `aborted` + noops callbacks) when called mid-perform and
|
||||
// only fully deinits later via processOneMessage. The deferred-branch
|
||||
// unlinks the node and clears Transfer.owner, so by the time the
|
||||
// owner itself is freed, no orphan transfer points at it.
|
||||
}
|
||||
|
||||
pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus {
|
||||
while (self.queue.popFirst()) |queue_node| {
|
||||
const transfer: *Transfer = @fieldParentPtr("_node", queue_node);
|
||||
const conn = self.network.getConnection() orelse {
|
||||
self.queue.prepend(queue_node);
|
||||
break;
|
||||
};
|
||||
|
||||
try self.makeRequest(conn, @fieldParentPtr("_node", queue_node));
|
||||
// Cleared only after we've successfully obtained a connection;
|
||||
// if we put the node back, _queued stays true.
|
||||
transfer._queued = false;
|
||||
try self.makeRequest(conn, transfer);
|
||||
}
|
||||
|
||||
return self.perform(@intCast(timeout_ms));
|
||||
}
|
||||
|
||||
pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void {
|
||||
const self: *Client = @ptrCast(@alignCast(ptr));
|
||||
const transfer = try self.makeTransfer(req);
|
||||
return self.process(transfer);
|
||||
// last layer
|
||||
pub fn _request(_: *anyopaque, transfer: *Transfer) !void {
|
||||
return transfer.client.process(transfer);
|
||||
}
|
||||
|
||||
pub fn request(self: *Client, req: Request) !void {
|
||||
// Assign Request Id.
|
||||
var our_req = req;
|
||||
our_req.params.request_id = self.incrReqId();
|
||||
// Ownership contract: from the moment this function is entered, the
|
||||
// HttpClient owns `req` — specifically `req.params.headers` (a curl_slist).
|
||||
// On success, transfer.deinit eventually frees it. On any failure path
|
||||
// inside this function, we free it before returning the error. Callers
|
||||
// must NOT pair `request()` with their own `errdefer headers.deinit()`
|
||||
// — that's a double-free.
|
||||
pub fn request(self: *Client, req: Request, owner: ?*Owner) !void {
|
||||
const arena = self.arena_pool.acquire(.small, "Request.arena") catch |err| {
|
||||
req.params.headers.deinit();
|
||||
return err;
|
||||
};
|
||||
|
||||
const arena = try self.network.app.arena_pool.acquire(.small, "Request.arena");
|
||||
our_req.params.arena = arena;
|
||||
const transfer = arena.create(Transfer) catch |err| {
|
||||
req.params.headers.deinit();
|
||||
self.arena_pool.release(arena);
|
||||
return err;
|
||||
};
|
||||
|
||||
return self.entry_layer.request(self, our_req) catch |err| {
|
||||
our_req.error_callback(our_req.ctx, err);
|
||||
self.deinitRequest(our_req);
|
||||
transfer.* = .{
|
||||
.req = req,
|
||||
.url = req.params.url,
|
||||
.client = self,
|
||||
// owner is set AFTER we've actually appended to the owner list,
|
||||
// so transfer.deinit's `if (self.owner)` branch only fires when
|
||||
// we're truly linked. Otherwise we'd try to remove a node from
|
||||
// a list it was never in.
|
||||
.owner = null,
|
||||
.arena = arena,
|
||||
.id = self.incrReqId(),
|
||||
.start_time = timestamp(.monotonic),
|
||||
.owner_node = .{},
|
||||
};
|
||||
|
||||
// From here, transfer owns req+arena. Any subsequent failure flows
|
||||
// through transfer.deinit (or transfer.abort), which handles headers
|
||||
// via req.deinit. Do NOT free headers directly past this point.
|
||||
|
||||
// Register for id-based lookup. putNoClobber would fail if request_id
|
||||
// collides (i.e. we've wrapped through 2^32 requests and the old
|
||||
// transfer is still alive — practically never).
|
||||
self.transfers.putNoClobber(self.allocator, transfer.id, transfer) catch |err| {
|
||||
transfer.deinit();
|
||||
return err;
|
||||
};
|
||||
|
||||
if (owner) |o| {
|
||||
o.addTransfer(transfer);
|
||||
transfer.owner = o;
|
||||
}
|
||||
|
||||
// From this point forward, the transfer owns `req` and `arena`. If the
|
||||
// layer chain fails before any layer commits the transfer to an external
|
||||
// owner (queue / multi handle / pending interception), we clean up here
|
||||
// via transfer.abort which fires error_callback and deinits.
|
||||
self.entry_layer.request(transfer) catch |err| {
|
||||
if (!transfer.loop_owned) {
|
||||
transfer.abort(err);
|
||||
}
|
||||
return err;
|
||||
};
|
||||
}
|
||||
@@ -474,7 +516,7 @@ pub fn syncRequest(self: *Client, allocator: Allocator, params: RequestParams) !
|
||||
.done_callback = SyncContext.doneCallback,
|
||||
.error_callback = SyncContext.errorCallback,
|
||||
.shutdown_callback = SyncContext.shutdownCallback,
|
||||
});
|
||||
}, null);
|
||||
|
||||
while (sync_ctx.completion == .in_progress) {
|
||||
const status = try self.tick(200);
|
||||
@@ -511,6 +553,8 @@ fn process(self: *Client, transfer: *Transfer) !void {
|
||||
}
|
||||
|
||||
self.queue.append(&transfer._node);
|
||||
transfer._queued = true;
|
||||
transfer.loop_owned = true;
|
||||
}
|
||||
|
||||
pub fn nextReqId(self: *Client) u32 {
|
||||
@@ -523,37 +567,6 @@ pub fn incrReqId(self: *Client) u32 {
|
||||
return id;
|
||||
}
|
||||
|
||||
fn makeTransfer(self: *Client, req: Request) !*Transfer {
|
||||
const transfer = try self.transfer_pool.create();
|
||||
errdefer self.transfer_pool.destroy(transfer);
|
||||
|
||||
transfer.* = .{
|
||||
.start_time = timestamp(.monotonic),
|
||||
.id = req.params.request_id,
|
||||
.url = req.params.url,
|
||||
.req = req,
|
||||
.client = self,
|
||||
};
|
||||
return transfer;
|
||||
}
|
||||
|
||||
fn requestFailed(transfer: *Transfer, err: anyerror, comptime execute_callback: bool) void {
|
||||
if (transfer._notified_fail) {
|
||||
// we can force a failed request within a callback, which will eventually
|
||||
// result in this being called again in the more general loop. We do this
|
||||
// because we can raise a more specific error inside a callback in some cases
|
||||
return;
|
||||
}
|
||||
|
||||
transfer._notified_fail = true;
|
||||
|
||||
if (execute_callback) {
|
||||
transfer.req.error_callback(transfer.req.ctx, err);
|
||||
} else if (transfer.req.shutdown_callback) |cb| {
|
||||
cb(transfer.req.ctx);
|
||||
}
|
||||
}
|
||||
|
||||
// Same restriction as changeProxy. Should be ok since this is only called on
|
||||
// BrowserContext deinit.
|
||||
pub fn restoreOriginalProxy(self: *Client) !void {
|
||||
@@ -573,27 +586,28 @@ fn makeRequest(self: *Client, conn: *http.Connection, transfer: *Transfer) anyer
|
||||
transfer._conn = conn;
|
||||
errdefer {
|
||||
transfer._conn = null;
|
||||
transfer.deinit();
|
||||
self.releaseConn(conn);
|
||||
}
|
||||
|
||||
try transfer.configureConn(conn);
|
||||
}
|
||||
|
||||
// As soon as this is called, our "perform" loop is responsible for
|
||||
// cleaning things up. That's why the above code is in a block. If anything
|
||||
// fails BEFORE `curl_multi_add_handle` succeeds, the we still need to do
|
||||
// cleanup. But if things fail after `curl_multi_add_handle`, we expect
|
||||
// perform to pickup the failure and cleanup.
|
||||
// As soon as trackConn succeeds, the multi handle owns the transfer's
|
||||
// lifecycle. perform/processMessages will eventually invoke completion
|
||||
// callbacks and call transfer.deinit. We flag loop_owned so Client.request
|
||||
// (or anyone else holding the transfer pointer) knows not to deinit it.
|
||||
self.trackConn(conn) catch |err| {
|
||||
transfer._conn = null;
|
||||
transfer.deinit();
|
||||
return err;
|
||||
};
|
||||
transfer.loop_owned = true;
|
||||
|
||||
if (transfer.req.start_callback) |cb| {
|
||||
cb(Response.fromTransfer(transfer)) catch |err| {
|
||||
transfer.deinit();
|
||||
// We're now committed to the multi. transfer.abort fires the
|
||||
// error_callback and tears down (removeConn handles the
|
||||
// already-in-multi case via the dirty queue).
|
||||
transfer.abort(err);
|
||||
return err;
|
||||
};
|
||||
}
|
||||
@@ -868,11 +882,6 @@ fn ensureNoActiveConnection(self: *const Client) !void {
|
||||
}
|
||||
|
||||
pub const RequestParams = struct {
|
||||
/// This is unsafe to access until you pass it to `Client.request()` where it gets assigned.
|
||||
arena: Allocator = undefined,
|
||||
/// This is unsafe to access until you pass it to `Client.request()` where it gets assigned.
|
||||
request_id: u32 = undefined,
|
||||
|
||||
frame_id: u32,
|
||||
loader_id: u32,
|
||||
method: Method,
|
||||
@@ -885,15 +894,7 @@ pub const RequestParams = struct {
|
||||
credentials: ?[:0]const u8 = null,
|
||||
notification: *Notification,
|
||||
timeout_ms: u32 = 0,
|
||||
|
||||
// Set on an in-flight root-navigation transfer that was issued against a
|
||||
// pending Page. The old Page's frame.deinit (called from Session.commit
|
||||
// PendingPage when response headers arrive) calls abortFrame() on the
|
||||
// shared frame_id; abortFrame's default .normal scope skips transfers
|
||||
// with this flag so the callback chain we are sitting inside isn't killed
|
||||
// mid-flight. Session.discardPendingPage uses .full scope to override
|
||||
// the flag in failure paths.
|
||||
protect_from_abort: bool = false,
|
||||
skip_robots: bool = false,
|
||||
|
||||
const ResourceType = enum {
|
||||
document,
|
||||
@@ -939,9 +940,9 @@ pub const Request = struct {
|
||||
error_callback: ErrorCallback,
|
||||
shutdown_callback: ?ShutdownCallback = null,
|
||||
|
||||
pub fn getCookieString(self: *Request) !?[:0]const u8 {
|
||||
pub fn getCookieString(self: *Request, arena: Allocator) !?[:0]const u8 {
|
||||
const jar = self.params.cookie_jar orelse return null;
|
||||
var aw: std.Io.Writer.Allocating = .init(self.params.arena);
|
||||
var aw: std.Io.Writer.Allocating = .init(arena);
|
||||
try jar.forRequest(self.params.url, &aw.writer, .{
|
||||
.is_http = true,
|
||||
.origin_url = self.params.cookie_origin,
|
||||
@@ -1069,6 +1070,26 @@ pub const SyncResponse = struct {
|
||||
|
||||
pub const Transfer = struct {
|
||||
id: u32 = 0,
|
||||
arena: Allocator,
|
||||
|
||||
owner: ?*Owner,
|
||||
owner_node: std.DoublyLinkedList.Node = .{},
|
||||
|
||||
// Latched true by the first commit point that hands the transfer off to
|
||||
// an external owner: client.queue.append, successful trackConn, or
|
||||
// InterceptionLayer pausing for a CDP response. Once set, Client.request's
|
||||
// errdefer skips cleanup — whoever now owns the transfer will deinit it.
|
||||
loop_owned: bool = false,
|
||||
|
||||
// True iff `_node` is currently linked in `client.queue` (waiting for a
|
||||
// libcurl handle). Set in `Client.process` on enqueue, cleared in
|
||||
// `Client.tick` on popFirst, and used by `Transfer.deinit` to safely
|
||||
// unlink — `deinit` has no other way to detect queue membership, and
|
||||
// a transfer aborted while queued (e.g. via owner-list abort) would
|
||||
// otherwise leave a dangling `_node` in `client.queue` that the next
|
||||
// `tick` would dereference and hand to libcurl.
|
||||
_queued: bool = false,
|
||||
|
||||
req: Request,
|
||||
url: [:0]const u8,
|
||||
client: *Client,
|
||||
@@ -1119,65 +1140,126 @@ pub const Transfer = struct {
|
||||
}
|
||||
}
|
||||
|
||||
fn deinit(self: *Transfer) void {
|
||||
pub fn deinit(self: *Transfer) void {
|
||||
if (self._conn) |conn| {
|
||||
self.client.removeConn(conn);
|
||||
self._conn = null;
|
||||
}
|
||||
|
||||
self.client.deinitRequest(self.req);
|
||||
self.client.transfer_pool.destroy(self);
|
||||
}
|
||||
|
||||
pub fn abort(self: *Transfer, err: anyerror) void {
|
||||
self.requestFailed(err, true);
|
||||
|
||||
if (self._performing or self.client.performing) {
|
||||
// We're currently in a curl_multi_perform. We cannot call
|
||||
// curl_multi_remove_handle from a curl callback. Instead, we flag
|
||||
// this transfer and our callbacks will check for this flag.
|
||||
self.aborted = true;
|
||||
return;
|
||||
// Unlink from client.queue if we were waiting for a handle.
|
||||
// Without this, deinit'ing a queued transfer (e.g. via owner-list
|
||||
// abort during navigation) leaves a dangling _node in the queue
|
||||
// that the next tick would pop and hand to libcurl → UAF.
|
||||
if (self._queued) {
|
||||
self.client.queue.remove(&self._node);
|
||||
self._queued = false;
|
||||
}
|
||||
|
||||
self.deinit();
|
||||
// Drop the id→*Transfer index entry before freeing the memory.
|
||||
// Any concurrent CDP lookup by id will now see this transfer as gone.
|
||||
_ = self.client.transfers.remove(self.id);
|
||||
|
||||
self.req.deinit();
|
||||
if (self.owner) |o| {
|
||||
o.removeTransfer(self);
|
||||
}
|
||||
// The Transfer itself lives on this arena, so this must be last —
|
||||
// `self` is invalid memory after release.
|
||||
const arena_pool = self.client.arena_pool;
|
||||
const arena = self.arena;
|
||||
arena_pool.release(arena);
|
||||
}
|
||||
|
||||
pub fn terminate(self: *Transfer) void {
|
||||
self.requestFailed(error.Shutdown, false);
|
||||
self.deinit();
|
||||
// Cancel this transfer with `err`. Fires error_callback once (latched
|
||||
// via _notified_fail), then either deinits synchronously or, if we're
|
||||
// mid-perform with a libcurl handle still in the multi, detaches and
|
||||
// lets the natural processOneMessage flow deinit later.
|
||||
//
|
||||
// This is the ONE entry point external callers should use to cancel
|
||||
// a transfer. Don't reach for kill() or requestFailed() directly —
|
||||
// they're internal helpers.
|
||||
pub fn abort(self: *Transfer, err: anyerror) void {
|
||||
self.requestFailed(err, true);
|
||||
self.detachOrDeinit();
|
||||
}
|
||||
|
||||
// internal, when the frame is shutting down. Doesn't have the same ceremony
|
||||
// as abort (doesn't send a notification, doesn't invoke an error callback)
|
||||
// Owner-driven teardown: fires shutdown_callback (not error_callback)
|
||||
// and otherwise behaves like abort. Called by Client.abortOwner /
|
||||
// abortRequests when a Frame / WGS is being torn down.
|
||||
fn kill(self: *Transfer) void {
|
||||
if (self.req.shutdown_callback) |cb| {
|
||||
cb(self.req.ctx);
|
||||
}
|
||||
|
||||
if (self._performing or self.client.performing) {
|
||||
// We're currently inside of a callback. This client, and libcurl
|
||||
// generally don't expect a transfer to become deinitialized during
|
||||
// a callback. We can flag the transfer as aborted (which is what
|
||||
// we do when transfer.abort() is called in this condition) AND,
|
||||
// since this "kill()"should prevent any future callbacks, the best
|
||||
// we can do is null/noop them.
|
||||
self.aborted = true;
|
||||
self.req.start_callback = null;
|
||||
self.req.shutdown_callback = null;
|
||||
self.req.header_callback = Noop.headerCallback;
|
||||
self.req.data_callback = Noop.dataCallback;
|
||||
self.req.done_callback = Noop.doneCallback;
|
||||
self.req.error_callback = Noop.errorCallback;
|
||||
return;
|
||||
}
|
||||
|
||||
self.deinit();
|
||||
self.detachOrDeinit();
|
||||
}
|
||||
|
||||
// We can force a failed request within a callback, which will eventually
|
||||
// result in this being called again in the more general loop. We do this
|
||||
// because we can raise a more specific error inside a callback in some cases.
|
||||
// Decide whether to tear down now or defer until processOneMessage
|
||||
// eventually drains the in-flight curl handle.
|
||||
//
|
||||
// Two cases force deferral:
|
||||
// * `_performing` — processOneMessage is currently processing THIS
|
||||
// transfer (set/cleared around the callback chain). It will call
|
||||
// `transfer.deinit` itself after the chain returns; deiniting
|
||||
// here would double-free. Note that `_conn` is cleared partway
|
||||
// through this window (the "release conn ASAP" step before
|
||||
// done_callback fires), so we cannot rely on `_conn != null`.
|
||||
// * `client.performing` + we have a libcurl handle — libcurl could
|
||||
// still fire callbacks for us. Releasing the arena now would UAF
|
||||
// from inside curl.
|
||||
//
|
||||
// Otherwise (parked / queued / never-trackConn'd / fully drained),
|
||||
// there is nothing left referencing this transfer and we can safely
|
||||
// deinit inline even from inside a perform callback.
|
||||
fn detachOrDeinit(self: *Transfer) void {
|
||||
const must_defer = self._performing or
|
||||
(self.client.performing and self._conn != null);
|
||||
if (must_defer) {
|
||||
self.detachInPerform();
|
||||
} else {
|
||||
self.deinit();
|
||||
}
|
||||
}
|
||||
|
||||
// Deferred-cleanup path when we can't synchronously deinit.
|
||||
//
|
||||
// We:
|
||||
// - flag `aborted` so processOneMessage's normal-completion paths
|
||||
// short-circuit when they next see this transfer,
|
||||
// - noop every user callback so libcurl naturally draining the
|
||||
// in-flight response can't re-enter user code,
|
||||
// - unlink from owner.transfers and clear `owner` so the owning
|
||||
// Frame/WGS can be freed while this transfer is still draining.
|
||||
// transfer.deinit (called later by processOneMessage) sees
|
||||
// `owner == null` and skips the list-remove that would otherwise
|
||||
// UAF against a freed list.
|
||||
fn detachInPerform(self: *Transfer) void {
|
||||
self.aborted = true;
|
||||
self.req.start_callback = null;
|
||||
self.req.shutdown_callback = null;
|
||||
self.req.header_callback = Noop.headerCallback;
|
||||
self.req.data_callback = Noop.dataCallback;
|
||||
self.req.done_callback = Noop.doneCallback;
|
||||
self.req.error_callback = Noop.errorCallback;
|
||||
if (self.owner) |o| {
|
||||
o.removeTransfer(self);
|
||||
self.owner = null;
|
||||
}
|
||||
}
|
||||
|
||||
// Internal failure-notification helper. Latches via _notified_fail so
|
||||
// multiple paths racing to report the same failure only fire one
|
||||
// notification. Goes through transfer.req — so layer wrappers
|
||||
// (InterceptContext, CacheContext) see the failure and can propagate
|
||||
// it up the chain.
|
||||
//
|
||||
// Not part of the external API: callers cancelling a transfer should
|
||||
// use transfer.abort(err) instead, which goes through this and also
|
||||
// handles the deinit / detach side. The internal HttpClient flow uses
|
||||
// this directly (from processOneMessage) because it's already paired
|
||||
// with the natural processMessages → transfer.deinit handoff.
|
||||
//
|
||||
// execute_callback=true → fires error_callback. false → fires
|
||||
// shutdown_callback (used by Frame shutdown / WGS teardown).
|
||||
fn requestFailed(self: *Transfer, err: anyerror, comptime execute_callback: bool) void {
|
||||
if (self._notified_fail) return;
|
||||
self._notified_fail = true;
|
||||
@@ -1212,7 +1294,7 @@ pub const Transfer = struct {
|
||||
try conn.setHeaders(&header_list);
|
||||
|
||||
// Add cookies from cookie jar.
|
||||
if (try self.req.getCookieString()) |cookies| {
|
||||
if (try self.req.getCookieString(self.arena)) |cookies| {
|
||||
try conn.setCookies(@ptrCast(cookies.ptr));
|
||||
}
|
||||
|
||||
@@ -1289,7 +1371,7 @@ pub const Transfer = struct {
|
||||
fn handleRedirect(transfer: *Transfer) !void {
|
||||
const req = &transfer.req;
|
||||
const conn = transfer._conn.?;
|
||||
const arena = transfer.req.params.arena;
|
||||
const arena = transfer.arena;
|
||||
|
||||
transfer._redirect_count += 1;
|
||||
if (transfer._redirect_count > transfer.client.network.config.httpMaxRedirects()) {
|
||||
@@ -1466,7 +1548,7 @@ pub const Transfer = struct {
|
||||
transfer._callback_error = error.ResponseTooLarge;
|
||||
return http.writefunc_error;
|
||||
}
|
||||
transfer._stream_buffer.ensureTotalCapacity(transfer.req.params.arena, cl) catch {};
|
||||
transfer._stream_buffer.ensureTotalCapacity(transfer.arena, cl) catch {};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1479,7 +1561,7 @@ pub const Transfer = struct {
|
||||
}
|
||||
|
||||
const chunk = buffer[0..chunk_len];
|
||||
transfer._stream_buffer.appendSlice(transfer.req.params.arena, chunk) catch |err| {
|
||||
transfer._stream_buffer.appendSlice(transfer.arena, chunk) catch |err| {
|
||||
transfer._callback_error = err;
|
||||
return http.writefunc_error;
|
||||
};
|
||||
@@ -1541,11 +1623,6 @@ pub fn continueTransfer(self: *Client, transfer: *Transfer) !void {
|
||||
return self.process(transfer);
|
||||
}
|
||||
|
||||
pub fn deinitRequest(self: *Client, req: Request) void {
|
||||
req.deinit();
|
||||
self.network.app.arena_pool.release(req.params.arena);
|
||||
}
|
||||
|
||||
const Noop = struct {
|
||||
fn headerCallback(_: Response) !bool {
|
||||
return true;
|
||||
@@ -1554,3 +1631,28 @@ const Noop = struct {
|
||||
fn doneCallback(_: *anyopaque) !void {}
|
||||
fn errorCallback(_: *anyopaque, _: anyerror) void {}
|
||||
};
|
||||
|
||||
// An opaque-from-the-outside handle that Frame / WorkerGlobalScope embed
|
||||
// to track the HTTP transfers + WebSockets they own.
|
||||
pub const Owner = struct {
|
||||
transfers: std.DoublyLinkedList = .{},
|
||||
websockets: std.DoublyLinkedList = .{},
|
||||
|
||||
const WebSocket = @import("webapi/net/WebSocket.zig");
|
||||
|
||||
pub fn addTransfer(self: *Owner, t: *Transfer) void {
|
||||
self.transfers.append(&t.owner_node);
|
||||
}
|
||||
|
||||
pub fn removeTransfer(self: *Owner, t: *Transfer) void {
|
||||
self.transfers.remove(&t.owner_node);
|
||||
}
|
||||
|
||||
pub fn addWS(self: *Owner, ws: *WebSocket) void {
|
||||
self.websockets.append(&ws._owner_node);
|
||||
}
|
||||
|
||||
pub fn removeWS(self: *Owner, ws: *WebSocket) void {
|
||||
self.websockets.remove(&ws._owner_node);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -119,10 +119,8 @@ queued_close: std.ArrayList(*Frame) = .empty,
|
||||
// destination of a root navigation — its V8 context exists but is not yet the
|
||||
// session's active context. Flipped to `.active` by Session.commitPendingPage
|
||||
// when response headers arrive. Frame.navigate / frameHeaderDoneCallback
|
||||
// branch on this to: (a) stamp `is_pending_root` on the frame_navigate
|
||||
// branch on this to stamp `is_pending_root` on the frame_navigate
|
||||
// notification (so CDP doesn't reset its node registry yet) and
|
||||
// (b) flag the HTTP request `protect_from_abort` (so the old page's deinit
|
||||
// can't kill the transfer we're sitting inside).
|
||||
_state: enum { active, pending } = .active,
|
||||
|
||||
// Initialize a Page and its root Frame.
|
||||
|
||||
@@ -66,6 +66,9 @@ pub fn waitCDP(self: *Runner, opts: WaitOpts) !CDPWaitResult {
|
||||
}
|
||||
|
||||
fn _wait(self: *Runner, comptime is_cdp: bool, opts: WaitOpts) !CDPWaitResult {
|
||||
const session = self.session;
|
||||
const browser = session.browser;
|
||||
|
||||
var timer = try std.time.Timer.start();
|
||||
|
||||
const tick_opts = TickOpts{
|
||||
@@ -85,8 +88,9 @@ fn _wait(self: *Runner, comptime is_cdp: bool, opts: WaitOpts) !CDPWaitResult {
|
||||
if (gc_hint_timer.read() >= gc_hint_period_ns) {
|
||||
gc_hint_timer.reset();
|
||||
self.frame._page.cleanupClosedPopups();
|
||||
self.session.browser.env.memoryPressureNotification(.moderate);
|
||||
browser.env.memoryPressureNotification(.moderate);
|
||||
}
|
||||
session.processQueuedDestroyed();
|
||||
|
||||
const tick_result = self._tick(is_cdp, tick_opts) catch |err| {
|
||||
switch (err) {
|
||||
|
||||
@@ -256,7 +256,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
|
||||
// Let the outer errdefer handle releasing the arena if client.request fails
|
||||
}
|
||||
|
||||
try self.base.client.request(.{
|
||||
try frame.makeRequest(.{
|
||||
.ctx = script,
|
||||
.params = .{
|
||||
.url = url,
|
||||
|
||||
@@ -47,8 +47,7 @@ pub const Owner = union(enum) {
|
||||
|
||||
pub fn url(self: Owner) [:0]const u8 {
|
||||
return switch (self) {
|
||||
.frame => |f| f.url,
|
||||
.worker => |w| w.url,
|
||||
inline else => |g| g.url,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -68,23 +67,26 @@ pub const Owner = union(enum) {
|
||||
|
||||
pub fn session(self: Owner) *Session {
|
||||
return switch (self) {
|
||||
.frame => |f| f._session,
|
||||
.worker => |w| w._session,
|
||||
inline else => |g| g._session,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn jsContext(self: Owner) *js.Context {
|
||||
return switch (self) {
|
||||
.frame => |f| f.js,
|
||||
.worker => |w| w.js,
|
||||
inline else => |g| g.js,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn addHeaders(self: Owner, headers: *HttpClient.Headers) !void {
|
||||
switch (self) {
|
||||
.frame => |f| try f.headersForRequest(headers),
|
||||
.worker => {},
|
||||
}
|
||||
return switch (self) {
|
||||
inline else => |g| g.headersForRequest(headers),
|
||||
};
|
||||
}
|
||||
|
||||
pub fn makeRequest(self: Owner, req: HttpClient.Request) !void {
|
||||
return switch (self) {
|
||||
inline else => |g| g.makeRequest(req),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
@@ -259,17 +261,18 @@ pub fn preloadImport(self: *ScriptManagerBase, url: [:0]const u8, referrer: []co
|
||||
// called).
|
||||
self.async_scripts.append(&script.node);
|
||||
|
||||
const session = self.owner.session();
|
||||
self.client.request(.{
|
||||
const owner = self.owner;
|
||||
const session = owner.session();
|
||||
owner.makeRequest(.{
|
||||
.ctx = script,
|
||||
.params = .{
|
||||
.url = url,
|
||||
.method = .GET,
|
||||
.frame_id = self.owner.frameId(),
|
||||
.loader_id = self.owner.loaderId(),
|
||||
.frame_id = owner.frameId(),
|
||||
.loader_id = owner.loaderId(),
|
||||
.headers = try self.getHeaders(),
|
||||
.cookie_jar = &session.cookie_jar,
|
||||
.cookie_origin = self.owner.url(),
|
||||
.cookie_origin = owner.url(),
|
||||
.resource_type = .script,
|
||||
.notification = session.notification,
|
||||
},
|
||||
@@ -364,19 +367,20 @@ pub fn getAsyncImport(self: *ScriptManagerBase, url: [:0]const u8, cb: ImportAsy
|
||||
self.is_evaluating = true;
|
||||
defer self.is_evaluating = was_evaluating;
|
||||
|
||||
const owner = self.owner;
|
||||
const session = self.owner.session();
|
||||
self.async_scripts.append(&script.node);
|
||||
self.client.request(.{
|
||||
owner.makeRequest(.{
|
||||
.ctx = script,
|
||||
.params = .{
|
||||
.url = url,
|
||||
.method = .GET,
|
||||
.frame_id = self.owner.frameId(),
|
||||
.loader_id = self.owner.loaderId(),
|
||||
.frame_id = owner.frameId(),
|
||||
.loader_id = owner.loaderId(),
|
||||
.headers = try self.getHeaders(),
|
||||
.resource_type = .script,
|
||||
.cookie_jar = &session.cookie_jar,
|
||||
.cookie_origin = self.owner.url(),
|
||||
.cookie_origin = owner.url(),
|
||||
.notification = session.notification,
|
||||
},
|
||||
.start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
|
||||
|
||||
@@ -74,6 +74,8 @@ _active: ?*Page = null,
|
||||
// In-flight root navigation
|
||||
_pending: ?*Page = null,
|
||||
|
||||
_queued_destroy: std.ArrayList(*Page) = .{},
|
||||
|
||||
// IDs. Kept at Session level so IDs can remain unique across Page replacements.
|
||||
frame_id_gen: u32 = 0,
|
||||
loader_id_gen: u32 = 0,
|
||||
@@ -111,6 +113,8 @@ pub fn deinit(self: *Session) void {
|
||||
if (self._active != null) {
|
||||
self.removePage();
|
||||
}
|
||||
self.processQueuedDestroyed();
|
||||
|
||||
self.cookie_jar.deinit();
|
||||
|
||||
// Force V8 to flush any remaining weak callbacks while
|
||||
@@ -124,6 +128,14 @@ pub fn deinit(self: *Session) void {
|
||||
self.arena_pool.release(self.arena);
|
||||
}
|
||||
|
||||
pub fn processQueuedDestroyed(self: *Session) void {
|
||||
for (self._queued_destroy.items) |page| {
|
||||
page.deinit();
|
||||
self.browser.page_pool.destroy(page);
|
||||
}
|
||||
self._queued_destroy.clearRetainingCapacity();
|
||||
}
|
||||
|
||||
// True iff there is an active Page. CDP / external callers should use this
|
||||
// (or `currentPage()`) rather than poking at the underlying field.
|
||||
pub fn hasPage(self: *const Session) bool {
|
||||
@@ -141,8 +153,7 @@ fn allocatePage(self: *Session, frame_id: u32) !*Page {
|
||||
|
||||
// Tear down and free a Page allocated via allocatePage.
|
||||
fn destroyPage(self: *Session, page: *Page) void {
|
||||
page.deinit();
|
||||
self.browser.page_pool.destroy(page);
|
||||
self._queued_destroy.append(self.arena, page) catch @panic("OOM");
|
||||
}
|
||||
|
||||
// Tear down the currently-active Page. Dispatches `frame_remove` first
|
||||
@@ -163,6 +174,8 @@ fn tearDownActivePage(self: *Session) void {
|
||||
}
|
||||
return;
|
||||
};
|
||||
|
||||
page.frame.abortTransfers();
|
||||
self.destroyPage(page);
|
||||
self._active = null;
|
||||
self.navigation.onRemoveFrame();
|
||||
@@ -532,12 +545,6 @@ pub fn initiateRootNavigation(self: *Session, frame_id: u32, url: [:0]const u8,
|
||||
// response for the request we are committing was just inserted by
|
||||
// onHttpResponseHeadersDone moments earlier and must survive).
|
||||
// 4. pending_page = null. Order matters: step 3 reads it.
|
||||
// 5. OLD Page.deinit + free LAST. Its frame.deinit calls
|
||||
// http_client.abortFrame(frame_id) on the frame_id that the OLD
|
||||
// page shares with the now-active pending page; the in-flight
|
||||
// navigation transfer (whose callback we are inside) is shielded
|
||||
// by protect_from_abort, which abortFrame's default .normal scope
|
||||
// honors. The caller clears the flag AFTER we return.
|
||||
pub fn commitPendingPage(self: *Session) !void {
|
||||
const pending = self._pending orelse {
|
||||
lp.assert(false, "Session.commitPendingPage - no pending page", .{});
|
||||
@@ -574,10 +581,12 @@ pub fn commitPendingPage(self: *Session) !void {
|
||||
|
||||
// Step 5: tear down the OLD page LAST. Anything in steps 1-4 that
|
||||
// needed to walk the OLD page's state (CDP node_registry, inspector
|
||||
// context group, isolated worlds) has already done so. The OLD page's
|
||||
// frame.deinit calls http_client.abortFrame(frame_id) on the frame_id
|
||||
// shared with the pending page; the in-flight transfer survives via
|
||||
// protect_from_abort.
|
||||
// context group, isolated worlds) has already done so. Kill any
|
||||
// remaining transfers/websockets synchronously before queuing for
|
||||
// deferred destroy — otherwise a still-inflight transfer firing its
|
||||
// done_callback after this point would re-enter against the new
|
||||
// _active and trip the half-torn-down session.
|
||||
old_active.frame.abortTransfers();
|
||||
self.destroyPage(old_active);
|
||||
}
|
||||
|
||||
@@ -591,8 +600,9 @@ pub fn discardPendingPage(self: *Session) void {
|
||||
log.debug(.browser, "discard pending page", .{});
|
||||
}
|
||||
|
||||
// Force abort all inflight queries.
|
||||
self.browser.http_client.abortFrame(page.frame._frame_id, .{ .scope = .full });
|
||||
// Force abort all inflight queries (HTTP + WS) before queuing for
|
||||
// deferred destroy.
|
||||
page.frame.abortTransfers();
|
||||
|
||||
self._pending = null;
|
||||
self.destroyPage(page);
|
||||
|
||||
@@ -587,7 +587,7 @@ test "Env: Worker context " {
|
||||
const frame = try session.createPage();
|
||||
defer session.removePage();
|
||||
|
||||
const worker = try @import("../webapi/Worker.zig").init("http://localhost:9582/src/browser/tests/testing.js", &frame.js.execution);
|
||||
const worker = try @import("../webapi/Worker.zig").init("http://localhost:9582/src/browser/tests/testing.js", frame);
|
||||
|
||||
var ls: js.Local.Scope = undefined;
|
||||
worker._worker_scope.js.localScope(&ls);
|
||||
|
||||
@@ -96,6 +96,22 @@ pub fn lookupBlobUrl(self: *const Execution, url: []const u8) ?*Blob {
|
||||
};
|
||||
}
|
||||
|
||||
pub fn makeRequest(self: *const Execution, req: HttpClient.Request) !void {
|
||||
return switch (self.context.global) {
|
||||
inline else => |g| g.makeRequest(req),
|
||||
};
|
||||
}
|
||||
|
||||
// HttpClient.Owner of the current global (Frame or WGS). Used by code
|
||||
// that needs to register an in-flight network operation against the
|
||||
// owning scope without caring whether it's a Frame or a Worker — e.g.
|
||||
// WebSocket.init appending to `.websockets`.
|
||||
pub fn httpOwner(self: *const Execution) *HttpClient.Owner {
|
||||
return switch (self.context.global) {
|
||||
inline else => |g| &g._http_owner,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn dispatch(
|
||||
self: *const Execution,
|
||||
target: *EventTarget,
|
||||
|
||||
@@ -58,17 +58,13 @@ _on_error: ?js.Function.Global = null,
|
||||
_on_message: ?js.Function.Global = null,
|
||||
_on_messageerror: ?js.Function.Global = null,
|
||||
|
||||
pub fn init(url: []const u8, exec: *Execution) !*Worker {
|
||||
const frame = switch (exec.context.global) {
|
||||
.frame => |f| f,
|
||||
.worker => return error.WorkerCannotCreateWorker,
|
||||
};
|
||||
pub fn init(url: []const u8, frame: *Frame) !*Worker {
|
||||
const session = frame._session;
|
||||
|
||||
const arena = try session.getArena(.large, "Worker");
|
||||
errdefer session.releaseArena(arena);
|
||||
|
||||
const resolved_url = try URL.resolve(arena, exec.url.*, url, .{ .encoding = frame.charset });
|
||||
const resolved_url = try URL.resolve(arena, frame.base(), url, .{ .encoding = frame.charset });
|
||||
const self = try frame._page.factory.eventTargetWithAllocator(arena, Worker{
|
||||
._arena = arena,
|
||||
._proto = undefined,
|
||||
@@ -92,13 +88,13 @@ pub fn init(url: []const u8, exec: *Execution) !*Worker {
|
||||
return self;
|
||||
}
|
||||
|
||||
const http_client = &session.browser.http_client;
|
||||
http_client.request(.{
|
||||
const headers = try session.browser.http_client.newHeaders();
|
||||
frame.makeRequest(.{
|
||||
.ctx = self,
|
||||
.params = .{
|
||||
.url = resolved_url,
|
||||
.method = .GET,
|
||||
.headers = try http_client.newHeaders(),
|
||||
.headers = headers,
|
||||
.url = resolved_url,
|
||||
.frame_id = self._frame_id,
|
||||
.loader_id = self._loader_id,
|
||||
.resource_type = .script,
|
||||
@@ -122,7 +118,6 @@ pub fn init(url: []const u8, exec: *Execution) !*Worker {
|
||||
// remove from the frame's worker list.
|
||||
pub fn deinit(self: *Worker) void {
|
||||
// No pending frame for workers, so we can abort all frames.
|
||||
self._frame._session.browser.http_client.abortFrame(self._frame_id, .{ .scope = .full });
|
||||
if (self._http_response) |res| {
|
||||
res.abort(error.Abort);
|
||||
self._http_response = null;
|
||||
|
||||
@@ -59,6 +59,8 @@ _page: *Page,
|
||||
_session: *Session,
|
||||
_factory: *Factory,
|
||||
_identity: JS.Identity = .{},
|
||||
_http_owner: HttpClient.Owner = .{},
|
||||
|
||||
arena: Allocator,
|
||||
call_arena: Allocator,
|
||||
url: [:0]const u8,
|
||||
@@ -148,16 +150,21 @@ pub fn init(worker: *Worker, url: [:0]const u8) !*WorkerGlobalScope {
|
||||
}
|
||||
|
||||
pub fn deinit(self: *WorkerGlobalScope) void {
|
||||
const page = self._page;
|
||||
const session = page.session;
|
||||
const browser = session.browser;
|
||||
|
||||
browser.http_client.abortOwner(&self._http_owner);
|
||||
|
||||
self._identity.deinit();
|
||||
self._script_manager.deinit();
|
||||
|
||||
const page = self._page;
|
||||
var it = self._blob_urls.valueIterator();
|
||||
while (it.next()) |blob| {
|
||||
blob.*.releaseRef(page);
|
||||
}
|
||||
page.session.browser.env.destroyContext(self.js);
|
||||
page.releaseArena(self.call_arena);
|
||||
browser.env.destroyContext(self.js);
|
||||
session.releaseArena(self.call_arena);
|
||||
}
|
||||
|
||||
pub fn base(self: *const WorkerGlobalScope) [:0]const u8 {
|
||||
@@ -210,6 +217,10 @@ pub fn lookupBlobUrl(self: *WorkerGlobalScope, url: []const u8) ?*Blob {
|
||||
return self._blob_urls.get(url);
|
||||
}
|
||||
|
||||
pub fn makeRequest(self: *WorkerGlobalScope, req: HttpClient.Request) !void {
|
||||
return self._session.browser.http_client.request(req, &self._http_owner);
|
||||
}
|
||||
|
||||
pub fn getSelf(self: *WorkerGlobalScope) *WorkerGlobalScope {
|
||||
return self;
|
||||
}
|
||||
|
||||
@@ -99,7 +99,7 @@ pub fn init(input: Input, options: ?InitOpts, exec: *const Execution) !js.Promis
|
||||
// httpErrorCallback by Client.request, which rejects the promise and
|
||||
// releases response._arena. Propagating the error from here would also
|
||||
// fire the `errdefer response.deinit` above and double-free the arena.
|
||||
http_client.request(.{
|
||||
exec.makeRequest(.{
|
||||
.ctx = fetch,
|
||||
.params = .{
|
||||
.url = request._url,
|
||||
|
||||
@@ -58,6 +58,8 @@ _conn: ?*http.Connection,
|
||||
_http_client: *HttpClient,
|
||||
_req_headers: http.Headers,
|
||||
|
||||
_owner_node: std.DoublyLinkedList.Node = .{},
|
||||
|
||||
// buffered outgoing messages
|
||||
_send_queue: std.ArrayList(Message) = .empty,
|
||||
_send_offset: usize = 0,
|
||||
@@ -148,6 +150,7 @@ pub fn init(url: []const u8, protocols: [][]const u8, frame: *Frame) !*WebSocket
|
||||
});
|
||||
conn.transport = .{ .websocket = self };
|
||||
try http_client.trackConn(conn);
|
||||
frame._http_owner.addWS(self);
|
||||
|
||||
if (comptime IS_DEBUG) {
|
||||
log.info(.websocket, "connecting", .{ .url = url });
|
||||
@@ -233,6 +236,7 @@ pub fn disconnected(self: *WebSocket, err_: ?anyerror) void {
|
||||
|
||||
fn cleanup(self: *WebSocket) void {
|
||||
if (self._conn) |conn| {
|
||||
self._frame._http_owner.removeWS(self);
|
||||
self._http_client.removeConn(conn);
|
||||
self._req_headers.deinit();
|
||||
self._conn = null;
|
||||
|
||||
@@ -268,7 +268,7 @@ pub fn send(self: *XMLHttpRequest, body_: ?BodyInit, exec_: *const Execution) !v
|
||||
self.acquireRef();
|
||||
self._active_request = true;
|
||||
|
||||
http_client.request(.{
|
||||
exec.makeRequest(.{
|
||||
.ctx = self,
|
||||
.params = .{
|
||||
.url = self._url,
|
||||
|
||||
@@ -28,7 +28,7 @@ const Frame = @import("../browser/Frame.zig");
|
||||
const Mime = @import("../browser/Mime.zig");
|
||||
const Element = @import("../browser/webapi/Element.zig");
|
||||
const Label = @import("../browser/webapi/element/html/Label.zig");
|
||||
const Request = @import("../browser/HttpClient.zig").Request;
|
||||
const Transfer = @import("../browser/HttpClient.zig").Transfer;
|
||||
const CDPClient = @import("../browser/HttpClient.zig").CDPClient;
|
||||
const WsConnection = @import("../network/WsConnection.zig");
|
||||
|
||||
@@ -552,25 +552,21 @@ pub const BrowserContext = struct {
|
||||
env.inspector.?.stopSession();
|
||||
|
||||
// abort all intercepted requests before closing the session/page
|
||||
// since some of these might callback into the page/scriptmanager
|
||||
// since some of these might callback into the page/scriptmanager.
|
||||
// intercept_state stores ids — look each one up; if it's already
|
||||
// gone (out-of-band destroy), there's nothing to abort, but the
|
||||
// intercepted counter still needs decrementing because we
|
||||
// incremented it on pause.
|
||||
const http_client = &browser.http_client;
|
||||
for (self.intercept_state.pendingIntercepts()) |intercept| {
|
||||
defer {
|
||||
lp.assert(
|
||||
http_client.interception_layer.intercepted > 0,
|
||||
"BrowserContext.deinit.intercepted",
|
||||
.{ .value = http_client.interception_layer.intercepted },
|
||||
);
|
||||
http_client.interception_layer.intercepted -= 1;
|
||||
}
|
||||
switch (intercept) {
|
||||
.transfer => |t| {
|
||||
t.abort(error.ClientDisconnect);
|
||||
},
|
||||
.request => |r| {
|
||||
defer http_client.deinitRequest(r);
|
||||
r.error_callback(r.ctx, error.ClientDisconnect);
|
||||
},
|
||||
for (self.intercept_state.pendingIntercepts()) |transfer_id| {
|
||||
lp.assert(
|
||||
http_client.interception_layer.intercepted > 0,
|
||||
"BrowserContext.deinit.intercepted",
|
||||
.{ .value = http_client.interception_layer.intercepted },
|
||||
);
|
||||
http_client.interception_layer.intercepted -= 1;
|
||||
if (http_client.findTransfer(transfer_id)) |transfer| {
|
||||
transfer.abort(error.ClientDisconnect);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -781,11 +777,11 @@ pub const BrowserContext = struct {
|
||||
return @import("domains/page.zig").javascriptDialogOpening(self, msg);
|
||||
}
|
||||
|
||||
fn keyFromRequestReq(req: *const Request) CDP.BrowserContext.CapturedResponseKey {
|
||||
return if (req.params.resource_type == .document)
|
||||
.{ .kind = .loader, .id = req.params.loader_id }
|
||||
fn keyFromTransfer(transfer: *const Transfer) CDP.BrowserContext.CapturedResponseKey {
|
||||
return if (transfer.req.params.resource_type == .document)
|
||||
.{ .kind = .loader, .id = transfer.req.params.loader_id }
|
||||
else
|
||||
.{ .kind = .request, .id = req.params.request_id };
|
||||
.{ .kind = .request, .id = transfer.id };
|
||||
}
|
||||
|
||||
pub fn onHttpResponseHeadersDone(ctx: *anyopaque, msg: *const Notification.ResponseHeaderDone) !void {
|
||||
@@ -795,7 +791,7 @@ pub const BrowserContext = struct {
|
||||
const arena = self.frame_arena;
|
||||
|
||||
// Prepare the captured response value.
|
||||
const key = keyFromRequestReq(msg.request);
|
||||
const key = keyFromTransfer(msg.transfer);
|
||||
const gop = try self.captured_responses.getOrPut(arena, key);
|
||||
if (!gop.found_existing) {
|
||||
gop.value_ptr.* = .{
|
||||
@@ -832,7 +828,7 @@ pub const BrowserContext = struct {
|
||||
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
|
||||
const arena = self.frame_arena;
|
||||
|
||||
const key = keyFromRequestReq(msg.request);
|
||||
const key = keyFromTransfer(msg.transfer);
|
||||
const resp = self.captured_responses.getPtr(key) orelse lp.assert(false, "onHttpResponseData missing captured response", .{});
|
||||
|
||||
return resp.data.appendSlice(arena, msg.data);
|
||||
|
||||
@@ -51,15 +51,15 @@ pub fn processMessage(cmd: *CDP.Command) !void {
|
||||
}
|
||||
}
|
||||
|
||||
// Stored in CDP
|
||||
// Stored in CDP. Holds *transfer ids* (not *Transfer pointers) of paused
|
||||
// transfers waiting for CDP continueRequest/fulfillRequest/failRequest/
|
||||
// continueWithAuth. Anyone resolving an entry must look the transfer up via
|
||||
// `Client.findTransfer(id)` — if the transfer has been destroyed out-of-band
|
||||
// (e.g. frame shutdown), the lookup returns null and the CDP command should
|
||||
// no-op rather than UAF.
|
||||
pub const InterceptState = struct {
|
||||
allocator: Allocator,
|
||||
waiting: std.AutoArrayHashMapUnmanaged(u32, Pending),
|
||||
|
||||
const Pending = union(enum) {
|
||||
transfer: *HttpClient.Transfer,
|
||||
request: HttpClient.Request,
|
||||
};
|
||||
waiting: std.AutoArrayHashMapUnmanaged(u32, void),
|
||||
|
||||
pub fn init(allocator: Allocator) !InterceptState {
|
||||
return .{
|
||||
@@ -72,25 +72,21 @@ pub const InterceptState = struct {
|
||||
return self.waiting.count() == 0;
|
||||
}
|
||||
|
||||
pub fn putRequest(self: *InterceptState, request: HttpClient.Request) !void {
|
||||
return self.waiting.put(self.allocator, request.params.request_id, .{ .request = request });
|
||||
pub fn put(self: *InterceptState, transfer_id: u32) !void {
|
||||
return self.waiting.put(self.allocator, transfer_id, {});
|
||||
}
|
||||
|
||||
pub fn putTransfer(self: *InterceptState, transfer: *HttpClient.Transfer) !void {
|
||||
return self.waiting.put(self.allocator, transfer.id, .{ .transfer = transfer });
|
||||
}
|
||||
|
||||
pub fn remove(self: *InterceptState, request_id: u32) ?Pending {
|
||||
const entry = self.waiting.fetchSwapRemove(request_id) orelse return null;
|
||||
return entry.value;
|
||||
// Returns true if the id was present and removed, false otherwise.
|
||||
pub fn remove(self: *InterceptState, transfer_id: u32) bool {
|
||||
return self.waiting.swapRemove(transfer_id);
|
||||
}
|
||||
|
||||
pub fn deinit(self: *InterceptState) void {
|
||||
self.waiting.deinit(self.allocator);
|
||||
}
|
||||
|
||||
pub fn pendingIntercepts(self: *const InterceptState) []Pending {
|
||||
return self.waiting.values();
|
||||
pub fn pendingIntercepts(self: *const InterceptState) []u32 {
|
||||
return self.waiting.keys();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -201,26 +197,26 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification.
|
||||
// We keep it around to wait for modifications to the request.
|
||||
// TODO: What to do when receiving replies for a previous frame's requests?
|
||||
|
||||
const request = intercept.request;
|
||||
try bc.intercept_state.putRequest(request.*);
|
||||
const transfer = intercept.transfer;
|
||||
try bc.intercept_state.put(transfer.id);
|
||||
|
||||
try bc.cdp.sendEvent("Fetch.requestPaused", .{
|
||||
.requestId = &id.toInterceptId(request.params.request_id),
|
||||
.frameId = &id.toFrameId(request.params.frame_id),
|
||||
.request = network.RequestWriter.init(request),
|
||||
.resourceType = switch (request.params.resource_type) {
|
||||
.requestId = &id.toInterceptId(transfer.id),
|
||||
.frameId = &id.toFrameId(transfer.req.params.frame_id),
|
||||
.request = network.RequestWriter.init(transfer),
|
||||
.resourceType = switch (transfer.req.params.resource_type) {
|
||||
.script => "Script",
|
||||
.xhr => "XHR",
|
||||
.document => "Document",
|
||||
.fetch => "Fetch",
|
||||
},
|
||||
.networkId = &id.toRequestId(request), // matches the Network REQ-ID
|
||||
.networkId = &id.toRequestId(transfer), // matches the Network REQ-ID
|
||||
}, .{ .session_id = session_id });
|
||||
|
||||
log.debug(.cdp, "request intercept", .{
|
||||
.state = "paused",
|
||||
.id = request.params.request_id,
|
||||
.url = request.params.url,
|
||||
.id = transfer.id,
|
||||
.url = transfer.url,
|
||||
});
|
||||
// Await either continueRequest, failRequest or fulfillRequest
|
||||
|
||||
@@ -242,20 +238,28 @@ fn continueRequest(cmd: *CDP.Command) !void {
|
||||
return error.NotImplemented;
|
||||
}
|
||||
|
||||
const client = &bc.cdp.browser.http_client;
|
||||
var intercept_state = &bc.intercept_state;
|
||||
const request_id = try idFromRequestId(params.requestId);
|
||||
const transfer_id = try idFromRequestId(params.requestId);
|
||||
|
||||
const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound;
|
||||
var request = pending.request;
|
||||
if (!intercept_state.remove(transfer_id)) return error.RequestNotFound;
|
||||
// Transfer may have been destroyed out-of-band between pause and now
|
||||
// (e.g. frame shutdown). Treat as a no-op rather than an error — the CDP
|
||||
// client's view of "this request still exists" is just stale.
|
||||
const transfer = client.findTransfer(transfer_id) orelse {
|
||||
log.debug(.cdp, "intercept lookup miss", .{ .id = transfer_id, .op = "continue" });
|
||||
return cmd.sendResult(null, .{});
|
||||
};
|
||||
|
||||
log.debug(.cdp, "request intercept", .{
|
||||
.state = "continue",
|
||||
.id = request.params.request_id,
|
||||
.url = request.params.url,
|
||||
.id = transfer.id,
|
||||
.url = transfer.url,
|
||||
.new_url = params.url,
|
||||
});
|
||||
|
||||
const arena = request.params.arena;
|
||||
const arena = transfer.arena;
|
||||
const request = &transfer.req;
|
||||
// Update the request with the new parameters
|
||||
if (params.url) |url| {
|
||||
request.params.url = try arena.dupeZ(u8, url);
|
||||
@@ -285,9 +289,7 @@ fn continueRequest(cmd: *CDP.Command) !void {
|
||||
request.params.body = body;
|
||||
}
|
||||
|
||||
// todo: replace.
|
||||
const client = &bc.cdp.browser.http_client;
|
||||
try client.interception_layer.continueRequest(client, request);
|
||||
try client.interception_layer.continueRequest(transfer);
|
||||
return cmd.sendResult(null, .{});
|
||||
}
|
||||
|
||||
@@ -309,31 +311,37 @@ fn continueWithAuth(cmd: *CDP.Command) !void {
|
||||
},
|
||||
})) orelse return error.InvalidParams;
|
||||
|
||||
const client = &bc.cdp.browser.http_client;
|
||||
var intercept_state = &bc.intercept_state;
|
||||
const request_id = try idFromRequestId(params.requestId);
|
||||
const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound;
|
||||
const transfer = pending.transfer;
|
||||
const request = transfer.req;
|
||||
const transfer_id = try idFromRequestId(params.requestId);
|
||||
|
||||
if (!intercept_state.remove(transfer_id)) return error.RequestNotFound;
|
||||
const transfer = client.findTransfer(transfer_id) orelse {
|
||||
log.debug(.cdp, "intercept lookup miss", .{ .id = transfer_id, .op = "auth" });
|
||||
return cmd.sendResult(null, .{});
|
||||
};
|
||||
|
||||
log.debug(.cdp, "request intercept", .{
|
||||
.state = "continue with auth",
|
||||
.id = request.params.request_id,
|
||||
.id = transfer.id,
|
||||
.response = params.authChallengeResponse.response,
|
||||
});
|
||||
|
||||
const client = &bc.cdp.browser.http_client;
|
||||
|
||||
if (params.authChallengeResponse.response != .ProvideCredentials) {
|
||||
transfer.abortAuthChallenge();
|
||||
return cmd.sendResult(null, .{});
|
||||
}
|
||||
|
||||
// cancel the request, deinit the transfer on error.
|
||||
// TODO: double-decrement of interception_layer.intercepted if
|
||||
// continueTransfer fails: continueTransfer decrements unconditionally,
|
||||
// and the errdefer below decrements again via abortAuthChallenge.
|
||||
// Worse: if continueTransfer's failure path destroys the transfer
|
||||
// (start_callback fail in makeRequest), this errdefer hits a freed
|
||||
// transfer. Pre-existing; needs makeRequest failure-semantics cleanup.
|
||||
errdefer transfer.abortAuthChallenge();
|
||||
|
||||
const arena = request.params.arena;
|
||||
transfer.updateCredentials(try std.fmt.allocPrintSentinel(
|
||||
arena,
|
||||
transfer.arena,
|
||||
"{s}:{s}",
|
||||
.{
|
||||
params.authChallengeResponse.username,
|
||||
@@ -363,16 +371,20 @@ fn fulfillRequest(cmd: *CDP.Command) !void {
|
||||
return error.NotImplemented;
|
||||
}
|
||||
|
||||
const client = &bc.cdp.browser.http_client;
|
||||
var intercept_state = &bc.intercept_state;
|
||||
const request_id = try idFromRequestId(params.requestId);
|
||||
const transfer_id = try idFromRequestId(params.requestId);
|
||||
|
||||
const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound;
|
||||
var request = pending.request;
|
||||
if (!intercept_state.remove(transfer_id)) return error.RequestNotFound;
|
||||
const transfer = client.findTransfer(transfer_id) orelse {
|
||||
log.debug(.cdp, "intercept lookup miss", .{ .id = transfer_id, .op = "fulfill" });
|
||||
return cmd.sendResult(null, .{});
|
||||
};
|
||||
|
||||
log.debug(.cdp, "request intercept", .{
|
||||
.state = "fulfilled",
|
||||
.id = request.params.request_id,
|
||||
.url = request.params.url,
|
||||
.id = transfer.id,
|
||||
.url = transfer.url,
|
||||
.status = params.responseCode,
|
||||
.body = params.body != null,
|
||||
});
|
||||
@@ -380,13 +392,12 @@ fn fulfillRequest(cmd: *CDP.Command) !void {
|
||||
var body: ?[]const u8 = null;
|
||||
if (params.body) |b| {
|
||||
const decoder = std.base64.standard.Decoder;
|
||||
const buf = try request.params.arena.alloc(u8, try decoder.calcSizeForSlice(b));
|
||||
const buf = try transfer.arena.alloc(u8, try decoder.calcSizeForSlice(b));
|
||||
try decoder.decode(buf, b);
|
||||
body = buf;
|
||||
}
|
||||
|
||||
const client = &bc.cdp.browser.http_client;
|
||||
try client.interception_layer.fulfillRequest(client, request, params.responseCode, params.responseHeaders orelse &.{}, body);
|
||||
try client.interception_layer.fulfillRequest(transfer, params.responseCode, params.responseHeaders orelse &.{}, body);
|
||||
return cmd.sendResult(null, .{});
|
||||
}
|
||||
|
||||
@@ -397,19 +408,22 @@ fn failRequest(cmd: *CDP.Command) !void {
|
||||
errorReason: ErrorReason,
|
||||
})) orelse return error.InvalidParams;
|
||||
|
||||
var intercept_state = &bc.intercept_state;
|
||||
const request_id = try idFromRequestId(params.requestId);
|
||||
|
||||
const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound;
|
||||
const request = pending.request;
|
||||
|
||||
const client = &bc.cdp.browser.http_client;
|
||||
defer client.interception_layer.abortRequest(client, request);
|
||||
var intercept_state = &bc.intercept_state;
|
||||
const transfer_id = try idFromRequestId(params.requestId);
|
||||
|
||||
if (!intercept_state.remove(transfer_id)) return error.RequestNotFound;
|
||||
const transfer = client.findTransfer(transfer_id) orelse {
|
||||
log.debug(.cdp, "intercept lookup miss", .{ .id = transfer_id, .op = "fail" });
|
||||
return cmd.sendResult(null, .{});
|
||||
};
|
||||
|
||||
defer client.interception_layer.abortRequest(transfer);
|
||||
|
||||
log.info(.cdp, "request intercept", .{
|
||||
.state = "fail",
|
||||
.id = request_id,
|
||||
.url = request.params.url,
|
||||
.id = transfer.id,
|
||||
.url = transfer.url,
|
||||
.reason = params.errorReason,
|
||||
});
|
||||
return cmd.sendResult(null, .{});
|
||||
@@ -425,15 +439,15 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati
|
||||
// TODO: What to do when receiving replies for a previous frame's requests?
|
||||
|
||||
const transfer = intercept.transfer;
|
||||
try bc.intercept_state.putTransfer(transfer);
|
||||
var request = transfer.req;
|
||||
try bc.intercept_state.put(transfer.id);
|
||||
const request = &transfer.req;
|
||||
|
||||
const challenge = transfer._auth_challenge orelse return error.NullAuthChallenge;
|
||||
|
||||
try bc.cdp.sendEvent("Fetch.authRequired", .{
|
||||
.requestId = &id.toInterceptId(request.params.request_id),
|
||||
.requestId = &id.toInterceptId(transfer.id),
|
||||
.frameId = &id.toFrameId(request.params.frame_id),
|
||||
.request = network.RequestWriter.init(&request),
|
||||
.request = network.RequestWriter.init(transfer),
|
||||
.resourceType = switch (request.params.resource_type) {
|
||||
.script => "Script",
|
||||
.xhr => "XHR",
|
||||
@@ -446,13 +460,13 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati
|
||||
.scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "",
|
||||
.realm = challenge.realm orelse "",
|
||||
},
|
||||
.networkId = &id.toRequestId(&request),
|
||||
.networkId = &id.toRequestId(transfer),
|
||||
}, .{ .session_id = session_id });
|
||||
|
||||
log.debug(.cdp, "request auth required", .{
|
||||
.state = "paused",
|
||||
.id = request.params.request_id,
|
||||
.url = request.params.url,
|
||||
.id = transfer.id,
|
||||
.url = transfer.url,
|
||||
});
|
||||
// Await continueWithAuth
|
||||
|
||||
|
||||
@@ -27,7 +27,6 @@ const Mime = @import("../../browser/Mime.zig");
|
||||
const Notification = @import("../../Notification.zig");
|
||||
const timestamp = @import("../../datetime.zig").timestamp;
|
||||
const Transfer = @import("../../browser/HttpClient.zig").Transfer;
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
|
||||
const CdpStorage = @import("storage.zig");
|
||||
@@ -262,7 +261,7 @@ pub fn httpRequestFail(bc: *CDP.BrowserContext, msg: *const Notification.Request
|
||||
|
||||
// We're missing a bunch of fields, but, for now, this seems like enough
|
||||
try bc.cdp.sendEvent("Network.loadingFailed", .{
|
||||
.requestId = &id.toRequestId(msg.request),
|
||||
.requestId = &id.toRequestId(msg.transfer),
|
||||
// Seems to be what chrome answers with. I assume it depends on the type of error?
|
||||
.type = "Ping",
|
||||
.errorText = msg.err,
|
||||
@@ -275,7 +274,8 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques
|
||||
// things, but no session.
|
||||
const session_id = bc.session_id orelse return;
|
||||
|
||||
const req = msg.request;
|
||||
const transfer = msg.transfer;
|
||||
const req = &transfer.req;
|
||||
const frame_id = req.params.frame_id;
|
||||
const frame = bc.session.findFrameByFrameId(frame_id) orelse return;
|
||||
|
||||
@@ -287,11 +287,11 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques
|
||||
// We're missing a bunch of fields, but, for now, this eems like enough
|
||||
try bc.cdp.sendEvent("Network.requestWillBeSent", .{
|
||||
.frameId = &id.toFrameId(frame_id),
|
||||
.requestId = &id.toRequestId(req),
|
||||
.requestId = &id.toRequestId(transfer),
|
||||
.loaderId = &id.toLoaderId(req.params.loader_id),
|
||||
.type = req.params.resource_type.string(),
|
||||
.documentURL = frame.url,
|
||||
.request = RequestWriter.init(req),
|
||||
.request = RequestWriter.init(transfer),
|
||||
.initiator = .{ .type = "other" },
|
||||
.redirectHasExtraInfo = false, // TODO change after adding Network.requestWillBeSentExtraInfo
|
||||
.hasUserGesture = false,
|
||||
@@ -305,12 +305,13 @@ pub fn httpResponseHeaderDone(arena: Allocator, bc: *CDP.BrowserContext, msg: *c
|
||||
// things, but no session.
|
||||
const session_id = bc.session_id orelse return;
|
||||
|
||||
const req = msg.request;
|
||||
const transfer = msg.transfer;
|
||||
const req = &transfer.req;
|
||||
|
||||
// We're missing a bunch of fields, but, for now, this seems like enough
|
||||
try bc.cdp.sendEvent("Network.responseReceived", .{
|
||||
.frameId = &id.toFrameId(req.params.frame_id),
|
||||
.requestId = &id.toRequestId(req),
|
||||
.requestId = &id.toRequestId(transfer),
|
||||
.loaderId = &id.toLoaderId(req.params.loader_id),
|
||||
.response = ResponseWriter.init(arena, msg.response),
|
||||
.hasExtraInfo = false, // TODO change after adding Network.responseReceivedExtraInfo
|
||||
@@ -321,19 +322,18 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request
|
||||
// detachTarget could be called, in which case, we still have a frame doing
|
||||
// things, but no session.
|
||||
const session_id = bc.session_id orelse return;
|
||||
const req = msg.request;
|
||||
try bc.cdp.sendEvent("Network.loadingFinished", .{
|
||||
.requestId = &id.toRequestId(req),
|
||||
.requestId = &id.toRequestId(msg.transfer),
|
||||
.encodedDataLength = msg.content_length,
|
||||
}, .{ .session_id = session_id });
|
||||
}
|
||||
|
||||
pub const RequestWriter = struct {
|
||||
request: *Request,
|
||||
transfer: *Transfer,
|
||||
|
||||
pub fn init(request: *Request) RequestWriter {
|
||||
pub fn init(transfer: *Transfer) RequestWriter {
|
||||
return .{
|
||||
.request = request,
|
||||
.transfer = transfer,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -342,7 +342,8 @@ pub const RequestWriter = struct {
|
||||
}
|
||||
|
||||
fn _jsonStringify(self: *const RequestWriter, jws: anytype) !void {
|
||||
const request = self.request;
|
||||
const transfer = self.transfer;
|
||||
const request = &transfer.req;
|
||||
|
||||
try jws.beginObject();
|
||||
{
|
||||
@@ -376,7 +377,7 @@ pub const RequestWriter = struct {
|
||||
try jws.objectField(hdr.name);
|
||||
try jws.write(hdr.value);
|
||||
}
|
||||
if (try request.getCookieString()) |cookies| {
|
||||
if (try request.getCookieString(transfer.arena)) |cookies| {
|
||||
try jws.objectField("Cookie");
|
||||
try jws.write(cookies[0 .. cookies.len - 1]);
|
||||
}
|
||||
|
||||
@@ -40,14 +40,14 @@ pub fn toLoaderId(id: u32) [14]u8 {
|
||||
|
||||
// requestId has special requirements. If it's the main document navigation,
|
||||
// then it should match the loader id.
|
||||
const Request = @import("../browser/HttpClient.zig").Request;
|
||||
pub fn toRequestId(req: *const Request) [14]u8 {
|
||||
if (req.params.resource_type == .document) {
|
||||
return toLoaderId(req.params.loader_id);
|
||||
const Transfer = @import("../browser/HttpClient.zig").Transfer;
|
||||
pub fn toRequestId(transfer: *const Transfer) [14]u8 {
|
||||
if (transfer.req.params.resource_type == .document) {
|
||||
return toLoaderId(transfer.req.params.loader_id);
|
||||
}
|
||||
|
||||
var buf: [14]u8 = undefined;
|
||||
_ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{req.params.request_id}) catch unreachable;
|
||||
_ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{transfer.id}) catch unreachable;
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
||||
@@ -18,20 +18,21 @@
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
const log = lp.log;
|
||||
|
||||
const http = @import("../http.zig");
|
||||
const Client = @import("../../browser/HttpClient.zig").Client;
|
||||
const Transfer = @import("../../browser/HttpClient.zig").Transfer;
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
const Layer = @import("../../browser/HttpClient.zig").Layer;
|
||||
const Client = @import("../../browser/HttpClient.zig").Client;
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Transfer = @import("../../browser/HttpClient.zig").Transfer;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
|
||||
const Cache = @import("../cache/Cache.zig");
|
||||
const CachedMetadata = @import("../cache/Cache.zig").CachedMetadata;
|
||||
const CachedResponse = @import("../cache/Cache.zig").CachedResponse;
|
||||
|
||||
const Forward = @import("Forward.zig");
|
||||
|
||||
const log = lp.log;
|
||||
|
||||
const CacheLayer = @This();
|
||||
|
||||
next: Layer = undefined,
|
||||
@@ -45,54 +46,62 @@ pub fn layer(self: *CacheLayer) Layer {
|
||||
};
|
||||
}
|
||||
|
||||
fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void {
|
||||
fn request(ptr: *anyopaque, transfer: *Transfer) anyerror!void {
|
||||
const self: *CacheLayer = @ptrCast(@alignCast(ptr));
|
||||
const network = client.network;
|
||||
const req = &transfer.req;
|
||||
|
||||
if (req.params.method != .GET) {
|
||||
return self.next.request(client, req);
|
||||
return self.next.request(transfer);
|
||||
}
|
||||
|
||||
const arena = req.params.arena;
|
||||
const arena = transfer.arena;
|
||||
|
||||
var iter = req.params.headers.iterator();
|
||||
const req_header_list = try iter.collect(arena);
|
||||
|
||||
if (network.cache.?.get(arena, .{
|
||||
.url = req.params.url,
|
||||
if (transfer.client.network.cache.?.get(arena, .{
|
||||
.url = transfer.url,
|
||||
.timestamp = std.time.timestamp(),
|
||||
.request_headers = req_header_list.items,
|
||||
})) |cached| {
|
||||
// Cache hit: serve synchronously from the original callbacks, then
|
||||
// tear down. On error, the transfer is still alive and Client.request's
|
||||
// errdefer will handle cleanup (loop_owned is still false).
|
||||
try serveFromCache(req, &cached);
|
||||
client.deinitRequest(req);
|
||||
transfer.deinit();
|
||||
return;
|
||||
}
|
||||
|
||||
const cache_ctx = try arena.create(CacheContext);
|
||||
cache_ctx.* = .{
|
||||
// Cache miss: install wrappers so we can inspect the response and decide
|
||||
// whether to write the body into the cache when it's done.
|
||||
const ctx = try arena.create(CacheContext);
|
||||
ctx.* = .{
|
||||
.arena = arena,
|
||||
.client = client,
|
||||
.forward = Forward.fromRequest(req),
|
||||
.req_url = req.params.url,
|
||||
.transfer = transfer,
|
||||
.forward = Forward.capture(req),
|
||||
.req_url = transfer.url,
|
||||
.req_headers = req.params.headers,
|
||||
};
|
||||
|
||||
const wrapped = cache_ctx.forward.wrapRequest(
|
||||
req,
|
||||
cache_ctx,
|
||||
.{
|
||||
.start = CacheContext.startCallback,
|
||||
.header = CacheContext.headerCallback,
|
||||
.done = CacheContext.doneCallback,
|
||||
.shutdown = CacheContext.shutdownCallback,
|
||||
.err = CacheContext.errorCallback,
|
||||
},
|
||||
);
|
||||
req.ctx = ctx;
|
||||
req.header_callback = CacheContext.headerCallback;
|
||||
req.data_callback = CacheContext.dataCallback;
|
||||
req.done_callback = CacheContext.doneCallback;
|
||||
req.error_callback = CacheContext.errorCallback;
|
||||
|
||||
return self.next.request(client, wrapped);
|
||||
if (ctx.forward.start != null) {
|
||||
// req.ctx was changed, need to ovewrite this
|
||||
req.start_callback = CacheContext.startCallback;
|
||||
}
|
||||
if (ctx.forward.shutdown != null) {
|
||||
// req.ctx was changed, need to ovewrite this
|
||||
req.shutdown_callback = CacheContext.shutdownCallback;
|
||||
}
|
||||
|
||||
return self.next.request(transfer);
|
||||
}
|
||||
|
||||
fn serveFromCache(req: Request, cached: *const CachedResponse) !void {
|
||||
fn serveFromCache(req: *Request, cached: *const CachedResponse) !void {
|
||||
const response = Response.fromCached(req.ctx, cached);
|
||||
defer switch (cached.data) {
|
||||
.buffer => |_| {},
|
||||
@@ -137,32 +146,41 @@ fn serveFromCache(req: Request, cached: *const CachedResponse) !void {
|
||||
|
||||
const CacheContext = struct {
|
||||
arena: std.mem.Allocator,
|
||||
client: *Client,
|
||||
transfer: ?*Transfer = null,
|
||||
transfer: *Transfer,
|
||||
forward: Forward,
|
||||
req_url: [:0]const u8,
|
||||
req_headers: http.Headers,
|
||||
req_headers: @import("../http.zig").Headers,
|
||||
pending_metadata: ?*CachedMetadata = null,
|
||||
|
||||
fn startCallback(response: Response) anyerror!void {
|
||||
const self: *CacheContext = @ptrCast(@alignCast(response.ctx));
|
||||
self.transfer = response.inner.transfer;
|
||||
return self.forward.forwardStart(response);
|
||||
}
|
||||
|
||||
fn dataCallback(response: Response, chunk: []const u8) anyerror!void {
|
||||
const self: *CacheContext = @ptrCast(@alignCast(response.ctx));
|
||||
return self.forward.forwardData(response, chunk);
|
||||
}
|
||||
|
||||
fn headerCallback(response: Response) anyerror!bool {
|
||||
const self: *CacheContext = @ptrCast(@alignCast(response.ctx));
|
||||
const allocator = self.arena;
|
||||
|
||||
const transfer = response.inner.transfer;
|
||||
var rh = &transfer.response_header.?;
|
||||
// For non-transfer responses (fulfilled by interception, or future
|
||||
// cached-while-cached cases), there's nothing to inspect for caching
|
||||
// decisions — just forward.
|
||||
const transfer = switch (response.inner) {
|
||||
.transfer => |t| t,
|
||||
else => return self.forward.forwardHeader(response),
|
||||
};
|
||||
|
||||
const arena = self.arena;
|
||||
|
||||
const conn = transfer._conn.?;
|
||||
|
||||
const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null;
|
||||
|
||||
var rh = &transfer.response_header.?;
|
||||
const maybe_cm = try Cache.tryCache(
|
||||
allocator,
|
||||
arena,
|
||||
std.time.timestamp(),
|
||||
transfer.url,
|
||||
rh.status,
|
||||
@@ -176,7 +194,7 @@ const CacheContext = struct {
|
||||
|
||||
if (maybe_cm) |cm| {
|
||||
var iter = transfer.responseHeaderIterator();
|
||||
var header_list = try iter.collect(allocator);
|
||||
var header_list = try iter.collect(arena);
|
||||
const end_of_response = header_list.items.len;
|
||||
|
||||
if (vary) |vary_str| {
|
||||
@@ -186,16 +204,16 @@ const CacheContext = struct {
|
||||
while (vary_iter.next()) |part| {
|
||||
const name = std.mem.trim(u8, part, &std.ascii.whitespace);
|
||||
if (std.ascii.eqlIgnoreCase(hdr.name, name)) {
|
||||
try header_list.append(allocator, .{
|
||||
.name = try allocator.dupe(u8, hdr.name),
|
||||
.value = try allocator.dupe(u8, hdr.value),
|
||||
try header_list.append(arena, .{
|
||||
.name = try arena.dupe(u8, hdr.name),
|
||||
.value = try arena.dupe(u8, hdr.value),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const metadata = try allocator.create(CachedMetadata);
|
||||
const metadata = try arena.create(CachedMetadata);
|
||||
metadata.* = cm;
|
||||
metadata.headers = header_list.items[0..end_of_response];
|
||||
metadata.vary_headers = header_list.items[end_of_response..];
|
||||
@@ -207,10 +225,10 @@ const CacheContext = struct {
|
||||
|
||||
fn doneCallback(ctx: *anyopaque) anyerror!void {
|
||||
const self: *CacheContext = @ptrCast(@alignCast(ctx));
|
||||
const transfer = self.transfer orelse @panic("Start Callback didn't set CacheLayer.transfer");
|
||||
const transfer = self.transfer;
|
||||
|
||||
if (self.pending_metadata) |metadata| {
|
||||
const cache = &self.client.network.cache.?;
|
||||
const cache = &transfer.client.network.cache.?;
|
||||
|
||||
log.debug(.browser, "http cache", .{ .key = self.req_url, .metadata = metadata });
|
||||
cache.put(metadata.*, transfer._stream_buffer.items) catch |err| {
|
||||
|
||||
@@ -16,6 +16,9 @@
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
// A snapshot of the original ctx + callbacks from a Request, taken before a
|
||||
// layer overwrites them with its own wrappers. The layer's wrapper callbacks
|
||||
// call forwardX(...) to invoke the captured originals with the original ctx.
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
|
||||
@@ -29,7 +32,7 @@ done: Request.DoneCallback,
|
||||
err: Request.ErrorCallback,
|
||||
shutdown: ?Request.ShutdownCallback,
|
||||
|
||||
pub fn fromRequest(req: Request) Forward {
|
||||
pub fn capture(req: *const Request) Forward {
|
||||
return .{
|
||||
.ctx = req.ctx,
|
||||
.start = req.start_callback,
|
||||
@@ -41,68 +44,6 @@ pub fn fromRequest(req: Request) Forward {
|
||||
};
|
||||
}
|
||||
|
||||
pub const Overrides = struct {
|
||||
start: ?Request.StartCallback = null,
|
||||
header: ?Request.HeaderCallback = null,
|
||||
data: ?Request.DataCallback = null,
|
||||
done: ?Request.DoneCallback = null,
|
||||
err: ?Request.ErrorCallback = null,
|
||||
shutdown: ?Request.ShutdownCallback = null,
|
||||
};
|
||||
|
||||
pub fn wrapRequest(
|
||||
self: *Forward,
|
||||
req: Request,
|
||||
new_ctx: anytype,
|
||||
overrides: Overrides,
|
||||
) Request {
|
||||
const T = @TypeOf(new_ctx.*);
|
||||
const PassthroughT = makePassthrough(T, "forward");
|
||||
var wrapped = req;
|
||||
wrapped.ctx = new_ctx;
|
||||
wrapped.start_callback = overrides.start orelse if (self.start != null) PassthroughT.start else null;
|
||||
wrapped.header_callback = overrides.header orelse PassthroughT.header;
|
||||
wrapped.data_callback = overrides.data orelse PassthroughT.data;
|
||||
wrapped.done_callback = overrides.done orelse PassthroughT.done;
|
||||
wrapped.error_callback = overrides.err orelse PassthroughT.err;
|
||||
wrapped.shutdown_callback = overrides.shutdown orelse if (self.shutdown != null) PassthroughT.shutdown else null;
|
||||
return wrapped;
|
||||
}
|
||||
|
||||
fn makePassthrough(comptime T: type, comptime field: []const u8) type {
|
||||
return struct {
|
||||
pub fn start(response: Response) anyerror!void {
|
||||
const self: *T = @ptrCast(@alignCast(response.ctx));
|
||||
return @field(self, field).forwardStart(response);
|
||||
}
|
||||
|
||||
pub fn header(response: Response) anyerror!bool {
|
||||
const self: *T = @ptrCast(@alignCast(response.ctx));
|
||||
return @field(self, field).forwardHeader(response);
|
||||
}
|
||||
|
||||
pub fn data(response: Response, chunk: []const u8) anyerror!void {
|
||||
const self: *T = @ptrCast(@alignCast(response.ctx));
|
||||
return @field(self, field).forwardData(response, chunk);
|
||||
}
|
||||
|
||||
pub fn done(ctx_ptr: *anyopaque) anyerror!void {
|
||||
const self: *T = @ptrCast(@alignCast(ctx_ptr));
|
||||
return @field(self, field).forwardDone();
|
||||
}
|
||||
|
||||
pub fn err(ctx_ptr: *anyopaque, e: anyerror) void {
|
||||
const self: *T = @ptrCast(@alignCast(ctx_ptr));
|
||||
@field(self, field).forwardErr(e);
|
||||
}
|
||||
|
||||
pub fn shutdown(ctx_ptr: *anyopaque) void {
|
||||
const self: *T = @ptrCast(@alignCast(ctx_ptr));
|
||||
@field(self, field).forwardShutdown();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub fn forwardStart(self: Forward, response: Response) anyerror!void {
|
||||
var fwd = response;
|
||||
fwd.ctx = self.ctx;
|
||||
|
||||
@@ -26,6 +26,7 @@ const IS_DEBUG = builtin.mode == .Debug;
|
||||
const http = @import("../http.zig");
|
||||
const Client = @import("../../browser/HttpClient.zig").Client;
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Transfer = @import("../../browser/HttpClient.zig").Transfer;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
const FulfilledResponse = @import("../../browser/HttpClient.zig").FulfilledResponse;
|
||||
const Layer = @import("../../browser/HttpClient.zig").Layer;
|
||||
@@ -33,13 +34,11 @@ const Forward = @import("Forward.zig");
|
||||
|
||||
const InterceptionLayer = @This();
|
||||
|
||||
// Count of intercepted requests. This is to help deal with intercepted requests.
|
||||
// The client doesn't track intercepted transfers. If a request is intercepted,
|
||||
// the client forgets about it and requires the interceptor to continue or abort
|
||||
// it. That works well, except if we only rely on active, we might think there's
|
||||
// no more network activity when, with interecepted requests, there might be more
|
||||
// in the future. (We really only need this to properly emit a 'networkIdle' and
|
||||
// 'networkAlmostIdle' Page.lifecycleEvent in CDP).
|
||||
// Count of intercepted requests. The client doesn't track intercepted transfers
|
||||
// on its own active counters: once intercepted, a transfer leaves the layer
|
||||
// chain and waits for the interceptor (CDP) to call continue/abort/fulfill.
|
||||
// We track them here so the network-idle / network-almost-idle CDP lifecycle
|
||||
// events don't fire prematurely.
|
||||
intercepted: usize = 0,
|
||||
|
||||
next: Layer = undefined,
|
||||
@@ -51,35 +50,33 @@ pub fn layer(self: *InterceptionLayer) Layer {
|
||||
};
|
||||
}
|
||||
|
||||
fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void {
|
||||
fn request(ptr: *anyopaque, transfer: *Transfer) anyerror!void {
|
||||
const self: *InterceptionLayer = @ptrCast(@alignCast(ptr));
|
||||
const req = &transfer.req;
|
||||
|
||||
const intercept_ctx = try in_req.params.arena.create(InterceptContext);
|
||||
intercept_ctx.* = .{
|
||||
.client = client,
|
||||
.forward = Forward.fromRequest(in_req),
|
||||
const ctx = try transfer.arena.create(InterceptContext);
|
||||
ctx.* = .{
|
||||
.layer = self,
|
||||
.request = in_req,
|
||||
.transfer = transfer,
|
||||
.forward = Forward.capture(req),
|
||||
};
|
||||
|
||||
var req = intercept_ctx.forward.wrapRequest(
|
||||
in_req,
|
||||
intercept_ctx,
|
||||
.{
|
||||
.start = InterceptContext.startCallback,
|
||||
.header = InterceptContext.headerCallback,
|
||||
.data = InterceptContext.dataCallback,
|
||||
.done = InterceptContext.doneCallback,
|
||||
.err = InterceptContext.errorCallback,
|
||||
.shutdown = InterceptContext.shutdownCallback,
|
||||
},
|
||||
);
|
||||
// Install our wrappers on the transfer's request. The interceptor wants to
|
||||
// observe every callback (start/header/data/done/err/shutdown) so it can
|
||||
// mirror the Network.* CDP events.
|
||||
req.ctx = ctx;
|
||||
if (ctx.forward.start != null) req.start_callback = InterceptContext.startCallback;
|
||||
req.header_callback = InterceptContext.headerCallback;
|
||||
req.data_callback = InterceptContext.dataCallback;
|
||||
req.done_callback = InterceptContext.doneCallback;
|
||||
req.error_callback = InterceptContext.errorCallback;
|
||||
if (ctx.forward.shutdown != null) req.shutdown_callback = InterceptContext.shutdownCallback;
|
||||
|
||||
req.params.notification.dispatch(.http_request_start, &.{ .request = &req });
|
||||
req.params.notification.dispatch(.http_request_start, &.{ .transfer = transfer });
|
||||
|
||||
var wait_for_interception = false;
|
||||
req.params.notification.dispatch(.http_request_intercept, &.{
|
||||
.request = &req,
|
||||
.transfer = transfer,
|
||||
.wait_for_interception = &wait_for_interception,
|
||||
});
|
||||
|
||||
@@ -90,40 +87,44 @@ fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void {
|
||||
});
|
||||
|
||||
if (!wait_for_interception) {
|
||||
return self.next.request(client, req);
|
||||
return self.next.request(transfer);
|
||||
}
|
||||
|
||||
// Paused: the CDP listener stashed `transfer` and will eventually call
|
||||
// continueRequest / abortRequest / fulfillRequest. Until then, CDP owns
|
||||
// the transfer's lifecycle, so flag it loop_owned to keep the outer
|
||||
// Client.request errdefer from tearing it down.
|
||||
self.intercepted += 1;
|
||||
transfer.loop_owned = true;
|
||||
if (comptime IS_DEBUG) {
|
||||
log.debug(.http, "wait for interception", .{ .intercepted = self.intercepted });
|
||||
}
|
||||
}
|
||||
|
||||
pub const InterceptContext = struct {
|
||||
client: *Client,
|
||||
forward: Forward,
|
||||
layer: *InterceptionLayer,
|
||||
request: Request,
|
||||
transfer: *Transfer,
|
||||
forward: Forward,
|
||||
content_length: usize = 0,
|
||||
|
||||
fn startCallback(response: Response) anyerror!void {
|
||||
const self: *InterceptContext = @ptrCast(@alignCast(response.ctx));
|
||||
log.debug(.http, "intercept start", .{ .url = self.request.params.url });
|
||||
log.debug(.http, "intercept start", .{ .url = self.transfer.url });
|
||||
return self.forward.forwardStart(response);
|
||||
}
|
||||
|
||||
fn headerCallback(response: Response) anyerror!bool {
|
||||
const self: *InterceptContext = @ptrCast(@alignCast(response.ctx));
|
||||
log.debug(.http, "intercept header", .{
|
||||
.url = self.request.params.url,
|
||||
.url = self.transfer.url,
|
||||
.status = response.status(),
|
||||
.content_length = response.contentLength(),
|
||||
});
|
||||
|
||||
self.content_length = response.contentLength() orelse 0;
|
||||
|
||||
self.request.params.notification.dispatch(.http_response_header_done, &.{
|
||||
.request = &self.request,
|
||||
self.transfer.req.params.notification.dispatch(.http_response_header_done, &.{
|
||||
.transfer = self.transfer,
|
||||
.response = &response,
|
||||
});
|
||||
|
||||
@@ -133,13 +134,13 @@ pub const InterceptContext = struct {
|
||||
fn dataCallback(response: Response, chunk: []const u8) anyerror!void {
|
||||
const self: *InterceptContext = @ptrCast(@alignCast(response.ctx));
|
||||
log.debug(.http, "intercept data", .{
|
||||
.url = self.request.params.url,
|
||||
.url = self.transfer.url,
|
||||
.len = chunk.len,
|
||||
});
|
||||
|
||||
self.request.params.notification.dispatch(.http_response_data, &.{
|
||||
self.transfer.req.params.notification.dispatch(.http_response_data, &.{
|
||||
.data = chunk,
|
||||
.request = &self.request,
|
||||
.transfer = self.transfer,
|
||||
});
|
||||
|
||||
return self.forward.forwardData(response, chunk);
|
||||
@@ -149,12 +150,12 @@ pub const InterceptContext = struct {
|
||||
const self: *InterceptContext = @ptrCast(@alignCast(ctx));
|
||||
|
||||
log.debug(.http, "intercept done", .{
|
||||
.url = self.request.params.url,
|
||||
.url = self.transfer.url,
|
||||
.content_length = self.content_length,
|
||||
});
|
||||
|
||||
self.request.params.notification.dispatch(.http_request_done, &.{
|
||||
.request = &self.request,
|
||||
self.transfer.req.params.notification.dispatch(.http_request_done, &.{
|
||||
.transfer = self.transfer,
|
||||
.content_length = self.content_length,
|
||||
});
|
||||
return self.forward.forwardDone();
|
||||
@@ -164,11 +165,11 @@ pub const InterceptContext = struct {
|
||||
const self: *InterceptContext = @ptrCast(@alignCast(ctx));
|
||||
|
||||
log.debug(.http, "intercept error", .{
|
||||
.url = self.request.params.url,
|
||||
.url = self.transfer.url,
|
||||
.err = err,
|
||||
});
|
||||
self.request.params.notification.dispatch(.http_request_fail, &.{
|
||||
.request = &self.request,
|
||||
self.transfer.req.params.notification.dispatch(.http_request_fail, &.{
|
||||
.transfer = self.transfer,
|
||||
.err = err,
|
||||
});
|
||||
self.forward.forwardErr(err);
|
||||
@@ -177,50 +178,82 @@ pub const InterceptContext = struct {
|
||||
fn shutdownCallback(ctx: *anyopaque) void {
|
||||
const self: *InterceptContext = @ptrCast(@alignCast(ctx));
|
||||
|
||||
log.debug(.http, "intercept shutdown", .{ .url = self.request.params.url });
|
||||
self.request.params.notification.dispatch(.http_request_fail, &.{
|
||||
.request = &self.request,
|
||||
log.debug(.http, "intercept shutdown", .{ .url = self.transfer.url });
|
||||
self.transfer.req.params.notification.dispatch(.http_request_fail, &.{
|
||||
.transfer = self.transfer,
|
||||
.err = error.Shutdown,
|
||||
});
|
||||
self.forward.forwardShutdown();
|
||||
}
|
||||
};
|
||||
|
||||
// CDP Callbacks
|
||||
// These handle their own clean up on errors with `self.next.request`.
|
||||
// This is because they don't pass their error up the chain as they are async callbacks.
|
||||
// CDP-driven resolution entry points. The transfer was paused inside `request`
|
||||
// (loop_owned = true). One of these three is called by CDP to resume / drop
|
||||
// the transfer.
|
||||
|
||||
pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) anyerror!void {
|
||||
pub fn continueRequest(self: *InterceptionLayer, transfer: *Transfer) anyerror!void {
|
||||
if (comptime IS_DEBUG) {
|
||||
lp.assert(self.intercepted > 0, "InterceptionLayer.continueRequest", .{ .value = self.intercepted });
|
||||
log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted });
|
||||
}
|
||||
|
||||
self.intercepted -= 1;
|
||||
self.next.request(client, req) catch |err| {
|
||||
const ctx: *InterceptContext = @ptrCast(@alignCast(req.ctx));
|
||||
req.error_callback(req.ctx, err);
|
||||
ctx.client.deinitRequest(req);
|
||||
|
||||
// Resume the layer chain. Ownership is re-handed to whichever subsequent
|
||||
// layer commits the transfer (queue, multi, or another pause). If the
|
||||
// chain fails before any commit, we clean up here. Mirror the errdefer
|
||||
// pattern in Client.request.
|
||||
transfer.loop_owned = false;
|
||||
self.next.request(transfer) catch |err| {
|
||||
if (!transfer.loop_owned) {
|
||||
transfer.abort(err);
|
||||
}
|
||||
return err;
|
||||
};
|
||||
}
|
||||
|
||||
pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) void {
|
||||
pub fn abortRequest(self: *InterceptionLayer, transfer: *Transfer) void {
|
||||
if (comptime IS_DEBUG) {
|
||||
lp.assert(self.intercepted > 0, "InterceptionLayer.abortRequest", .{ .value = self.intercepted });
|
||||
log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted });
|
||||
}
|
||||
self.intercepted -= 1;
|
||||
|
||||
req.error_callback(req.ctx, error.Abort);
|
||||
client.deinitRequest(req);
|
||||
transfer.abort(error.Abort);
|
||||
}
|
||||
|
||||
fn fulfillInner(
|
||||
req: Request,
|
||||
pub fn fulfillRequest(
|
||||
self: *InterceptionLayer,
|
||||
transfer: *Transfer,
|
||||
status: u16,
|
||||
headers: []const http.Header,
|
||||
body: ?[]const u8,
|
||||
) !void {
|
||||
if (comptime IS_DEBUG) {
|
||||
lp.assert(self.intercepted > 0, "InterceptionLayer.fulfillRequest", .{ .value = self.intercepted });
|
||||
log.debug(.http, "fulfill transfer", .{ .intercepted = self.intercepted });
|
||||
}
|
||||
self.intercepted -= 1;
|
||||
|
||||
// `done` flips true once we've called the user's done_callback. If
|
||||
// done_callback itself throws, the user already saw their end-of-flow
|
||||
// notification; suppress error_callback to avoid double-notify.
|
||||
var done: bool = false;
|
||||
fulfillInner(&transfer.req, status, headers, body, &done) catch |err| {
|
||||
if (!done) {
|
||||
transfer.abort(err);
|
||||
} else {
|
||||
transfer.deinit();
|
||||
}
|
||||
return err;
|
||||
};
|
||||
transfer.deinit();
|
||||
}
|
||||
|
||||
fn fulfillInner(
|
||||
req: *Request,
|
||||
status: u16,
|
||||
headers: []const http.Header,
|
||||
body: ?[]const u8,
|
||||
done: *bool,
|
||||
) !void {
|
||||
const fulfilled = FulfilledResponse{
|
||||
.status = status,
|
||||
@@ -244,27 +277,6 @@ fn fulfillInner(
|
||||
try req.data_callback(response, b);
|
||||
}
|
||||
|
||||
done.* = true;
|
||||
try req.done_callback(req.ctx);
|
||||
}
|
||||
|
||||
pub fn fulfillRequest(
|
||||
self: *InterceptionLayer,
|
||||
client: *Client,
|
||||
req: Request,
|
||||
status: u16,
|
||||
headers: []const http.Header,
|
||||
body: ?[]const u8,
|
||||
) !void {
|
||||
if (comptime IS_DEBUG) {
|
||||
lp.assert(self.intercepted > 0, "InterceptionLayer.fulfillRequest", .{ .value = self.intercepted });
|
||||
log.debug(.http, "fulfill transfer", .{ .intercepted = self.intercepted });
|
||||
}
|
||||
|
||||
self.intercepted -= 1;
|
||||
defer client.deinitRequest(req);
|
||||
|
||||
fulfillInner(req, status, headers, body) catch |err| {
|
||||
req.error_callback(req.ctx, err);
|
||||
return err;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -18,21 +18,27 @@
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
const log = lp.log;
|
||||
|
||||
const URL = @import("../../browser/URL.zig");
|
||||
const Robots = @import("../Robots.zig");
|
||||
const Client = @import("../../browser/HttpClient.zig").Client;
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
const Layer = @import("../../browser/HttpClient.zig").Layer;
|
||||
const Client = @import("../../browser/HttpClient.zig").Client;
|
||||
const Transfer = @import("../../browser/HttpClient.zig").Transfer;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
|
||||
const Robots = @import("../Robots.zig");
|
||||
const Network = @import("../Network.zig");
|
||||
|
||||
const Forward = @import("Forward.zig");
|
||||
|
||||
const log = lp.log;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const RobotsLayer = @This();
|
||||
|
||||
next: Layer = undefined,
|
||||
allocator: std.mem.Allocator,
|
||||
pending: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty,
|
||||
network: *Network,
|
||||
allocator: Allocator,
|
||||
pending: std.StringHashMapUnmanaged(std.ArrayList(*Transfer)) = .empty,
|
||||
|
||||
pub fn layer(self: *RobotsLayer) Layer {
|
||||
return .{
|
||||
@@ -43,7 +49,7 @@ pub fn layer(self: *RobotsLayer) Layer {
|
||||
};
|
||||
}
|
||||
|
||||
pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void {
|
||||
pub fn deinit(self: *RobotsLayer, allocator: Allocator) void {
|
||||
var it = self.pending.iterator();
|
||||
while (it.next()) |entry| {
|
||||
entry.value_ptr.deinit(allocator);
|
||||
@@ -51,35 +57,38 @@ pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void {
|
||||
self.pending.deinit(allocator);
|
||||
}
|
||||
|
||||
fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void {
|
||||
fn request(ptr: *anyopaque, transfer: *Transfer) anyerror!void {
|
||||
const self: *RobotsLayer = @ptrCast(@alignCast(ptr));
|
||||
|
||||
const arena = req.params.arena;
|
||||
const robots_url = try URL.getRobotsUrl(arena, req.params.url);
|
||||
if (transfer.req.params.skip_robots) {
|
||||
return self.next.request(transfer);
|
||||
}
|
||||
|
||||
if (client.network.robot_store.get(robots_url)) |robot_entry| {
|
||||
const url = transfer.url;
|
||||
const robots_url = try URL.getRobotsUrl(transfer.arena, url);
|
||||
|
||||
if (self.network.robot_store.get(robots_url)) |robot_entry| {
|
||||
switch (robot_entry) {
|
||||
.present => |robots| {
|
||||
const path = URL.getPathname(req.params.url);
|
||||
const path = URL.getPathname(url);
|
||||
|
||||
if (!robots.isAllowed(path)) {
|
||||
log.warn(.http, "blocked by robots", .{ .url = req.params.url });
|
||||
log.warn(.http, "blocked by robots", .{ .url = url });
|
||||
return error.RobotsBlocked;
|
||||
}
|
||||
},
|
||||
.absent => {},
|
||||
}
|
||||
return self.next.request(client, req);
|
||||
return self.next.request(transfer);
|
||||
}
|
||||
|
||||
return self.fetchRobotsThenRequest(client, robots_url, req);
|
||||
return self.fetchRobotsThenRequest(robots_url, transfer);
|
||||
}
|
||||
|
||||
fn fetchRobotsThenRequest(
|
||||
self: *RobotsLayer,
|
||||
client: *Client,
|
||||
robots_url: [:0]const u8,
|
||||
req: Request,
|
||||
transfer: *Transfer,
|
||||
) !void {
|
||||
const entry = try self.pending.getOrPut(self.allocator, robots_url);
|
||||
|
||||
@@ -87,84 +96,95 @@ fn fetchRobotsThenRequest(
|
||||
errdefer std.debug.assert(self.pending.remove(robots_url));
|
||||
entry.value_ptr.* = .empty;
|
||||
|
||||
// This arena is later owned by the Request. It does not need to be cleaned up by us because
|
||||
// it will be cleaned up by the `Transfer.deinit()` or any `Request.deinit()` called on any sublayers.
|
||||
const new_arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer.RobotsContext");
|
||||
errdefer client.network.app.arena_pool.release(new_arena);
|
||||
|
||||
const robots_ctx = try new_arena.create(RobotsContext);
|
||||
const robots_ctx = try transfer.arena.create(RobotsContext);
|
||||
robots_ctx.* = .{
|
||||
.layer = self,
|
||||
.client = client,
|
||||
.arena = new_arena,
|
||||
.robots_url = robots_url,
|
||||
.buffer = .empty,
|
||||
.arena = transfer.arena,
|
||||
.robots_url = robots_url,
|
||||
};
|
||||
|
||||
const headers = try client.newHeaders();
|
||||
log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url });
|
||||
var params = transfer.req.params;
|
||||
if (@typeInfo(@TypeOf(params)) != .@"struct") {
|
||||
// protect against mutating the original request
|
||||
@compileError("expected request.params to be a struct");
|
||||
}
|
||||
|
||||
try self.next.request(client, .{
|
||||
// CRITICAL: build a fresh Headers for the inner robots fetch.
|
||||
// params is value-copied from the parent's req.params, but
|
||||
// Headers is a struct wrapping a *curl_slist — value copy shares
|
||||
// the pointer. Letting Client.request take ownership of a shared
|
||||
// headers list means both transfers will free it at deinit time
|
||||
// -> double-free. The robots.txt fetch is a system-level GET
|
||||
// anyway, no need to inherit the parent's user headers.
|
||||
params.headers = try transfer.client.newHeaders();
|
||||
errdefer params.headers.deinit();
|
||||
params.method = .GET;
|
||||
params.url = robots_url;
|
||||
params.skip_robots = true;
|
||||
params.resource_type = .fetch;
|
||||
params.body = null;
|
||||
|
||||
log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url });
|
||||
try transfer.client.request(.{
|
||||
.ctx = robots_ctx,
|
||||
.params = .{
|
||||
// We have to do this ourselves because we are not going through the top level `request`.
|
||||
.arena = new_arena,
|
||||
.request_id = client.incrReqId(),
|
||||
.url = robots_url,
|
||||
.method = .GET,
|
||||
.headers = headers,
|
||||
.frame_id = req.params.frame_id,
|
||||
.loader_id = req.params.loader_id,
|
||||
.cookie_jar = req.params.cookie_jar,
|
||||
.cookie_origin = req.params.cookie_origin,
|
||||
.notification = req.params.notification,
|
||||
.resource_type = .fetch,
|
||||
},
|
||||
.params = params,
|
||||
.header_callback = RobotsContext.headerCallback,
|
||||
.data_callback = RobotsContext.dataCallback,
|
||||
.done_callback = RobotsContext.doneCallback,
|
||||
.error_callback = RobotsContext.errorCallback,
|
||||
.shutdown_callback = RobotsContext.shutdownCallback,
|
||||
});
|
||||
}, transfer.owner);
|
||||
}
|
||||
|
||||
try entry.value_ptr.append(self.allocator, req);
|
||||
try entry.value_ptr.append(self.allocator, transfer);
|
||||
// Parked: RobotsLayer owns destruction via flushPending / flushPendingShutdown
|
||||
// until robots.txt resolves. Without this, Client.request's errdefer (or
|
||||
// any caller's cleanup) would deinit a transfer that's still on the
|
||||
// pending list, leaving flushPending with a dangling pointer.
|
||||
transfer.loop_owned = true;
|
||||
}
|
||||
|
||||
fn flushPending(self: *RobotsLayer, client: *Client, robots_url: [:0]const u8, allowed: bool) void {
|
||||
var queued = self.pending.fetchRemove(robots_url) orelse
|
||||
@panic("RobotsLayer.flushPending: missing queue");
|
||||
fn flushPending(self: *RobotsLayer, robots_url: [:0]const u8, allowed: bool) void {
|
||||
var queued = self.pending.fetchRemove(robots_url) orelse @panic("RobotsLayer.flushPending: missing queue");
|
||||
defer queued.value.deinit(self.allocator);
|
||||
|
||||
for (queued.value.items) |queued_req| {
|
||||
for (queued.value.items) |transfer| {
|
||||
if (!allowed) {
|
||||
log.warn(.http, "blocked by robots", .{ .url = queued_req.params.url });
|
||||
defer client.deinitRequest(queued_req);
|
||||
queued_req.error_callback(queued_req.ctx, error.RobotsBlocked);
|
||||
log.warn(.http, "blocked by robots", .{ .url = transfer.url });
|
||||
transfer.abort(error.RobotsBlocked);
|
||||
} else {
|
||||
self.next.request(client, queued_req) catch |e| {
|
||||
defer client.deinitRequest(queued_req);
|
||||
queued_req.error_callback(queued_req.ctx, e);
|
||||
// Reset ownership: handing back to the layer chain. If a downstream
|
||||
// layer commits (multi / queue / pause), it'll flip loop_owned back
|
||||
// to true. If it fails before committing, we clean up here.
|
||||
transfer.loop_owned = false;
|
||||
self.next.request(transfer) catch |e| {
|
||||
if (!transfer.loop_owned) {
|
||||
transfer.abort(e);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8, client: *Client) void {
|
||||
var queued = self.pending.fetchRemove(robots_url) orelse
|
||||
// Invariant: shutdown_callback fires on a Transfer only via Transfer.kill,
|
||||
// and the only callers of kill are Client.abortOwner / .abortRequests
|
||||
// (owner-driven teardown). So if THIS robots fetch's shutdown_callback
|
||||
// fired, the owner is being torn down — every parked transfer in this
|
||||
// pending queue is on the same owner list and is already being killed by
|
||||
// the same walk. We just need to drop the pending entry; the owner walk
|
||||
// handles the rest. (If a future code path adds per-transfer kill
|
||||
// without owner teardown, this assumption breaks — see comment above
|
||||
// detachOrDeinit in HttpClient.zig.)
|
||||
fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8) void {
|
||||
var pending = self.pending.fetchRemove(robots_url) orelse
|
||||
@panic("RobotsLayer.flushPendingShutdown: missing queue");
|
||||
defer queued.value.deinit(self.allocator);
|
||||
|
||||
for (queued.value.items) |queued_req| {
|
||||
defer client.deinitRequest(queued_req);
|
||||
if (queued_req.shutdown_callback) |cb| cb(queued_req.ctx);
|
||||
}
|
||||
pending.value.deinit(self.allocator);
|
||||
}
|
||||
|
||||
const RobotsContext = struct {
|
||||
layer: *RobotsLayer,
|
||||
arena: std.mem.Allocator,
|
||||
client: *Client,
|
||||
arena: Allocator,
|
||||
robots_url: [:0]const u8,
|
||||
buffer: std.ArrayList(u8),
|
||||
status: u16 = 0,
|
||||
@@ -199,11 +219,10 @@ const RobotsContext = struct {
|
||||
fn doneCallback(ctx_ptr: *anyopaque) anyerror!void {
|
||||
const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr));
|
||||
const l = self.layer;
|
||||
const client = self.client;
|
||||
const robots_url = self.robots_url;
|
||||
|
||||
var allowed = true;
|
||||
const network = client.network;
|
||||
const network = l.network;
|
||||
|
||||
switch (self.status) {
|
||||
200 => {
|
||||
@@ -218,7 +237,7 @@ const RobotsContext = struct {
|
||||
};
|
||||
if (robots) |r| {
|
||||
try network.robot_store.put(robots_url, r);
|
||||
const path = URL.getPathname(l.pending.get(robots_url).?.items[0].params.url);
|
||||
const path = URL.getPathname(l.pending.get(robots_url).?.items[0].req.params.url);
|
||||
allowed = r.isAllowed(path);
|
||||
}
|
||||
}
|
||||
@@ -236,26 +255,24 @@ const RobotsContext = struct {
|
||||
},
|
||||
}
|
||||
|
||||
l.flushPending(client, robots_url, allowed);
|
||||
l.flushPending(robots_url, allowed);
|
||||
}
|
||||
|
||||
fn errorCallback(ctx_ptr: *anyopaque, err: anyerror) void {
|
||||
const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr));
|
||||
const l = self.layer;
|
||||
const client = self.client;
|
||||
const robots_url = self.robots_url;
|
||||
|
||||
log.warn(.http, "robots fetch failed", .{ .err = err });
|
||||
l.flushPending(client, robots_url, true);
|
||||
l.flushPending(robots_url, true);
|
||||
}
|
||||
|
||||
fn shutdownCallback(ctx_ptr: *anyopaque) void {
|
||||
const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr));
|
||||
const l = self.layer;
|
||||
const client = self.client;
|
||||
const robots_url = self.robots_url;
|
||||
|
||||
log.debug(.http, "robots fetch shutdown", .{});
|
||||
l.flushPendingShutdown(robots_url, client);
|
||||
l.flushPendingShutdown(robots_url);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -18,13 +18,15 @@
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
const log = lp.log;
|
||||
|
||||
const WebBotAuth = @import("../WebBotAuth.zig");
|
||||
|
||||
const URL = @import("../../browser/URL.zig");
|
||||
const WebBotAuth = @import("../WebBotAuth.zig");
|
||||
const Client = @import("../../browser/HttpClient.zig").Client;
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Layer = @import("../../browser/HttpClient.zig").Layer;
|
||||
const Client = @import("../../browser/HttpClient.zig").Client;
|
||||
const Transfer = @import("../../browser/HttpClient.zig").Transfer;
|
||||
|
||||
const log = lp.log;
|
||||
|
||||
const WebBotAuthLayer = @This();
|
||||
|
||||
@@ -37,15 +39,13 @@ pub fn layer(self: *WebBotAuthLayer) Layer {
|
||||
};
|
||||
}
|
||||
|
||||
fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void {
|
||||
fn request(ptr: *anyopaque, transfer: *Transfer) anyerror!void {
|
||||
const self: *WebBotAuthLayer = @ptrCast(@alignCast(ptr));
|
||||
var our_req = req;
|
||||
|
||||
const wba = client.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth");
|
||||
const wba = transfer.client.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth");
|
||||
|
||||
const arena = req.params.arena;
|
||||
const authority = URL.getHost(req.params.url);
|
||||
try wba.signRequest(arena, &our_req.params.headers, authority);
|
||||
const authority = URL.getHost(transfer.url);
|
||||
try wba.signRequest(transfer.arena, &transfer.req.params.headers, authority);
|
||||
|
||||
return self.next.request(client, our_req);
|
||||
return self.next.request(transfer);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user