diff --git a/src/Notification.zig b/src/Notification.zig index d01d29ce..f429e7b9 100644 --- a/src/Notification.zig +++ b/src/Notification.zig @@ -21,6 +21,9 @@ const lp = @import("lightpanda"); const Frame = @import("browser/Frame.zig"); const Transfer = @import("browser/HttpClient.zig").Transfer; +const Request = @import("browser/HttpClient.zig").Request; +const Response = @import("browser/HttpClient.zig").Response; +const InterceptContext = @import("network/layer/InterceptionLayer.zig").InterceptContext; const log = lp.log; const List = std.DoublyLinkedList; @@ -162,11 +165,11 @@ pub const FrameLoaded = struct { }; pub const RequestStart = struct { - transfer: *Transfer, + request: *Request, }; pub const RequestIntercept = struct { - transfer: *Transfer, + request: *Request, wait_for_interception: *bool, }; @@ -177,19 +180,21 @@ pub const RequestAuthRequired = struct { pub const ResponseData = struct { data: []const u8, - transfer: *Transfer, + request: *Request, }; pub const ResponseHeaderDone = struct { - transfer: *Transfer, + request: *Request, + response: *const Response, }; pub const RequestDone = struct { - transfer: *Transfer, + request: *Request, + content_length: usize, }; pub const RequestFail = struct { - transfer: *Transfer, + request: *Request, err: anyerror, }; diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig index e407ab12..3e8ff90f 100644 --- a/src/browser/Frame.zig +++ b/src/browser/Frame.zig @@ -647,16 +647,18 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo http_client.request(.{ .ctx = self, - .url = self.url, - .frame_id = self._frame_id, - .loader_id = self._loader_id, - .method = opts.method, - .headers = headers, - .body = opts.body, - .cookie_jar = &session.cookie_jar, - .cookie_origin = self.url, - .resource_type = .document, - .notification = self._session.notification, + .params = .{ + .url = self.url, + .frame_id = self._frame_id, + .loader_id = self._loader_id, + .method = opts.method, + .headers = headers, + .body = opts.body, + .cookie_jar = &session.cookie_jar, + .cookie_origin = self.url, + .resource_type = .document, + .notification = self._session.notification, + }, .header_callback = frameHeaderDoneCallback, .data_callback = frameDataCallback, .done_callback = frameDoneCallback, diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 0832db1a..a810c535 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -27,7 +27,6 @@ const CookieJar = @import("webapi/storage/Cookie.zig").Jar; const http = @import("../network/http.zig"); const Network = @import("../network/Network.zig"); const Robots = @import("../network/Robots.zig"); -const Cache = @import("../network/cache/Cache.zig"); const timestamp = @import("../datetime.zig").timestamp; const log = lp.log; @@ -40,8 +39,12 @@ pub const Method = http.Method; pub const Headers = http.Headers; pub const ResponseHead = http.ResponseHead; pub const HeaderIterator = http.HeaderIterator; -const CacheMetadata = Cache.CachedMetadata; -const CachedResponse = Cache.CachedResponse; +const CachedResponse = @import("../network/cache/Cache.zig").CachedResponse; + +pub const CacheLayer = @import("../network/layer/CacheLayer.zig"); +pub const RobotsLayer = @import("../network/layer/RobotsLayer.zig"); +pub const WebBotAuthLayer = @import("../network/layer/WebBotAuthLayer.zig"); +pub const InterceptionLayer = @import("../network/layer/InterceptionLayer.zig"); // This is loosely tied to a browser Page. Loading all the , doing // XHR requests, and loading imports all happens through here. Sine the app @@ -62,15 +65,6 @@ ws_active: usize = 0, // Count of active http requests http_active: usize = 0, -// Count of intercepted requests. This is to help deal with intercepted requests. -// The client doesn't track intercepted transfers. If a request is intercepted, -// the client forgets about it and requires the interceptor to continue or abort -// it. That works well, except if we only rely on active, we might think there's -// no more network activity when, with interecepted requests, there might be more -// in the future. (We really only need this to properly emit a 'networkIdle' and -// 'networkAlmostIdle' Page.lifecycleEvent in CDP). -intercepted: usize = 0, - // Our curl multi handle. handles: http.Handles, @@ -101,10 +95,6 @@ allocator: Allocator, network: *Network, -// Queue of requests that depend on a robots.txt. -// Allows us to fetch the robots.txt just once. -pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty, - // Once we have a handle/easy to process a request with, we create a Transfer // which contains the Request as well as any state we need to process the // request. These will come and go with each request. @@ -134,6 +124,30 @@ cdp_client: ?CDPClient = null, max_response_size: usize, +cache_layer: CacheLayer, +robots_layer: RobotsLayer, +web_bot_auth_layer: WebBotAuthLayer, +interception_layer: InterceptionLayer, +entry_layer: Layer, + +pub const Layer = struct { + ptr: *anyopaque, + vtable: *const VTable, + + pub const VTable = struct { + request: *const fn (*anyopaque, *Client, Request) anyerror!void, + }; + + pub fn request(self: Layer, client: *Client, req: Request) !void { + return self.vtable.request(self.ptr, client, req); + } +}; + +fn layerWith(self: anytype, next: Layer) Layer { + self.next = next; + return self.layer(); +} + // libcurl can monitor arbitrary sockets, this lets us use libcurl to poll // both HTTP data as well as messages from an CDP connection. // Furthermore, we have some tension between blocking scripts and request @@ -175,8 +189,32 @@ pub fn init(allocator: Allocator, network: *Network) !*Client { .tls_verify = network.config.tlsVerifyHost(), .obey_robots = network.config.obeyRobots(), .max_response_size = network.config.httpMaxResponseSize() orelse std.math.maxInt(u32), + + .cache_layer = .{}, + .robots_layer = .{ .allocator = allocator }, + .web_bot_auth_layer = .{}, + .interception_layer = .{}, + .entry_layer = undefined, }; + var next = client.layer(); + + if (network.config.obeyRobots()) { + next = layerWith(&client.robots_layer, next); + } + + if (network.config.httpCacheDir() != null) { + next = layerWith(&client.cache_layer, next); + } + + next = layerWith(&client.interception_layer, next); + + if (network.config.webBotAuth() != null) { + next = layerWith(&client.web_bot_auth_layer, next); + } + + client.entry_layer = next; + return client; } @@ -185,17 +223,20 @@ pub fn deinit(self: *Client) void { self.handles.deinit(); self.transfer_pool.deinit(); - - var robots_iter = self.pending_robots_queue.iterator(); - while (robots_iter.next()) |entry| { - entry.value_ptr.deinit(self.allocator); - } - self.pending_robots_queue.deinit(self.allocator); - self.clearUserAgentOverride(); + + self.robots_layer.deinit(self.allocator); + self.allocator.destroy(self); } +pub fn layer(self: *Client) Layer { + return .{ + .ptr = self, + .vtable = &.{ .request = _request }, + }; +} + // Set a user agent override. Both the raw UA string and the pre-formatted // "User-Agent: " header string are allocated from self.allocator. pub fn setUserAgentOverride(self: *Client, ua: []const u8) !void { @@ -285,7 +326,7 @@ fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32) void { const transfer: *Transfer = @fieldParentPtr("_node", node); if (comptime abort_all) { transfer.kill(); - } else if (transfer.req.frame_id == frame_id) { + } else if (transfer.req.params.frame_id == frame_id) { q.remove(node); transfer.kill(); } @@ -322,7 +363,7 @@ fn abortConnections(list: std.DoublyLinkedList, comptime abort_all: bool, frame_ const conn: *http.Connection = @fieldParentPtr("node", node); switch (conn.transport) { .http => |transfer| { - if ((comptime abort_all) or transfer.req.frame_id == frame_id) { + if ((comptime abort_all) or transfer.req.params.frame_id == frame_id) { transfer.kill(); } }, @@ -349,346 +390,104 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { return self.perform(@intCast(timeout_ms)); } -pub fn request(self: *Client, req: Request) !void { - if (self.obey_robots == false) { - return self.processRequest(req); - } - - const robots_url = try URL.getRobotsUrl(self.allocator, req.url); - errdefer self.allocator.free(robots_url); - - // If we have this robots cached, we can take a fast path. - if (self.network.robot_store.get(robots_url)) |robot_entry| { - defer self.allocator.free(robots_url); - - switch (robot_entry) { - // If we have a found robots entry, we check it. - .present => |robots| { - const path = URL.getPathname(req.url); - if (!robots.isAllowed(path)) { - req.error_callback(req.ctx, error.RobotsBlocked); - return; - } - }, - // Otherwise, we assume we won't find it again. - .absent => {}, - } - - return self.processRequest(req); - } - return self.fetchRobotsThenProcessRequest(robots_url, req); -} - -fn serveFromCache(req: Request, cached: *const CachedResponse) !void { - const response = Response.fromCached(req.ctx, cached); - defer switch (cached.data) { - .buffer => |_| {}, - .file => |f| f.file.close(), - }; - - if (req.start_callback) |cb| { - try cb(response); - } - - const proceed = try req.header_callback(response); - if (!proceed) { - req.error_callback(req.ctx, error.Abort); - return; - } - - switch (cached.data) { - .buffer => |data| { - if (data.len > 0) { - try req.data_callback(response, data); - } - }, - .file => |f| { - const file = f.file; - - var buf: [1024]u8 = undefined; - var file_reader = file.reader(&buf); - try file_reader.seekTo(f.offset); - const reader = &file_reader.interface; - - var read_buf: [1024]u8 = undefined; - var remaining = f.len; - - while (remaining > 0) { - const read_len = @min(read_buf.len, remaining); - const n = try reader.readSliceShort(read_buf[0..read_len]); - if (n == 0) break; - remaining -= n; - try req.data_callback(response, read_buf[0..n]); - } - }, - } - - try req.done_callback(req.ctx); -} - -fn processRequest(self: *Client, req: Request) !void { - if (self.network.cache) |*cache| { - if (req.method == .GET) { - // cache is only used to read the meta data - const arena = try self.network.app.arena_pool.acquire(.small, "HttpClient.cache"); - defer self.network.app.arena_pool.release(arena); - - var iter = req.headers.iterator(); - const req_header_list = try iter.collect(arena); - - if (cache.get(arena, .{ - .url = req.url, - .timestamp = std.time.timestamp(), - .request_headers = req_header_list.items, - })) |cached| { - defer req.headers.deinit(); - return serveFromCache(req, &cached); - } - } - } - +pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { + const self: *Client = @ptrCast(@alignCast(ptr)); const transfer = try self.makeTransfer(req); - - transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); - - var wait_for_interception = false; - transfer.req.notification.dispatch(.http_request_intercept, &.{ - .transfer = transfer, - .wait_for_interception = &wait_for_interception, - }); - if (wait_for_interception == false) { - // request not intercepted, process it normally - return self.process(transfer); - } - - self.intercepted += 1; - if (comptime IS_DEBUG) { - log.debug(.http, "wait for interception", .{ .intercepted = self.intercepted }); - } - transfer._intercept_state = .pending; - - if (req.blocking == false) { - // The request was interecepted, but it isn't a blocking request, so we - // dont' need to block this call. The request will be unblocked - // asynchronously via either continueTransfer or abortTransfer - return; - } - - if (try self.waitForInterceptedResponse(transfer)) { - return self.process(transfer); - } + return self.process(transfer); } -const RobotsRequestContext = struct { - client: *Client, - req: Request, - robots_url: [:0]const u8, - buffer: std.ArrayList(u8), - status: u16 = 0, +pub fn request(self: *Client, req: Request) !void { + // Assign Request Id. + var our_req = req; + our_req.params.request_id = self.incrReqId(); - pub fn deinit(self: *RobotsRequestContext) void { - self.client.allocator.free(self.robots_url); - self.buffer.deinit(self.client.allocator); - self.client.allocator.destroy(self); + const arena = try self.network.app.arena_pool.acquire(.small, "Request.arena"); + our_req.params.arena = arena; + + return self.entry_layer.request(self, our_req) catch |err| { + our_req.error_callback(our_req.ctx, err); + self.deinitRequest(our_req); + return err; + }; +} + +const SyncContext = struct { + allocator: Allocator, + completion: union(enum) { + in_progress: void, + done: void, + err: anyerror, + shutdown: void, + } = .in_progress, + + status: u16 = 0, + body: std.ArrayList(u8), + + fn headerCallback(response: Response) anyerror!bool { + const self: *SyncContext = @ptrCast(@alignCast(response.ctx)); + lp.assert(response.status() != null, "HttpClient.SyncRequest.headerCallback", .{ .value = response.status() }); + self.status = response.status().?; + if (response.contentLength()) |cl| { + try self.body.ensureTotalCapacity(self.allocator, cl); + } + return true; + } + + fn dataCallback(response: Response, data: []const u8) anyerror!void { + const self: *SyncContext = @ptrCast(@alignCast(response.ctx)); + try self.body.appendSlice(self.allocator, data); + } + + fn doneCallback(ctx: *anyopaque) anyerror!void { + const self: *SyncContext = @ptrCast(@alignCast(ctx)); + self.completion = .done; + } + + fn errorCallback(ctx: *anyopaque, err: anyerror) void { + const self: *SyncContext = @ptrCast(@alignCast(ctx)); + self.completion = .{ .err = err }; + } + + fn shutdownCallback(ctx: *anyopaque) void { + const self: *SyncContext = @ptrCast(@alignCast(ctx)); + self.completion = .shutdown; } }; -fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: Request) !void { - const entry = try self.pending_robots_queue.getOrPut(self.allocator, robots_url); +pub fn syncRequest(self: *Client, allocator: Allocator, params: RequestParams) !SyncResponse { + var sync_ctx = SyncContext{ .allocator = allocator, .body = .empty }; + errdefer sync_ctx.body.deinit(allocator); - if (!entry.found_existing) { - errdefer self.allocator.free(robots_url); + try self.request(.{ + .params = params, + .ctx = &sync_ctx, + .header_callback = SyncContext.headerCallback, + .data_callback = SyncContext.dataCallback, + .done_callback = SyncContext.doneCallback, + .error_callback = SyncContext.errorCallback, + .shutdown_callback = SyncContext.shutdownCallback, + }); - // If we aren't already fetching this robots, - // we want to create a new queue for it and add this request into it. - entry.value_ptr.* = .empty; - - const ctx = try self.allocator.create(RobotsRequestContext); - errdefer self.allocator.destroy(ctx); - ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty }; - const headers = try self.newHeaders(); - - log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); - try self.processRequest(.{ - .ctx = ctx, - .url = robots_url, - .method = .GET, - .headers = headers, - .blocking = false, - .frame_id = req.frame_id, - .loader_id = req.loader_id, - .cookie_jar = req.cookie_jar, - .cookie_origin = req.cookie_origin, - .notification = req.notification, - .resource_type = .fetch, - .header_callback = robotsHeaderCallback, - .data_callback = robotsDataCallback, - .done_callback = robotsDoneCallback, - .error_callback = robotsErrorCallback, - .shutdown_callback = robotsShutdownCallback, - }); - } else { - // Not using our own robots URL, only using the one from the first request. - self.allocator.free(robots_url); - } - - try entry.value_ptr.append(self.allocator, req); -} - -fn robotsHeaderCallback(response: Response) !bool { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx)); - // Robots callbacks only happen on real live requests. - const transfer = response.inner.transfer; - - if (transfer.response_header) |hdr| { - log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = ctx.robots_url }); - ctx.status = hdr.status; - } - - if (transfer.getContentLength()) |cl| { - try ctx.buffer.ensureTotalCapacity(ctx.client.allocator, cl); - } - - return true; -} - -fn robotsDataCallback(response: Response, data: []const u8) !void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx)); - try ctx.buffer.appendSlice(ctx.client.allocator, data); -} - -fn robotsDoneCallback(ctx_ptr: *anyopaque) !void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.deinit(); - - var allowed = true; - - switch (ctx.status) { - 200 => { - if (ctx.buffer.items.len > 0) { - const robots: ?Robots = ctx.client.network.robot_store.robotsFromBytes( - ctx.client.getUserAgent(), - ctx.buffer.items, - ) catch blk: { - log.warn(.browser, "failed to parse robots", .{ .robots_url = ctx.robots_url }); - // If we fail to parse, we just insert it as absent and ignore. - try ctx.client.network.robot_store.putAbsent(ctx.robots_url); - break :blk null; - }; - - if (robots) |r| { - try ctx.client.network.robot_store.put(ctx.robots_url, r); - const path = URL.getPathname(ctx.req.url); - allowed = r.isAllowed(path); - } - } - }, - 404 => { - log.debug(.http, "robots not found", .{ .url = ctx.robots_url }); - // If we get a 404, we just insert it as absent. - try ctx.client.network.robot_store.putAbsent(ctx.robots_url); - }, - else => { - log.debug(.http, "unexpected status on robots", .{ .url = ctx.robots_url, .status = ctx.status }); - // If we get an unexpected status, we just insert as absent. - try ctx.client.network.robot_store.putAbsent(ctx.robots_url); - }, - } - - var queued = ctx.client.pending_robots_queue.fetchRemove( - ctx.robots_url, - ) orelse @panic("Client.robotsDoneCallbacke empty queue"); - defer queued.value.deinit(ctx.client.allocator); - - for (queued.value.items) |queued_req| { - if (!allowed) { - log.warn(.http, "blocked by robots", .{ .url = queued_req.url }); - queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); - } else { - ctx.client.processRequest(queued_req) catch |e| { - queued_req.error_callback(queued_req.ctx, e); - }; - } - } -} - -fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.deinit(); - - log.warn(.http, "robots fetch failed", .{ .err = err }); - - var queued = ctx.client.pending_robots_queue.fetchRemove( - ctx.robots_url, - ) orelse @panic("Client.robotsErrorCallback empty queue"); - defer queued.value.deinit(ctx.client.allocator); - - // On error, allow all queued requests to proceed - for (queued.value.items) |queued_req| { - ctx.client.processRequest(queued_req) catch |e| { - queued_req.error_callback(queued_req.ctx, e); - }; - } -} - -fn robotsShutdownCallback(ctx_ptr: *anyopaque) void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.deinit(); - - log.debug(.http, "robots fetch shutdown", .{}); - - var queued = ctx.client.pending_robots_queue.fetchRemove( - ctx.robots_url, - ) orelse @panic("Client.robotsErrorCallback empty queue"); - defer queued.value.deinit(ctx.client.allocator); - - for (queued.value.items) |queued_req| { - if (queued_req.shutdown_callback) |shutdown_cb| { - shutdown_cb(queued_req.ctx); - } - } -} - -fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { - // The request was intercepted and is blocking. This is messy, but our - // callers, the ScriptManager -> Page, don't have a great way to stop the - // parser and return control to the CDP server to wait for the interception - // response. We have some information on the CDPClient, so we'll do the - // blocking here. (This is a bit of a legacy thing. Initially the Client - // had a 'extra_socket' that it could monitor. It was named 'extra_socket' - // to appear generic, but really, that 'extra_socket' was always the CDP - // socket. Because we already had the "extra_socket" here, it was easier to - // make it even more CDP- aware and turn `extra_socket: socket_t` into the - // current CDPClient and do the blocking here). - const cdp_client = self.cdp_client.?; - const ctx = cdp_client.ctx; - - if (cdp_client.blocking_read_start(ctx) == false) { - return error.BlockingInterceptFailure; - } - - defer _ = cdp_client.blocking_read_end(ctx); - - while (true) { - if (cdp_client.blocking_read(ctx) == false) { - return error.BlockingInterceptFailure; - } - - switch (transfer._intercept_state) { - .pending => continue, // keep waiting - .@"continue" => return true, - .abort => |err| { - transfer.abort(err); - return false; + while (sync_ctx.completion == .in_progress) { + const status = try self.tick(200); + log.debug(.http, "sync request tick", .{ .status = status }); + switch (status) { + .cdp_socket => { + const cdp = self.cdp_client.?; + _ = cdp.blocking_read(cdp.ctx); }, - .fulfilled => { - // callbacks already called, just need to cleanups - transfer.deinit(); - return false; - }, - .not_intercepted => unreachable, + .normal => continue, } } + + switch (sync_ctx.completion) { + .in_progress => @panic("Impossible to be in progress here."), + .done, .shutdown => return .{ + .status = sync_ctx.status, + .body = sync_ctx.body, + }, + .err => |e| return e, + } } // Above, request will not process if there's an interception request. In such @@ -706,50 +505,6 @@ fn process(self: *Client, transfer: *Transfer) !void { self.queue.append(&transfer._node); } -// For an intercepted request -pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { - if (comptime IS_DEBUG) { - std.debug.assert(transfer._intercept_state != .not_intercepted); - log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); - } - self.intercepted -= 1; - - if (!transfer.req.blocking) { - return self.process(transfer); - } - transfer._intercept_state = .@"continue"; -} - -// For an intercepted request -pub fn abortTransfer(self: *Client, transfer: *Transfer) void { - if (comptime IS_DEBUG) { - std.debug.assert(transfer._intercept_state != .not_intercepted); - log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); - } - self.intercepted -= 1; - - if (!transfer.req.blocking) { - transfer.abort(error.Abort); - } - transfer._intercept_state = .{ .abort = error.Abort }; -} - -// For an intercepted request -pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { - if (comptime IS_DEBUG) { - std.debug.assert(transfer._intercept_state != .not_intercepted); - log.debug(.http, "filfull transfer", .{ .intercepted = self.intercepted }); - } - self.intercepted -= 1; - - try transfer.fulfill(status, headers, body); - if (!transfer.req.blocking) { - transfer.deinit(); - return; - } - transfer._intercept_state = .fulfilled; -} - pub fn nextReqId(self: *Client) u32 { return self.next_request_id +% 1; } @@ -761,17 +516,13 @@ pub fn incrReqId(self: *Client) u32 { } fn makeTransfer(self: *Client, req: Request) !*Transfer { - errdefer req.headers.deinit(); - const transfer = try self.transfer_pool.create(); errdefer self.transfer_pool.destroy(transfer); - const id = self.incrReqId(); transfer.* = .{ .start_time = timestamp(.monotonic), - .arena = ArenaAllocator.init(self.allocator), - .id = id, - .url = req.url, + .id = req.params.request_id, + .url = req.params.url, .req = req, .client = self, }; @@ -788,11 +539,6 @@ fn requestFailed(transfer: *Transfer, err: anyerror, comptime execute_callback: transfer._notified_fail = true; - transfer.req.notification.dispatch(.http_request_fail, &.{ - .transfer = transfer, - .err = err, - }); - if (execute_callback) { transfer.req.error_callback(transfer.req.ctx, err); } else if (transfer.req.shutdown_callback) |cb| { @@ -900,9 +646,6 @@ fn perform(self: *Client, timeout_ms: c_int) anyerror!PerformStatus { } fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *Transfer) !bool { - // Detect auth challenge from response headers. - // Also check on RecvError: proxy may send 407 with headers before - // closing the connection (CONNECT tunnel not yet established). if (msg.err == null or msg.err.? == error.RecvError) { transfer.detectAuthChallenge(msg.conn); } @@ -911,43 +654,21 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T // TODO give a way to configure the number of auth retries. if (transfer._auth_challenge != null and transfer._tries < 10) { var wait_for_interception = false; - transfer.req.notification.dispatch( + transfer.req.params.notification.dispatch( .http_request_auth_required, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }, ); if (wait_for_interception) { - self.intercepted += 1; + self.interception_layer.intercepted += 1; if (comptime IS_DEBUG) { - log.debug(.http, "wait for auth interception", .{ .intercepted = self.intercepted }); + log.debug(.http, "wait for auth interception", .{ .intercepted = self.interception_layer.intercepted }); } - transfer._intercept_state = .pending; // Whether or not this is a blocking request, we're not going // to process it now. We can end the transfer, which will // release the easy handle back into the pool. The transfer // is still valid/alive (just has no handle). transfer.releaseConn(); - if (!transfer.req.blocking) { - // In the case of an async request, we can just "forget" - // about this transfer until it gets updated asynchronously - // from some CDP command. - return false; - } - - // In the case of a sync request, we need to block until we - // get the CDP command for handling this case. - if (try self.waitForInterceptedResponse(transfer)) { - // we've been asked to continue with the request - // we can't process it here, since we're already inside - // a process, so we need to queue it and wait for the - // next tick (this is why it was safe to releaseConn - // above, because even in the "blocking" path, we still - // only process it on the next tick). - self.queue.append(&transfer._node); - } else { - // aborted, already cleaned up - } - return false; } } @@ -1017,34 +738,18 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T if (transfer._stream_buffer.items.len > 0) { try transfer.req.data_callback(Response.fromTransfer(transfer), body); - transfer.req.notification.dispatch(.http_response_data, &.{ - .data = body, - .transfer = transfer, - }); - if (transfer.aborted) { transfer.requestFailed(error.Abort, true); return true; } } - if (transfer._pending_cache_metadata) |metadata| { - const cache = &self.network.cache.?; - cache.put(metadata.*, body) catch |err| { - log.warn(.cache, "cache put failed", .{ .err = err }); - }; - } - // release conn ASAP so that it's available; some done_callbacks // will load more resources. transfer.releaseConn(); try transfer.req.done_callback(transfer.req.ctx); - transfer.req.notification.dispatch(.http_request_done, &.{ - .transfer = transfer, - }); - return true; } @@ -1154,7 +859,12 @@ fn ensureNoActiveConnection(self: *const Client) !void { } } -pub const Request = struct { +pub const RequestParams = struct { + /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. + arena: Allocator = undefined, + /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. + request_id: u32 = undefined, + frame_id: u32, loader_id: u32, method: Method, @@ -1168,23 +878,6 @@ pub const Request = struct { notification: *Notification, timeout_ms: u32 = 0, - // This is only relevant for intercepted requests. If a request is flagged - // as blocking AND is intercepted, then it'll be up to us to wait until - // we receive a response to the interception. This probably isn't ideal, - // but it's harder for our caller (ScriptManager) to deal with this. One - // reason for that is the Http Client is already a bit CDP-aware. - blocking: bool = false, - - // arbitrary data that can be associated with this request - ctx: *anyopaque = undefined, - - start_callback: ?*const fn (response: Response) anyerror!void = null, - header_callback: *const fn (response: Response) anyerror!bool, - data_callback: *const fn (response: Response, data: []const u8) anyerror!void, - done_callback: *const fn (ctx: *anyopaque) anyerror!void, - error_callback: *const fn (ctx: *anyopaque, err: anyerror) void, - shutdown_callback: ?*const fn (ctx: *anyopaque) void = null, - const ResourceType = enum { document, xhr, @@ -1204,6 +897,62 @@ pub const Request = struct { }; } }; + + pub fn deinit(self: *const RequestParams) void { + self.headers.deinit(); + } +}; + +pub const Request = struct { + pub const StartCallback = *const fn (response: Response) anyerror!void; + pub const HeaderCallback = *const fn (response: Response) anyerror!bool; + pub const DataCallback = *const fn (response: Response, data: []const u8) anyerror!void; + pub const DoneCallback = *const fn (ctx: *anyopaque) anyerror!void; + pub const ErrorCallback = *const fn (ctx: *anyopaque, err: anyerror) void; + pub const ShutdownCallback = *const fn (ctx: *anyopaque) void; + + params: RequestParams, + // arbitrary data that can be associated with this request + ctx: *anyopaque = undefined, + + start_callback: ?StartCallback = null, + header_callback: HeaderCallback, + data_callback: DataCallback, + done_callback: DoneCallback, + error_callback: ErrorCallback, + shutdown_callback: ?ShutdownCallback = null, + + pub fn getCookieString(self: *Request) !?[:0]const u8 { + const jar = self.params.cookie_jar orelse return null; + var aw: std.Io.Writer.Allocating = .init(self.params.arena); + try jar.forRequest(self.params.url, &aw.writer, .{ + .is_http = true, + .origin_url = self.params.cookie_origin, + .is_navigation = self.params.resource_type == .document, + }); + const written = aw.written(); + if (written.len == 0) return null; + try aw.writer.writeByte(0); + return written.ptr[0..written.len :0]; + } + + pub fn deinit(self: *const Request) void { + self.params.deinit(); + } +}; + +pub const FulfilledResponse = struct { + status: u16, + url: [:0]const u8, + headers: []const http.Header, + body: ?[]const u8, + + pub fn contentType(self: *const FulfilledResponse) ?[]const u8 { + for (self.headers) |hdr| { + if (std.ascii.eqlIgnoreCase(hdr.name, "content-type")) return hdr.value; + } + return null; + } }; pub const Response = struct { @@ -1211,6 +960,7 @@ pub const Response = struct { inner: union(enum) { transfer: *Transfer, cached: *const CachedResponse, + fulfilled: *const FulfilledResponse, }, pub fn fromTransfer(transfer: *Transfer) Response { @@ -1221,10 +971,15 @@ pub const Response = struct { return .{ .ctx = ctx, .inner = .{ .cached = resp } }; } + pub fn fromFulfilled(ctx: *anyopaque, fulfilled: *const FulfilledResponse) Response { + return .{ .ctx = ctx, .inner = .{ .fulfilled = fulfilled } }; + } + pub fn status(self: Response) ?u16 { return switch (self.inner) { .transfer => |t| if (t.response_header) |rh| rh.status else null, .cached => |c| c.metadata.status, + .fulfilled => |f| f.status, }; } @@ -1232,6 +987,7 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| if (t.response_header) |*rh| rh.contentType() else null, .cached => |c| c.metadata.content_type, + .fulfilled => |f| f.contentType(), }; } @@ -1242,13 +998,14 @@ pub const Response = struct { .buffer => |buf| @intCast(buf.len), .file => |f| @intCast(f.len), }, + .fulfilled => |f| if (f.body) |b| @intCast(b.len) else null, }; } pub fn redirectCount(self: Response) ?u32 { return switch (self.inner) { .transfer => |t| if (t.response_header) |rh| rh.redirect_count else null, - .cached => 0, + .cached, .fulfilled => 0, }; } @@ -1256,6 +1013,7 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| t.url, .cached => |c| c.metadata.url, + .fulfilled => |f| f.url, }; } @@ -1263,13 +1021,14 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| t.responseHeaderIterator(), .cached => |c| HeaderIterator{ .list = .{ .list = c.metadata.headers } }, + .fulfilled => |f| HeaderIterator{ .list = .{ .list = f.headers } }, }; } pub fn abort(self: Response, err: anyerror) void { switch (self.inner) { .transfer => |t| t.abort(err), - .cached => {}, + .cached, .fulfilled => {}, } } @@ -1277,12 +1036,21 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| try t.format(writer), .cached => |c| try c.format(writer), + .fulfilled => |f| try writer.print("fulfilled {s}", .{f.url}), }; } }; +pub const SyncResponse = struct { + status: u16, + body: std.ArrayList(u8), + + pub fn deinit(self: *SyncResponse, allocator: Allocator) void { + self.body.deinit(allocator); + } +}; + pub const Transfer = struct { - arena: ArenaAllocator, id: u32 = 0, req: Request, url: [:0]const u8, @@ -1290,7 +1058,6 @@ pub const Transfer = struct { // total bytes received in the response, including the response status line, // the headers, and the [encoded] body. bytes_received: usize = 0, - _pending_cache_metadata: ?*CacheMetadata = null, start_time: u64, aborted: bool = false, @@ -1327,15 +1094,6 @@ pub const Transfer = struct { // for when a Transfer is queued in the client.queue _node: std.DoublyLinkedList.Node = .{}, - _intercept_state: InterceptState = .not_intercepted, - - const InterceptState = union(enum) { - not_intercepted, - pending, - @"continue", - abort: anyerror, - fulfilled, - }; fn releaseConn(self: *Transfer) void { if (self._conn) |conn| { @@ -1350,8 +1108,7 @@ pub const Transfer = struct { self._conn = null; } - self.req.headers.deinit(); - self.arena.deinit(); + self.client.deinitRequest(self.req); self.client.transfer_pool.destroy(self); } @@ -1408,11 +1165,6 @@ pub const Transfer = struct { if (self._notified_fail) return; self._notified_fail = true; - self.req.notification.dispatch(.http_request_fail, &.{ - .transfer = self, - .err = err, - }); - if (execute_callback) { self.req.error_callback(self.req.ctx, err); } else if (self.req.shutdown_callback) |cb| { @@ -1430,38 +1182,32 @@ pub const Transfer = struct { try conn.setProxy(client.http_proxy); try conn.setTlsVerify(client.tls_verify, client.use_proxy); - try conn.setURL(req.url); - try conn.setMethod(req.method); - if (req.body) |b| { + try conn.setURL(req.params.url); + try conn.setMethod(req.params.method); + if (req.params.body) |b| { try conn.setBody(b); } else { try conn.setGetMode(); } - var header_list = req.headers; + var header_list = req.params.headers; try conn.secretHeaders(&header_list, &client.network.config.http_headers); try conn.setHeaders(&header_list); - // If we have WebBotAuth, sign our request. - if (client.network.web_bot_auth) |*wba| { - const authority = URL.getHost(req.url); - try wba.signRequest(self.arena.allocator(), &header_list, authority); - } - // Add cookies from cookie jar. - if (try self.getCookieString()) |cookies| { + if (try self.req.getCookieString()) |cookies| { try conn.setCookies(@ptrCast(cookies.ptr)); } conn.transport = .{ .http = self }; // Per-request timeout override (e.g. XHR timeout) - if (req.timeout_ms > 0) { - try conn.setTimeout(req.timeout_ms); + if (req.params.timeout_ms > 0) { + try conn.setTimeout(req.params.timeout_ms); } // add credentials - if (req.credentials) |creds| { + if (req.params.credentials) |creds| { if (self._auth_challenge != null and self._auth_challenge.?.source == .proxy) { try conn.setProxyCredentials(creds); } else { @@ -1510,23 +1256,9 @@ pub const Transfer = struct { } } - pub fn getCookieString(self: *Transfer) !?[:0]const u8 { - const jar = self.req.cookie_jar orelse return null; - var aw: std.Io.Writer.Allocating = .init(self.arena.allocator()); - try jar.forRequest(self.req.url, &aw.writer, .{ - .is_http = true, - .origin_url = self.req.cookie_origin, - .is_navigation = self.req.resource_type == .document, - }); - const written = aw.written(); - if (written.len == 0) return null; - try aw.writer.writeByte(0); - return written.ptr[0..written.len :0]; - } - pub fn format(self: *Transfer, writer: *std.Io.Writer) !void { const req = self.req; - return writer.print("{s} {s}", .{ @tagName(req.method), req.url }); + return writer.print("{s} {s}", .{ @tagName(req.params.method), req.params.url }); } pub fn updateURL(self: *Transfer, url: [:0]const u8) !void { @@ -1534,13 +1266,13 @@ pub const Transfer = struct { self.url = url; // for the request itself - self.req.url = url; + self.req.params.url = url; } fn handleRedirect(transfer: *Transfer) !void { const req = &transfer.req; const conn = transfer._conn.?; - const arena = transfer.arena.allocator(); + const arena = transfer.req.params.arena; transfer._redirect_count += 1; if (transfer._redirect_count > transfer.client.network.config.httpMaxRedirects()) { @@ -1548,7 +1280,7 @@ pub const Transfer = struct { } // retrieve cookies from the redirect's response. - if (req.cookie_jar) |jar| { + if (req.params.cookie_jar) |jar| { var i: usize = 0; while (conn.getResponseHeader("set-cookie", i)) |ct| : (i += 1) { try jar.populateFromResponse(transfer.url, ct.value); @@ -1592,8 +1324,8 @@ pub const Transfer = struct { // 307, 308 → keep method and body. const status = try conn.getResponseCode(); if (status == 301 or status == 302 or status == 303) { - req.method = .GET; - req.body = null; + req.params.method = .GET; + req.params.body = null; } } @@ -1620,11 +1352,11 @@ pub const Transfer = struct { } pub fn updateCredentials(self: *Transfer, userpwd: [:0]const u8) void { - self.req.credentials = userpwd; + self.req.params.credentials = userpwd; } pub fn replaceRequestHeaders(self: *Transfer, allocator: Allocator, headers: []const http.Header) !void { - self.req.headers.deinit(); + self.req.params.headers.deinit(); var buf: std.ArrayList(u8) = .empty; var new_headers = try self.client.newHeaders(); @@ -1636,7 +1368,7 @@ pub const Transfer = struct { try buf.append(allocator, 0); // null terminated try new_headers.add(buf.items[0 .. buf.items.len - 1 :0]); } - self.req.headers = new_headers; + self.req.params.headers = new_headers; } // abortAuthChallenge is called when an auth challenge interception is @@ -1644,15 +1376,12 @@ pub const Transfer = struct { // before interception process. pub fn abortAuthChallenge(self: *Transfer) void { if (comptime IS_DEBUG) { - std.debug.assert(self._intercept_state != .not_intercepted); - log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.intercepted }); + log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.interception_layer.intercepted }); } - self.client.intercepted -= 1; - if (!self.req.blocking) { - self.abort(error.AbortAuthChallenge); - return; - } - self._intercept_state = .{ .abort = error.AbortAuthChallenge }; + + self.client.interception_layer.intercepted -= 1; + self.abort(error.AbortAuthChallenge); + return; } // headerDoneCallback is called once the headers have been read. @@ -1664,7 +1393,7 @@ pub const Transfer = struct { try transfer.buildResponseHeader(conn); - if (transfer.req.cookie_jar) |jar| { + if (transfer.req.params.cookie_jar) |jar| { var i: usize = 0; while (true) { const ct = conn.getResponseHeader("set-cookie", i); @@ -1684,65 +1413,11 @@ pub const Transfer = struct { } } - transfer.req.notification.dispatch(.http_response_header_done, &.{ - .transfer = transfer, - }); - const proceed = transfer.req.header_callback(Response.fromTransfer(transfer)) catch |err| { log.err(.http, "header_callback", .{ .err = err, .req = transfer }); return err; }; - if (transfer.client.network.cache != null and transfer.req.method == .GET) { - const rh = &transfer.response_header.?; - const allocator = transfer.arena.allocator(); - - const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null; - - const maybe_cm = try Cache.tryCache( - allocator, - std.time.timestamp(), - transfer.url, - rh.status, - rh.contentType(), - if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null, - vary, - if (conn.getResponseHeader("age", 0)) |h| h.value else null, - conn.getResponseHeader("set-cookie", 0) != null, - conn.getResponseHeader("authorization", 0) != null, - ); - - if (maybe_cm) |cm| { - var iter = transfer.responseHeaderIterator(); - var header_list = try iter.collect(allocator); - const end_of_response = header_list.items.len; - - if (vary) |vary_str| { - var req_it = transfer.req.headers.iterator(); - - while (req_it.next()) |hdr| { - var vary_iter = std.mem.splitScalar(u8, vary_str, ','); - - while (vary_iter.next()) |part| { - const name = std.mem.trim(u8, part, &std.ascii.whitespace); - if (std.ascii.eqlIgnoreCase(hdr.name, name)) { - try header_list.append(allocator, .{ - .name = try allocator.dupe(u8, hdr.name), - .value = try allocator.dupe(u8, hdr.value), - }); - } - } - } - } - - const metadata = try transfer.arena.allocator().create(CacheMetadata); - metadata.* = cm; - metadata.headers = header_list.items[0..end_of_response]; - metadata.vary_headers = header_list.items[end_of_response..]; - transfer._pending_cache_metadata = metadata; - } - } - return proceed and transfer.aborted == false; } @@ -1774,7 +1449,7 @@ pub const Transfer = struct { transfer._callback_error = error.ResponseTooLarge; return http.writefunc_error; } - transfer._stream_buffer.ensureTotalCapacity(transfer.arena.allocator(), cl) catch {}; + transfer._stream_buffer.ensureTotalCapacity(transfer.req.params.arena, cl) catch {}; } } @@ -1787,7 +1462,7 @@ pub const Transfer = struct { } const chunk = buffer[0..chunk_len]; - transfer._stream_buffer.appendSlice(transfer.arena.allocator(), chunk) catch |err| { + transfer._stream_buffer.appendSlice(transfer.req.params.arena, chunk) catch |err| { transfer._callback_error = err; return http.writefunc_error; }; @@ -1800,64 +1475,13 @@ pub const Transfer = struct { } pub fn responseHeaderIterator(self: *Transfer) HeaderIterator { - if (self._conn) |conn| { - // If we have a connection, than this is a real curl request and we - // iterate through the header that curl maintains. - return .{ .curl = .{ .conn = conn } }; - } - // If there's no handle, it either means this is being called before - // the request is even being made (which would be a bug in the code) - // or when a response was injected via transfer.fulfill. The injected - // header should be iterated, since there is no handle/easy. - return .{ .list = .{ .list = self.response_header.?._injected_headers } }; - } + // We always have a real curl request here. We handle injection up in InterceptionLayer. + lp.assert(self._conn != null, "Transfer.responseHeaderIterator", .{ .value = self._conn != null }); + const conn = self._conn.?; - pub fn fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { - if (transfer._conn != null) { - // should never happen, should have been intercepted/paused, and then - // either continued, aborted or fulfilled once. - @branchHint(.unlikely); - return error.RequestInProgress; - } - - transfer._fulfill(status, headers, body) catch |err| { - transfer.req.error_callback(transfer.req.ctx, err); - return err; - }; - } - - fn _fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { - const req = &transfer.req; - if (req.start_callback) |cb| { - try cb(Response.fromTransfer(transfer)); - } - - transfer.response_header = .{ - .status = status, - .url = req.url, - .redirect_count = 0, - ._injected_headers = headers, - }; - for (headers) |hdr| { - if (std.ascii.eqlIgnoreCase(hdr.name, "content-type")) { - const len = @min(hdr.value.len, ResponseHead.MAX_CONTENT_TYPE_LEN); - @memcpy(transfer.response_header.?._content_type[0..len], hdr.value[0..len]); - transfer.response_header.?._content_type_len = len; - break; - } - } - - lp.assert(transfer._header_done_called == false, "Transfer.fulfill header_done_called", .{}); - if (try req.header_callback(Response.fromTransfer(transfer)) == false) { - transfer.abort(error.Abort); - return; - } - - if (body) |b| { - try req.data_callback(Response.fromTransfer(transfer), b); - } - - try req.done_callback(req.ctx); + // If we have a connection, than this is a real curl request and we + // iterate through the header that curl maintains. + return .{ .curl = .{ .conn = conn } }; } // This function should be called during the dataCallback. Calling it after @@ -1890,6 +1514,21 @@ pub const Transfer = struct { } }; +pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { + if (comptime IS_DEBUG) { + lp.assert(self.interception_layer.intercepted > 0, "HttpClient.continueTransfer", .{ .value = self.interception_layer.intercepted }); + log.debug(.http, "continue transfer", .{ .intercepted = self.interception_layer.intercepted }); + } + + self.interception_layer.intercepted -= 1; + return self.process(transfer); +} + +pub fn deinitRequest(self: *Client, req: Request) void { + req.deinit(); + self.network.app.arena_pool.release(req.params.arena); +} + const Noop = struct { fn headerCallback(_: Response) !bool { return true; diff --git a/src/browser/Runner.zig b/src/browser/Runner.zig index 68a7ad59..2489382e 100644 --- a/src/browser/Runner.zig +++ b/src/browser/Runner.zig @@ -185,7 +185,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { try frame.dispatchLoad(); const http_active = http_client.http_active; - const total_network_activity = http_active + http_client.intercepted; + const total_network_activity = http_active + http_client.interception_layer.intercepted; if (frame._notified_network_almost_idle.check(total_network_activity <= 2)) { frame.notifyNetworkAlmostIdle(); } @@ -211,7 +211,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { // because is_cdp is false, and that can only be // the case when interception isn't possible. if (comptime IS_DEBUG) { - std.debug.assert(http_client.intercepted == 0); + std.debug.assert(http_client.interception_layer.intercepted == 0); } if (browser.hasBackgroundTasks()) { diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 5f82b88e..12cc6cb5 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -265,13 +265,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e } if (remote_url) |url| { - errdefer { - if (is_blocking == false) { - self.scriptList(script).remove(&script.node); - } - // Let the outer errdefer handle releasing the arena if client.request fails - } - if (comptime IS_DEBUG) { var ls: js.Local.Scope = undefined; frame.js.localScope(&ls); @@ -285,23 +278,48 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e }); } - { - const was_evaluating = self.is_evaluating; - self.is_evaluating = true; - defer self.is_evaluating = was_evaluating; + const was_evaluating = self.is_evaluating; + self.is_evaluating = true; + defer self.is_evaluating = was_evaluating; - try self.client.request(.{ + const headers = try self.getHeaders(); + errdefer headers.deinit(); + + if (is_blocking) { + const response = try self.client.syncRequest(arena, .{ .url = url, - .ctx = script, .method = .GET, .frame_id = frame._frame_id, .loader_id = frame._loader_id, - .headers = try self.getHeaders(), - .blocking = is_blocking, + .headers = headers, .cookie_jar = &frame._session.cookie_jar, .cookie_origin = frame.url, .resource_type = .script, .notification = frame._session.notification, + }); + + script.source = .{ .remote = response.body }; + script.status = response.status; + script.complete = true; + } else { + errdefer { + self.scriptList(script).remove(&script.node); + // Let the outer errdefer handle releasing the arena if client.request fails + } + + try self.client.request(.{ + .ctx = script, + .params = .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = headers, + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .resource_type = .script, + .notification = frame._session.notification, + }, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, @@ -317,29 +335,21 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e return; } - // this is , it needs to block the caller - // until it's evaluated - var client = self.client; - while (true) { - if (!script.complete) { - _ = try client.tick(200); - continue; - } - if (script.status == 0) { - // an error (that we already logged) - script.deinit(); - return; - } - - // could have already been evaluating if this is dynamically added - const was_evaluating = self.is_evaluating; - self.is_evaluating = true; - defer { - self.is_evaluating = was_evaluating; - script.deinit(); - } - return script.eval(frame); + if (script.status == 0) { + // an error (that we already logged) + script.deinit(); + return; } + + // could have already been evaluating if this is dynamically added + const was_evaluating = self.is_evaluating; + self.is_evaluating = true; + defer { + self.is_evaluating = was_evaluating; + script.deinit(); + } + + script.eval(frame); } fn scriptList(self: *ScriptManager, script: *const Script) *std.DoublyLinkedList { @@ -407,16 +417,18 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const self.async_scripts.append(&script.node); self.client.request(.{ - .url = url, .ctx = script, - .method = .GET, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .headers = try self.getHeaders(), - .cookie_jar = &frame._session.cookie_jar, - .cookie_origin = frame.url, - .resource_type = .script, - .notification = frame._session.notification, + .params = .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = try self.getHeaders(), + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .resource_type = .script, + .notification = frame._session.notification, + }, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, @@ -513,16 +525,18 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C self.async_scripts.append(&script.node); self.client.request(.{ - .url = url, - .method = .GET, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .headers = try self.getHeaders(), .ctx = script, - .resource_type = .script, - .cookie_jar = &frame._session.cookie_jar, - .cookie_origin = frame.url, - .notification = frame._session.notification, + .params = .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = try self.getHeaders(), + .resource_type = .script, + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .notification = frame._session.notification, + }, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, @@ -664,7 +678,6 @@ pub const Script = struct { debug_transfer_aborted: bool = false, debug_transfer_bytes_received: usize = 0, debug_transfer_notified_fail: bool = false, - debug_transfer_intercept_state: u8 = 0, debug_transfer_auth_challenge: bool = false, debug_transfer_easy_id: usize = 0, @@ -740,7 +753,6 @@ pub const Script = struct { .a3 = self.debug_transfer_aborted, .a4 = self.debug_transfer_bytes_received, .a5 = self.debug_transfer_notified_fail, - .a7 = self.debug_transfer_intercept_state, .a8 = self.debug_transfer_auth_challenge, .a9 = self.debug_transfer_easy_id, .b1 = transfer.id, @@ -748,7 +760,6 @@ pub const Script = struct { .b3 = transfer.aborted, .b4 = transfer.bytes_received, .b5 = transfer._notified_fail, - .b7 = @intFromEnum(transfer._intercept_state), .b8 = transfer._auth_challenge != null, .b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0, }); @@ -758,7 +769,6 @@ pub const Script = struct { self.debug_transfer_aborted = transfer.aborted; self.debug_transfer_bytes_received = transfer.bytes_received; self.debug_transfer_notified_fail = transfer._notified_fail; - self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state); self.debug_transfer_auth_challenge = transfer._auth_challenge != null; self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0; }, diff --git a/src/browser/webapi/Worker.zig b/src/browser/webapi/Worker.zig index 558d4cce..81d74116 100644 --- a/src/browser/webapi/Worker.zig +++ b/src/browser/webapi/Worker.zig @@ -95,15 +95,17 @@ pub fn init(url: []const u8, exec: *Execution) !*Worker { const http_client = session.browser.http_client; http_client.request(.{ .ctx = self, - .url = resolved_url, - .method = .GET, - .headers = try http_client.newHeaders(), - .frame_id = self._frame_id, - .loader_id = self._loader_id, - .resource_type = .script, - .cookie_jar = &session.cookie_jar, - .cookie_origin = resolved_url, - .notification = session.notification, + .params = .{ + .url = resolved_url, + .method = .GET, + .headers = try http_client.newHeaders(), + .frame_id = self._frame_id, + .loader_id = self._loader_id, + .resource_type = .script, + .cookie_jar = &session.cookie_jar, + .cookie_origin = resolved_url, + .notification = session.notification, + }, .header_callback = httpHeaderCallback, .data_callback = httpDataCallback, .done_callback = httpDoneCallback, diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig index 623037d6..777f8311 100644 --- a/src/browser/webapi/net/Fetch.zig +++ b/src/browser/webapi/net/Fetch.zig @@ -94,16 +94,18 @@ pub fn init(input: Input, options: ?InitOpts, frame: *Frame) !js.Promise { try http_client.request(.{ .ctx = fetch, - .url = request._url, - .method = request._method, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .body = request._body, - .headers = headers, - .resource_type = .fetch, - .cookie_jar = cookie_jar, - .cookie_origin = frame.url, - .notification = frame._session.notification, + .params = .{ + .url = request._url, + .method = request._method, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .body = request._body, + .headers = headers, + .resource_type = .fetch, + .cookie_jar = cookie_jar, + .cookie_origin = frame.url, + .notification = frame._session.notification, + }, .start_callback = httpStartCallback, .header_callback = httpHeaderDoneCallback, .data_callback = httpDataCallback, diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 35f7f7fa..19a7676b 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -257,17 +257,19 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void { http_client.request(.{ .ctx = self, - .url = self._url, - .method = self._method, - .headers = headers, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .body = self._request_body, - .cookie_jar = if (cookie_support) &frame._session.cookie_jar else null, - .cookie_origin = frame.url, - .resource_type = .xhr, - .timeout_ms = self._timeout, - .notification = frame._session.notification, + .params = .{ + .url = self._url, + .method = self._method, + .headers = headers, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .body = self._request_body, + .cookie_jar = if (cookie_support) &frame._session.cookie_jar else null, + .cookie_origin = frame.url, + .resource_type = .xhr, + .timeout_ms = self._timeout, + .notification = frame._session.notification, + }, .start_callback = httpStartCallback, .header_callback = httpHeaderDoneCallback, .data_callback = httpDataCallback, diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 16a08cc9..e646f4b8 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -450,8 +450,25 @@ pub const BrowserContext = struct { // abort all intercepted requests before closing the session/page // since some of these might callback into the page/scriptmanager - for (self.intercept_state.pendingTransfers()) |transfer| { - transfer.abort(error.ClientDisconnect); + const http_client = browser.http_client; + for (self.intercept_state.pendingIntercepts()) |intercept| { + defer { + lp.assert( + http_client.interception_layer.intercepted > 0, + "BrowserContext.deinit.intercepted", + .{ .value = http_client.interception_layer.intercepted }, + ); + http_client.interception_layer.intercepted -= 1; + } + switch (intercept) { + .transfer => |t| { + t.abort(error.ClientDisconnect); + }, + .request => |r| { + defer http_client.deinitRequest(r); + r.error_callback(r.ctx, error.ClientDisconnect); + }, + } } for (self.isolated_worlds.items) |world| { @@ -668,7 +685,7 @@ pub const BrowserContext = struct { const arena = self.frame_arena; // Prepare the captured response value. - const id = msg.transfer.id; + const id = msg.request.params.request_id; const gop = try self.captured_responses.getOrPut(arena, id); if (!gop.found_existing) { gop.value_ptr.* = .{ @@ -676,8 +693,8 @@ pub const BrowserContext = struct { // Encode the data in base64 by default, but don't encode // for well known content-type. .must_encode = blk: { - const transfer = msg.transfer; - if (transfer.response_header.?.contentType()) |ct| { + const response = msg.response; + if (response.contentType()) |ct| { const mime = try Mime.parse(ct); if (!mime.isText()) { @@ -705,7 +722,7 @@ pub const BrowserContext = struct { const self: *BrowserContext = @ptrCast(@alignCast(ctx)); const arena = self.frame_arena; - const id = msg.transfer.id; + const id = msg.request.params.request_id; const resp = self.captured_responses.getPtr(id) orelse lp.assert(false, "onHttpResponseData missinf captured response", .{}); return resp.data.appendSlice(arena, msg.data); diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index d0a1d1db..edff1761 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -54,7 +54,12 @@ pub fn processMessage(cmd: *CDP.Command) !void { // Stored in CDP pub const InterceptState = struct { allocator: Allocator, - waiting: std.AutoArrayHashMapUnmanaged(u32, *HttpClient.Transfer), + waiting: std.AutoArrayHashMapUnmanaged(u32, Pending), + + const Pending = union(enum) { + transfer: *HttpClient.Transfer, + request: HttpClient.Request, + }; pub fn init(allocator: Allocator) !InterceptState { return .{ @@ -67,11 +72,15 @@ pub const InterceptState = struct { return self.waiting.count() == 0; } - pub fn put(self: *InterceptState, transfer: *HttpClient.Transfer) !void { - return self.waiting.put(self.allocator, transfer.id, transfer); + pub fn putRequest(self: *InterceptState, request: HttpClient.Request) !void { + return self.waiting.put(self.allocator, request.params.request_id, .{ .request = request }); } - pub fn remove(self: *InterceptState, request_id: u32) ?*HttpClient.Transfer { + pub fn putTransfer(self: *InterceptState, transfer: *HttpClient.Transfer) !void { + return self.waiting.put(self.allocator, transfer.id, .{ .transfer = transfer }); + } + + pub fn remove(self: *InterceptState, request_id: u32) ?Pending { const entry = self.waiting.fetchSwapRemove(request_id) orelse return null; return entry.value; } @@ -80,7 +89,7 @@ pub const InterceptState = struct { self.waiting.deinit(self.allocator); } - pub fn pendingTransfers(self: *const InterceptState) []*HttpClient.Transfer { + pub fn pendingIntercepts(self: *const InterceptState) []Pending { return self.waiting.values(); } }; @@ -190,29 +199,28 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification. const session_id = bc.session_id orelse return; // We keep it around to wait for modifications to the request. - // NOTE: we assume whomever created the request created it with a lifetime of the Page. // TODO: What to do when receiving replies for a previous frame's requests? - const transfer = intercept.transfer; - try bc.intercept_state.put(transfer); + const request = intercept.request; + try bc.intercept_state.putRequest(request.*); try bc.cdp.sendEvent("Fetch.requestPaused", .{ - .requestId = &id.toInterceptId(transfer.id), - .frameId = &id.toFrameId(transfer.req.frame_id), - .request = network.TransferAsRequestWriter.init(transfer), - .resourceType = switch (transfer.req.resource_type) { + .requestId = &id.toInterceptId(request.params.request_id), + .frameId = &id.toFrameId(request.params.frame_id), + .request = network.RequestWriter.init(request), + .resourceType = switch (request.params.resource_type) { .script => "Script", .xhr => "XHR", .document => "Document", .fetch => "Fetch", }, - .networkId = &id.toRequestId(transfer), // matches the Network REQ-ID + .networkId = &id.toRequestId(request), // matches the Network REQ-ID }, .{ .session_id = session_id }); log.debug(.cdp, "request intercept", .{ .state = "paused", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, }); // Await either continueRequest, failRequest or fulfillRequest @@ -236,39 +244,50 @@ fn continueRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; + + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = pending.request; log.debug(.cdp, "request intercept", .{ .state = "continue", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, .new_url = params.url, }); - const arena = transfer.arena.allocator(); + const arena = request.params.arena; // Update the request with the new parameters if (params.url) |url| { - try transfer.updateURL(try arena.dupeZ(u8, url)); + request.params.url = try arena.dupeZ(u8, url); } if (params.method) |method| { - transfer.req.method = std.meta.stringToEnum(http.Method, method) orelse return error.InvalidParams; + request.params.method = std.meta.stringToEnum(http.Method, method) orelse return error.InvalidParams; } if (params.headers) |headers| { - // Not obvious, but cmd.arena is safe here, since the headers will get - // duped by libcurl. transfer.arena is more obvious/safe, but cmd.arena - // is more efficient (it's re-used) - try transfer.replaceRequestHeaders(cmd.arena, headers); + request.params.headers.deinit(); + + var buf: std.ArrayList(u8) = .empty; + var new_headers = try bc.cdp.browser.http_client.newHeaders(); + for (headers) |hdr| { + defer buf.clearRetainingCapacity(); + try std.fmt.format(buf.writer(cmd.arena), "{s}: {s}", .{ hdr.name, hdr.value }); + try buf.append(cmd.arena, 0); + try new_headers.add(buf.items[0 .. buf.items.len - 1 :0]); + } + request.params.headers = new_headers; } if (params.postData) |b| { const decoder = std.base64.standard.Decoder; const body = try arena.alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(body, b); - transfer.req.body = body; + request.params.body = body; } - try bc.cdp.browser.http_client.continueTransfer(transfer); + // todo: replace. + const client = bc.cdp.browser.http_client; + try client.interception_layer.continueRequest(client, request); return cmd.sendResult(null, .{}); } @@ -292,14 +311,18 @@ fn continueWithAuth(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const transfer = pending.transfer; + const request = transfer.req; log.debug(.cdp, "request intercept", .{ .state = "continue with auth", - .id = transfer.id, + .id = request.params.request_id, .response = params.authChallengeResponse.response, }); + const client = bc.cdp.browser.http_client; + if (params.authChallengeResponse.response != .ProvideCredentials) { transfer.abortAuthChallenge(); return cmd.sendResult(null, .{}); @@ -308,17 +331,18 @@ fn continueWithAuth(cmd: *CDP.Command) !void { // cancel the request, deinit the transfer on error. errdefer transfer.abortAuthChallenge(); - // restart the request with the provided credentials. - const arena = transfer.arena.allocator(); - transfer.updateCredentials( - try std.fmt.allocPrintSentinel(arena, "{s}:{s}", .{ + const arena = request.params.arena; + transfer.updateCredentials(try std.fmt.allocPrintSentinel( + arena, + "{s}:{s}", + .{ params.authChallengeResponse.username, params.authChallengeResponse.password, - }, 0), - ); + }, + 0, + )); - transfer.reset(); - try bc.cdp.browser.http_client.continueTransfer(transfer); + try client.continueTransfer(transfer); return cmd.sendResult(null, .{}); } @@ -341,12 +365,14 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; + + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = pending.request; log.debug(.cdp, "request intercept", .{ .state = "fulfilled", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, .status = params.responseCode, .body = params.body != null, }); @@ -354,13 +380,13 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var body: ?[]const u8 = null; if (params.body) |b| { const decoder = std.base64.standard.Decoder; - const buf = try transfer.arena.allocator().alloc(u8, try decoder.calcSizeForSlice(b)); + const buf = try request.params.arena.alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(buf, b); body = buf; } - try bc.cdp.browser.http_client.fulfillTransfer(transfer, params.responseCode, params.responseHeaders orelse &.{}, body); - + const client = bc.cdp.browser.http_client; + try client.interception_layer.fulfillRequest(client, request, params.responseCode, params.responseHeaders orelse &.{}, body); return cmd.sendResult(null, .{}); } @@ -374,13 +400,16 @@ fn failRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; - defer bc.cdp.browser.http_client.abortTransfer(transfer); + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const request = pending.request; + + const client = bc.cdp.browser.http_client; + defer client.interception_layer.abortRequest(client, request); log.info(.cdp, "request intercept", .{ .state = "fail", .id = request_id, - .url = transfer.url, + .url = request.params.url, .reason = params.errorReason, }); return cmd.sendResult(null, .{}); @@ -396,15 +425,16 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati // TODO: What to do when receiving replies for a previous frame's requests? const transfer = intercept.transfer; - try bc.intercept_state.put(transfer); + try bc.intercept_state.putTransfer(transfer); + var request = transfer.req; const challenge = transfer._auth_challenge orelse return error.NullAuthChallenge; try bc.cdp.sendEvent("Fetch.authRequired", .{ - .requestId = &id.toInterceptId(transfer.id), - .frameId = &id.toFrameId(transfer.req.frame_id), - .request = network.TransferAsRequestWriter.init(transfer), - .resourceType = switch (transfer.req.resource_type) { + .requestId = &id.toInterceptId(request.params.request_id), + .frameId = &id.toFrameId(request.params.frame_id), + .request = network.RequestWriter.init(&request), + .resourceType = switch (request.params.resource_type) { .script => "Script", .xhr => "XHR", .document => "Document", @@ -416,13 +446,13 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", .realm = challenge.realm orelse "", }, - .networkId = &id.toRequestId(transfer), + .networkId = &id.toRequestId(&request), }, .{ .session_id = session_id }); log.debug(.cdp, "request auth required", .{ .state = "paused", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, }); // Await continueWithAuth diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index ff5778e9..0554681a 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -27,6 +27,8 @@ const Mime = @import("../../browser/Mime.zig"); const Notification = @import("../../Notification.zig"); const timestamp = @import("../../datetime.zig").timestamp; const Transfer = @import("../../browser/HttpClient.zig").Transfer; +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; const CdpStorage = @import("storage.zig"); @@ -260,7 +262,7 @@ pub fn httpRequestFail(bc: *CDP.BrowserContext, msg: *const Notification.Request // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.loadingFailed", .{ - .requestId = &id.toRequestId(msg.transfer), + .requestId = &id.toRequestId(msg.request), // Seems to be what chrome answers with. I assume it depends on the type of error? .type = "Ping", .errorText = msg.err, @@ -273,24 +275,23 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques // things, but no session. const session_id = bc.session_id orelse return; - const transfer = msg.transfer; - const req = &transfer.req; - const frame_id = req.frame_id; + const req = msg.request; + const frame_id = req.params.frame_id; const frame = bc.session.findFrameByFrameId(frame_id) orelse return; // Modify request with extra CDP headers for (bc.extra_headers.items) |extra| { - try req.headers.add(extra); + try req.params.headers.add(extra); } // We're missing a bunch of fields, but, for now, this eems like enough try bc.cdp.sendEvent("Network.requestWillBeSent", .{ .frameId = &id.toFrameId(frame_id), - .requestId = &id.toRequestId(transfer), - .loaderId = &id.toLoaderId(req.loader_id), - .type = req.resource_type.string(), + .requestId = &id.toRequestId(req), + .loaderId = &id.toLoaderId(req.params.loader_id), + .type = req.params.resource_type.string(), .documentURL = frame.url, - .request = TransferAsRequestWriter.init(transfer), + .request = RequestWriter.init(req), .initiator = .{ .type = "other" }, .redirectHasExtraInfo = false, // TODO change after adding Network.requestWillBeSentExtraInfo .hasUserGesture = false, @@ -304,15 +305,14 @@ pub fn httpResponseHeaderDone(arena: Allocator, bc: *CDP.BrowserContext, msg: *c // things, but no session. const session_id = bc.session_id orelse return; - const transfer = msg.transfer; - const req = &transfer.req; + const req = msg.request; // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.responseReceived", .{ - .frameId = &id.toFrameId(req.frame_id), - .requestId = &id.toRequestId(transfer), - .loaderId = &id.toLoaderId(req.loader_id), - .response = TransferAsResponseWriter.init(arena, transfer), + .frameId = &id.toFrameId(req.params.frame_id), + .requestId = &id.toRequestId(req), + .loaderId = &id.toLoaderId(req.params.loader_id), + .response = ResponseWriter.init(arena, msg.response), .hasExtraInfo = false, // TODO change after adding Network.responseReceivedExtraInfo }, .{ .session_id = session_id }); } @@ -321,36 +321,37 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request // detachTarget could be called, in which case, we still have a frame doing // things, but no session. const session_id = bc.session_id orelse return; - const transfer = msg.transfer; + const req = msg.request; try bc.cdp.sendEvent("Network.loadingFinished", .{ - .requestId = &id.toRequestId(transfer), - .encodedDataLength = transfer.bytes_received, + .requestId = &id.toRequestId(req), + .encodedDataLength = msg.content_length, }, .{ .session_id = session_id }); } -pub const TransferAsRequestWriter = struct { - transfer: *Transfer, +pub const RequestWriter = struct { + request: *Request, - pub fn init(transfer: *Transfer) TransferAsRequestWriter { + pub fn init(request: *Request) RequestWriter { return .{ - .transfer = transfer, + .request = request, }; } - pub fn jsonStringify(self: *const TransferAsRequestWriter, jws: anytype) !void { + pub fn jsonStringify(self: *const RequestWriter, jws: anytype) !void { self._jsonStringify(jws) catch return error.WriteFailed; } - fn _jsonStringify(self: *const TransferAsRequestWriter, jws: anytype) !void { - const transfer = self.transfer; + + fn _jsonStringify(self: *const RequestWriter, jws: anytype) !void { + const request = self.request; try jws.beginObject(); { try jws.objectField("url"); - try jws.write(transfer.url); + try jws.write(request.params.url); } { - const frag = URL.getHash(transfer.url); + const frag = URL.getHash(request.params.url); if (frag.len > 0) { try jws.objectField("urlFragment"); try jws.write(frag); @@ -359,23 +360,23 @@ pub const TransferAsRequestWriter = struct { { try jws.objectField("method"); - try jws.write(@tagName(transfer.req.method)); + try jws.write(@tagName(request.params.method)); } { try jws.objectField("hasPostData"); - try jws.write(transfer.req.body != null); + try jws.write(request.params.body != null); } { try jws.objectField("headers"); try jws.beginObject(); - var it = transfer.req.headers.iterator(); + var it = request.params.headers.iterator(); while (it.next()) |hdr| { try jws.objectField(hdr.name); try jws.write(hdr.value); } - if (try transfer.getCookieString()) |cookies| { + if (try request.getCookieString()) |cookies| { try jws.objectField("Cookie"); try jws.write(cookies[0 .. cookies.len - 1]); } @@ -385,34 +386,31 @@ pub const TransferAsRequestWriter = struct { } }; -const TransferAsResponseWriter = struct { +const ResponseWriter = struct { arena: Allocator, - transfer: *Transfer, + response: *const Response, - fn init(arena: Allocator, transfer: *Transfer) TransferAsResponseWriter { + fn init(arena: Allocator, response: *const Response) ResponseWriter { return .{ .arena = arena, - .transfer = transfer, + .response = response, }; } - pub fn jsonStringify(self: *const TransferAsResponseWriter, jws: anytype) !void { + pub fn jsonStringify(self: *const ResponseWriter, jws: anytype) !void { self._jsonStringify(jws) catch return error.WriteFailed; } - fn _jsonStringify(self: *const TransferAsResponseWriter, jws: anytype) !void { - const transfer = self.transfer; + fn _jsonStringify(self: *const ResponseWriter, jws: anytype) !void { + const response = self.response; try jws.beginObject(); { try jws.objectField("url"); - try jws.write(transfer.url); + try jws.write(response.url()); } - if (transfer.response_header) |*rh| { - // it should not be possible for this to be false, but I'm not - // feeling brave today. - const status = rh.status; + if (response.status()) |status| { try jws.objectField("status"); try jws.write(status); @@ -422,7 +420,7 @@ const TransferAsResponseWriter = struct { { const mime: Mime = blk: { - if (transfer.response_header.?.contentType()) |ct| { + if (response.contentType()) |ct| { break :blk try Mime.parse(ct); } break :blk .unknown; @@ -437,7 +435,8 @@ const TransferAsResponseWriter = struct { { try jws.objectField("timing"); try jws.write(.{ - .requestTime = transfer.start_time, + // TODO: fix + .requestTime = -1, .connectEnd = -1, .connectStart = -1, .dnsEnd = -1, @@ -458,7 +457,7 @@ const TransferAsResponseWriter = struct { // common to get these from a server (e.g. for Cache-Control), but // Chrome joins these. So we have to too. const arena = self.arena; - var it = transfer.responseHeaderIterator(); + var it = response.headerIterator(); var map: std.StringArrayHashMapUnmanaged([]const u8) = .empty; while (it.next()) |hdr| { const gop = try map.getOrPut(arena, hdr.name); diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index c77319ae..239b5122 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -145,7 +145,7 @@ fn setLifecycleEventsEnabled(cmd: *CDP.Command) !void { const http_client = frame._session.browser.http_client; const http_active = http_client.http_active; - const total_network_activity = http_active + http_client.intercepted; + const total_network_activity = http_active + http_client.interception_layer.intercepted; if (frame._notified_network_almost_idle.check(total_network_activity <= 2)) { try sendPageLifecycle(bc, "networkAlmostIdle", now, frame_id, loader_id); } diff --git a/src/cdp/id.zig b/src/cdp/id.zig index a5d1286f..f6889d24 100644 --- a/src/cdp/id.zig +++ b/src/cdp/id.zig @@ -40,15 +40,14 @@ pub fn toLoaderId(id: u32) [14]u8 { // requestId has special requirements. If it's the main document navigation, // then it should match the loader id. -const Transfer = @import("../browser/HttpClient.zig").Transfer; -pub fn toRequestId(transfer: *const Transfer) [14]u8 { - const req = transfer.req; - if (req.resource_type == .document) { - return toLoaderId(req.loader_id); +const Request = @import("../browser/HttpClient.zig").Request; +pub fn toRequestId(req: *const Request) [14]u8 { + if (req.params.resource_type == .document) { + return toLoaderId(req.params.loader_id); } var buf: [14]u8 = undefined; - _ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{transfer.id}) catch unreachable; + _ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{req.params.request_id}) catch unreachable; return buf; } diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig new file mode 100644 index 00000000..fd8a5e9f --- /dev/null +++ b/src/network/layer/CacheLayer.zig @@ -0,0 +1,232 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const lp = @import("lightpanda"); +const log = lp.log; + +const http = @import("../http.zig"); +const Client = @import("../../browser/HttpClient.zig").Client; +const Transfer = @import("../../browser/HttpClient.zig").Transfer; +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; +const Layer = @import("../../browser/HttpClient.zig").Layer; + +const Cache = @import("../cache/Cache.zig"); +const CachedMetadata = @import("../cache/Cache.zig").CachedMetadata; +const CachedResponse = @import("../cache/Cache.zig").CachedResponse; +const Forward = @import("Forward.zig"); + +const CacheLayer = @This(); + +next: Layer = undefined, + +pub fn layer(self: *CacheLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ + .request = request, + }, + }; +} + +fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { + const self: *CacheLayer = @ptrCast(@alignCast(ptr)); + const network = client.network; + + if (req.params.method != .GET) { + return self.next.request(client, req); + } + + const arena = req.params.arena; + + var iter = req.params.headers.iterator(); + const req_header_list = try iter.collect(arena); + + if (network.cache.?.get(arena, .{ + .url = req.params.url, + .timestamp = std.time.timestamp(), + .request_headers = req_header_list.items, + })) |cached| { + return serveFromCache(req, &cached); + } + + const cache_ctx = try arena.create(CacheContext); + cache_ctx.* = .{ + .arena = arena, + .client = client, + .forward = Forward.fromRequest(req), + .req_url = req.params.url, + .req_headers = req.params.headers, + }; + + const wrapped = cache_ctx.forward.wrapRequest( + req, + cache_ctx, + .{ + .start = CacheContext.startCallback, + .header = CacheContext.headerCallback, + .done = CacheContext.doneCallback, + .shutdown = CacheContext.shutdownCallback, + .err = CacheContext.errorCallback, + }, + ); + + return self.next.request(client, wrapped); +} + +fn serveFromCache(req: Request, cached: *const CachedResponse) !void { + const response = Response.fromCached(req.ctx, cached); + defer switch (cached.data) { + .buffer => |_| {}, + .file => |f| f.file.close(), + }; + + if (req.start_callback) |cb| { + try cb(response); + } + + const proceed = try req.header_callback(response); + if (!proceed) { + return error.Abort; + } + + switch (cached.data) { + .buffer => |data| { + if (data.len > 0) { + try req.data_callback(response, data); + } + }, + .file => |f| { + const file = f.file; + var buf: [1024]u8 = undefined; + var file_reader = file.reader(&buf); + try file_reader.seekTo(f.offset); + const reader = &file_reader.interface; + var read_buf: [1024]u8 = undefined; + var remaining = f.len; + while (remaining > 0) { + const read_len = @min(read_buf.len, remaining); + const n = try reader.readSliceShort(read_buf[0..read_len]); + if (n == 0) break; + remaining -= n; + try req.data_callback(response, read_buf[0..n]); + } + }, + } + + try req.done_callback(req.ctx); +} + +const CacheContext = struct { + arena: std.mem.Allocator, + client: *Client, + transfer: ?*Transfer = null, + forward: Forward, + req_url: [:0]const u8, + req_headers: http.Headers, + pending_metadata: ?*CachedMetadata = null, + + fn startCallback(response: Response) anyerror!void { + const self: *CacheContext = @ptrCast(@alignCast(response.ctx)); + self.transfer = response.inner.transfer; + return self.forward.forwardStart(response); + } + + fn headerCallback(response: Response) anyerror!bool { + const self: *CacheContext = @ptrCast(@alignCast(response.ctx)); + const allocator = self.arena; + + const transfer = response.inner.transfer; + var rh = &transfer.response_header.?; + + const conn = transfer._conn.?; + + const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null; + + const maybe_cm = try Cache.tryCache( + allocator, + std.time.timestamp(), + transfer.url, + rh.status, + rh.contentType(), + if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null, + vary, + if (conn.getResponseHeader("age", 0)) |h| h.value else null, + conn.getResponseHeader("set-cookie", 0) != null, + conn.getResponseHeader("authorization", 0) != null, + ); + + if (maybe_cm) |cm| { + var iter = transfer.responseHeaderIterator(); + var header_list = try iter.collect(allocator); + const end_of_response = header_list.items.len; + + if (vary) |vary_str| { + var req_it = self.req_headers.iterator(); + while (req_it.next()) |hdr| { + var vary_iter = std.mem.splitScalar(u8, vary_str, ','); + while (vary_iter.next()) |part| { + const name = std.mem.trim(u8, part, &std.ascii.whitespace); + if (std.ascii.eqlIgnoreCase(hdr.name, name)) { + try header_list.append(allocator, .{ + .name = try allocator.dupe(u8, hdr.name), + .value = try allocator.dupe(u8, hdr.value), + }); + } + } + } + } + + const metadata = try allocator.create(CachedMetadata); + metadata.* = cm; + metadata.headers = header_list.items[0..end_of_response]; + metadata.vary_headers = header_list.items[end_of_response..]; + self.pending_metadata = metadata; + } + + return self.forward.forwardHeader(response); + } + + fn doneCallback(ctx: *anyopaque) anyerror!void { + const self: *CacheContext = @ptrCast(@alignCast(ctx)); + const transfer = self.transfer orelse @panic("Start Callback didn't set CacheLayer.transfer"); + + if (self.pending_metadata) |metadata| { + const cache = &self.client.network.cache.?; + + log.debug(.browser, "http cache", .{ .key = self.req_url, .metadata = metadata }); + cache.put(metadata.*, transfer._stream_buffer.items) catch |err| { + log.warn(.http, "cache put failed", .{ .err = err }); + }; + log.debug(.browser, "http.cache.put", .{ .url = self.req_url }); + } + + return self.forward.forwardDone(); + } + + fn shutdownCallback(ctx: *anyopaque) void { + const self: *CacheContext = @ptrCast(@alignCast(ctx)); + self.forward.forwardShutdown(); + } + + fn errorCallback(ctx: *anyopaque, e: anyerror) void { + const self: *CacheContext = @ptrCast(@alignCast(ctx)); + self.forward.forwardErr(e); + } +}; diff --git a/src/network/layer/Forward.zig b/src/network/layer/Forward.zig new file mode 100644 index 00000000..b11ff23f --- /dev/null +++ b/src/network/layer/Forward.zig @@ -0,0 +1,134 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; + +const Forward = @This(); + +ctx: *anyopaque, +start: ?Request.StartCallback, +header: Request.HeaderCallback, +data: Request.DataCallback, +done: Request.DoneCallback, +err: Request.ErrorCallback, +shutdown: ?Request.ShutdownCallback, + +pub fn fromRequest(req: Request) Forward { + return .{ + .ctx = req.ctx, + .start = req.start_callback, + .header = req.header_callback, + .data = req.data_callback, + .done = req.done_callback, + .err = req.error_callback, + .shutdown = req.shutdown_callback, + }; +} + +pub const Overrides = struct { + start: ?Request.StartCallback = null, + header: ?Request.HeaderCallback = null, + data: ?Request.DataCallback = null, + done: ?Request.DoneCallback = null, + err: ?Request.ErrorCallback = null, + shutdown: ?Request.ShutdownCallback = null, +}; + +pub fn wrapRequest( + self: *Forward, + req: Request, + new_ctx: anytype, + overrides: Overrides, +) Request { + const T = @TypeOf(new_ctx.*); + const PassthroughT = makePassthrough(T, "forward"); + var wrapped = req; + wrapped.ctx = new_ctx; + wrapped.start_callback = overrides.start orelse if (self.start != null) PassthroughT.start else null; + wrapped.header_callback = overrides.header orelse PassthroughT.header; + wrapped.data_callback = overrides.data orelse PassthroughT.data; + wrapped.done_callback = overrides.done orelse PassthroughT.done; + wrapped.error_callback = overrides.err orelse PassthroughT.err; + wrapped.shutdown_callback = overrides.shutdown orelse if (self.shutdown != null) PassthroughT.shutdown else null; + return wrapped; +} + +fn makePassthrough(comptime T: type, comptime field: []const u8) type { + return struct { + pub fn start(response: Response) anyerror!void { + const self: *T = @ptrCast(@alignCast(response.ctx)); + return @field(self, field).forwardStart(response); + } + + pub fn header(response: Response) anyerror!bool { + const self: *T = @ptrCast(@alignCast(response.ctx)); + return @field(self, field).forwardHeader(response); + } + + pub fn data(response: Response, chunk: []const u8) anyerror!void { + const self: *T = @ptrCast(@alignCast(response.ctx)); + return @field(self, field).forwardData(response, chunk); + } + + pub fn done(ctx_ptr: *anyopaque) anyerror!void { + const self: *T = @ptrCast(@alignCast(ctx_ptr)); + return @field(self, field).forwardDone(); + } + + pub fn err(ctx_ptr: *anyopaque, e: anyerror) void { + const self: *T = @ptrCast(@alignCast(ctx_ptr)); + @field(self, field).forwardErr(e); + } + + pub fn shutdown(ctx_ptr: *anyopaque) void { + const self: *T = @ptrCast(@alignCast(ctx_ptr)); + @field(self, field).forwardShutdown(); + } + }; +} + +pub fn forwardStart(self: Forward, response: Response) anyerror!void { + var fwd = response; + fwd.ctx = self.ctx; + if (self.start) |cb| try cb(fwd); +} + +pub fn forwardHeader(self: Forward, response: Response) anyerror!bool { + var fwd = response; + fwd.ctx = self.ctx; + return self.header(fwd); +} + +pub fn forwardData(self: Forward, response: Response, chunk: []const u8) anyerror!void { + var fwd = response; + fwd.ctx = self.ctx; + return self.data(fwd, chunk); +} + +pub fn forwardDone(self: Forward) anyerror!void { + return self.done(self.ctx); +} + +pub fn forwardErr(self: Forward, e: anyerror) void { + self.err(self.ctx, e); +} + +pub fn forwardShutdown(self: Forward) void { + if (self.shutdown) |cb| cb(self.ctx); +} diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig new file mode 100644 index 00000000..165cd254 --- /dev/null +++ b/src/network/layer/InterceptionLayer.zig @@ -0,0 +1,270 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const builtin = @import("builtin"); +const lp = @import("lightpanda"); +const log = lp.log; + +const IS_DEBUG = builtin.mode == .Debug; + +const http = @import("../http.zig"); +const Client = @import("../../browser/HttpClient.zig").Client; +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; +const FulfilledResponse = @import("../../browser/HttpClient.zig").FulfilledResponse; +const Layer = @import("../../browser/HttpClient.zig").Layer; +const Forward = @import("Forward.zig"); + +const InterceptionLayer = @This(); + +// Count of intercepted requests. This is to help deal with intercepted requests. +// The client doesn't track intercepted transfers. If a request is intercepted, +// the client forgets about it and requires the interceptor to continue or abort +// it. That works well, except if we only rely on active, we might think there's +// no more network activity when, with interecepted requests, there might be more +// in the future. (We really only need this to properly emit a 'networkIdle' and +// 'networkAlmostIdle' Page.lifecycleEvent in CDP). +intercepted: usize = 0, + +next: Layer = undefined, + +pub fn layer(self: *InterceptionLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ .request = request }, + }; +} + +fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { + const self: *InterceptionLayer = @ptrCast(@alignCast(ptr)); + + const intercept_ctx = try in_req.params.arena.create(InterceptContext); + intercept_ctx.* = .{ + .client = client, + .forward = Forward.fromRequest(in_req), + .layer = self, + .request = in_req, + }; + + var req = intercept_ctx.forward.wrapRequest( + in_req, + intercept_ctx, + .{ + .start = InterceptContext.startCallback, + .header = InterceptContext.headerCallback, + .data = InterceptContext.dataCallback, + .done = InterceptContext.doneCallback, + .err = InterceptContext.errorCallback, + .shutdown = InterceptContext.shutdownCallback, + }, + ); + + req.params.notification.dispatch(.http_request_start, &.{ .request = &req }); + + var wait_for_interception = false; + req.params.notification.dispatch(.http_request_intercept, &.{ + .request = &req, + .wait_for_interception = &wait_for_interception, + }); + + log.debug(.http, "interception check", .{ + .wait_for_interception = wait_for_interception, + .intercepted = self.intercepted, + .url = req.params.url, + }); + + if (!wait_for_interception) { + return self.next.request(client, req); + } + + self.intercepted += 1; + if (comptime IS_DEBUG) { + log.debug(.http, "wait for interception", .{ .intercepted = self.intercepted }); + } +} + +pub const InterceptContext = struct { + client: *Client, + forward: Forward, + layer: *InterceptionLayer, + request: Request, + content_length: usize = 0, + + fn startCallback(response: Response) anyerror!void { + const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + log.debug(.http, "intercept start", .{ .url = self.request.params.url }); + return self.forward.forwardStart(response); + } + + fn headerCallback(response: Response) anyerror!bool { + const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + log.debug(.http, "intercept header", .{ + .url = self.request.params.url, + .status = response.status(), + .content_length = response.contentLength(), + }); + + self.content_length = response.contentLength() orelse 0; + + self.request.params.notification.dispatch(.http_response_header_done, &.{ + .request = &self.request, + .response = &response, + }); + + return self.forward.forwardHeader(response); + } + + fn dataCallback(response: Response, chunk: []const u8) anyerror!void { + const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + log.debug(.http, "intercept data", .{ + .url = self.request.params.url, + .len = chunk.len, + }); + + self.request.params.notification.dispatch(.http_response_data, &.{ + .data = chunk, + .request = &self.request, + }); + + return self.forward.forwardData(response, chunk); + } + + fn doneCallback(ctx: *anyopaque) anyerror!void { + const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + + log.debug(.http, "intercept done", .{ + .url = self.request.params.url, + .content_length = self.content_length, + }); + + self.request.params.notification.dispatch(.http_request_done, &.{ + .request = &self.request, + .content_length = self.content_length, + }); + return self.forward.forwardDone(); + } + + fn errorCallback(ctx: *anyopaque, err: anyerror) void { + const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + + log.debug(.http, "intercept error", .{ + .url = self.request.params.url, + .err = err, + }); + self.request.params.notification.dispatch(.http_request_fail, &.{ + .request = &self.request, + .err = err, + }); + self.forward.forwardErr(err); + } + + fn shutdownCallback(ctx: *anyopaque) void { + const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + + log.debug(.http, "intercept shutdown", .{ .url = self.request.params.url }); + self.request.params.notification.dispatch(.http_request_fail, &.{ + .request = &self.request, + .err = error.Shutdown, + }); + self.forward.forwardShutdown(); + } +}; + +// CDP Callbacks +// These handle their own clean up on errors with `self.next.request`. +// This is because they don't pass their error up the chain as they are async callbacks. + +pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) anyerror!void { + if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.continueRequest", .{ .value = self.intercepted }); + log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); + } + + self.intercepted -= 1; + self.next.request(client, req) catch |err| { + const ctx: *InterceptContext = @ptrCast(@alignCast(req.ctx)); + req.error_callback(req.ctx, err); + ctx.client.deinitRequest(req); + return err; + }; +} + +pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) void { + if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.abortRequest", .{ .value = self.intercepted }); + log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); + } + self.intercepted -= 1; + + req.error_callback(req.ctx, error.Abort); + client.deinitRequest(req); +} + +fn fulfillInner( + req: Request, + status: u16, + headers: []const http.Header, + body: ?[]const u8, +) !void { + const fulfilled = FulfilledResponse{ + .status = status, + .url = req.params.url, + .headers = headers, + .body = body, + }; + + const response = Response.fromFulfilled(req.ctx, &fulfilled); + + if (req.start_callback) |cb| { + try cb(response); + } + + const proceed = try req.header_callback(response); + if (!proceed) { + return error.Abort; + } + + if (body) |b| { + try req.data_callback(response, b); + } + + try req.done_callback(req.ctx); +} + +pub fn fulfillRequest( + self: *InterceptionLayer, + client: *Client, + req: Request, + status: u16, + headers: []const http.Header, + body: ?[]const u8, +) !void { + if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.fulfillRequest", .{ .value = self.intercepted }); + log.debug(.http, "fulfill transfer", .{ .intercepted = self.intercepted }); + } + + self.intercepted -= 1; + defer client.deinitRequest(req); + + fulfillInner(req, status, headers, body) catch |err| { + req.error_callback(req.ctx, err); + return err; + }; +} diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig new file mode 100644 index 00000000..1bfae1b6 --- /dev/null +++ b/src/network/layer/RobotsLayer.zig @@ -0,0 +1,261 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const lp = @import("lightpanda"); +const log = lp.log; + +const URL = @import("../../browser/URL.zig"); +const Robots = @import("../Robots.zig"); +const Client = @import("../../browser/HttpClient.zig").Client; +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; +const Layer = @import("../../browser/HttpClient.zig").Layer; +const Forward = @import("Forward.zig"); + +const RobotsLayer = @This(); + +next: Layer = undefined, +allocator: std.mem.Allocator, +pending: std.StringHashMapUnmanaged(std.ArrayListUnmanaged(Request)) = .empty, + +pub fn layer(self: *RobotsLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ + .request = request, + }, + }; +} + +pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void { + var it = self.pending.iterator(); + while (it.next()) |entry| { + entry.value_ptr.deinit(allocator); + } + self.pending.deinit(allocator); +} + +fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { + const self: *RobotsLayer = @ptrCast(@alignCast(ptr)); + + const arena = req.params.arena; + const robots_url = try URL.getRobotsUrl(arena, req.params.url); + + if (client.network.robot_store.get(robots_url)) |robot_entry| { + switch (robot_entry) { + .present => |robots| { + const path = URL.getPathname(req.params.url); + + if (!robots.isAllowed(path)) { + log.warn(.http, "blocked by robots", .{ .url = req.params.url }); + return error.RobotsBlocked; + } + }, + .absent => {}, + } + return self.next.request(client, req); + } + + return self.fetchRobotsThenRequest(client, robots_url, req); +} + +fn fetchRobotsThenRequest( + self: *RobotsLayer, + client: *Client, + robots_url: [:0]const u8, + req: Request, +) !void { + const entry = try self.pending.getOrPut(self.allocator, robots_url); + + if (!entry.found_existing) { + errdefer std.debug.assert(self.pending.remove(robots_url)); + entry.value_ptr.* = .empty; + + // This arena is later owned by the Request. It does not need to be cleaned up by us because + // it will be cleaned up by the `Transfer.deinit()` or any `Request.deinit()` called on any sublayers. + const new_arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer.RobotsContext"); + errdefer client.network.app.arena_pool.release(new_arena); + + const robots_ctx = try new_arena.create(RobotsContext); + robots_ctx.* = .{ + .layer = self, + .client = client, + .arena = new_arena, + .robots_url = robots_url, + .buffer = .empty, + }; + + const headers = try client.newHeaders(); + log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); + + try self.next.request(client, .{ + .ctx = robots_ctx, + .params = .{ + // We have to do this ourselves because we are not going through the top level `request`. + .arena = new_arena, + .request_id = client.incrReqId(), + .url = robots_url, + .method = .GET, + .headers = headers, + .frame_id = req.params.frame_id, + .loader_id = req.params.loader_id, + .cookie_jar = req.params.cookie_jar, + .cookie_origin = req.params.cookie_origin, + .notification = req.params.notification, + .resource_type = .fetch, + }, + .header_callback = RobotsContext.headerCallback, + .data_callback = RobotsContext.dataCallback, + .done_callback = RobotsContext.doneCallback, + .error_callback = RobotsContext.errorCallback, + .shutdown_callback = RobotsContext.shutdownCallback, + }); + } + + try entry.value_ptr.append(self.allocator, req); +} + +fn flushPending(self: *RobotsLayer, client: *Client, robots_url: [:0]const u8, allowed: bool) void { + var queued = self.pending.fetchRemove(robots_url) orelse + @panic("RobotsLayer.flushPending: missing queue"); + defer queued.value.deinit(self.allocator); + + for (queued.value.items) |queued_req| { + if (!allowed) { + log.warn(.http, "blocked by robots", .{ .url = queued_req.params.url }); + defer client.deinitRequest(queued_req); + queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); + } else { + self.next.request(client, queued_req) catch |e| { + defer client.deinitRequest(queued_req); + queued_req.error_callback(queued_req.ctx, e); + }; + } + } +} + +fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8, client: *Client) void { + var queued = self.pending.fetchRemove(robots_url) orelse + @panic("RobotsLayer.flushPendingShutdown: missing queue"); + defer queued.value.deinit(self.allocator); + + for (queued.value.items) |queued_req| { + defer client.deinitRequest(queued_req); + if (queued_req.shutdown_callback) |cb| cb(queued_req.ctx); + } +} + +const RobotsContext = struct { + layer: *RobotsLayer, + arena: std.mem.Allocator, + client: *Client, + robots_url: [:0]const u8, + buffer: std.ArrayListUnmanaged(u8), + status: u16 = 0, + + fn deinit(self: *RobotsContext) void { + self.buffer.deinit(self.layer.allocator); + self.layer.allocator.destroy(self); + } + + fn headerCallback(response: Response) anyerror!bool { + const self: *RobotsContext = @ptrCast(@alignCast(response.ctx)); + switch (response.inner) { + .transfer => |t| { + if (t.response_header) |hdr| { + log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = self.robots_url }); + self.status = hdr.status; + } + if (t.getContentLength()) |cl| { + try self.buffer.ensureTotalCapacity(self.arena, cl); + } + }, + else => {}, + } + return true; + } + + fn dataCallback(response: Response, data: []const u8) anyerror!void { + const self: *RobotsContext = @ptrCast(@alignCast(response.ctx)); + try self.buffer.appendSlice(self.arena, data); + } + + fn doneCallback(ctx_ptr: *anyopaque) anyerror!void { + const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); + const l = self.layer; + const client = self.client; + const robots_url = self.robots_url; + + var allowed = true; + const network = client.network; + + switch (self.status) { + 200 => { + if (self.buffer.items.len > 0) { + const robots: ?Robots = network.robot_store.robotsFromBytes( + network.config.http_headers.user_agent, + self.buffer.items, + ) catch blk: { + log.warn(.browser, "failed to parse robots", .{ .robots_url = robots_url }); + try network.robot_store.putAbsent(robots_url); + break :blk null; + }; + if (robots) |r| { + try network.robot_store.put(robots_url, r); + const path = URL.getPathname(l.pending.get(robots_url).?.items[0].params.url); + allowed = r.isAllowed(path); + } + } + }, + 404 => { + log.debug(.http, "robots not found", .{ .url = robots_url }); + try network.robot_store.putAbsent(robots_url); + }, + else => { + log.debug(.http, "unexpected status on robots", .{ + .url = robots_url, + .status = self.status, + }); + try network.robot_store.putAbsent(robots_url); + }, + } + + l.flushPending(client, robots_url, allowed); + } + + fn errorCallback(ctx_ptr: *anyopaque, err: anyerror) void { + const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); + const l = self.layer; + const client = self.client; + const robots_url = self.robots_url; + + log.warn(.http, "robots fetch failed", .{ .err = err }); + l.flushPending(client, robots_url, true); + } + + fn shutdownCallback(ctx_ptr: *anyopaque) void { + const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); + const l = self.layer; + const client = self.client; + const robots_url = self.robots_url; + + log.debug(.http, "robots fetch shutdown", .{}); + l.flushPendingShutdown(robots_url, client); + } +}; diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig new file mode 100644 index 00000000..7e67af49 --- /dev/null +++ b/src/network/layer/WebBotAuthLayer.zig @@ -0,0 +1,51 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const lp = @import("lightpanda"); +const log = lp.log; + +const URL = @import("../../browser/URL.zig"); +const WebBotAuth = @import("../WebBotAuth.zig"); +const Client = @import("../../browser/HttpClient.zig").Client; +const Request = @import("../../browser/HttpClient.zig").Request; +const Layer = @import("../../browser/HttpClient.zig").Layer; + +const WebBotAuthLayer = @This(); + +next: Layer = undefined, + +pub fn layer(self: *WebBotAuthLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ .request = request }, + }; +} + +fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { + const self: *WebBotAuthLayer = @ptrCast(@alignCast(ptr)); + var our_req = req; + + const wba = client.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth"); + + const arena = req.params.arena; + const authority = URL.getHost(req.params.url); + try wba.signRequest(arena, &our_req.params.headers, authority); + + return self.next.request(client, our_req); +}