diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 35584881..9dd32da0 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -63,8 +63,16 @@ jobs: zig-out/bin/lightpanda retention-days: 1 - demo-scripts: - name: demo-scripts + demo-runner: + strategy: + fail-fast: false + matrix: + proxy: [true, false] + wba: [true, false] + cache: [true, false] + robotstxt: [true, false] + + name: demo-runner needs: zig-build-release runs-on: ubuntu-latest @@ -85,36 +93,54 @@ jobs: - run: chmod a+x ./lightpanda - - name: run end to end tests - run: | - ./lightpanda serve & echo $! > LPD.pid - go run runner/main.go - kill `cat LPD.pid` - - - name: build proxy + - if: matrix.proxy == true + name: build and start proxy run: | cd proxy go build + ./proxy & echo $! > PROXY.id - - name: run end to end tests through proxy + - if: matrix.cache == true + run: mkdir /tmp/lp-cache + + - if: matrix.wba == true + run: echo "${{ secrets.WBA_PRIVATE_KEY_PEM }}" > private_key.pem + + - id: args + name: build LP args run: | - ./proxy/proxy & echo $! > PROXY.id - ./lightpanda serve --http-proxy 'http://127.0.0.1:3000' & echo $! > LPD.pid + args="" + [ "${{ matrix.proxy }}" = "true" ] && args="$args --http-proxy http://127.0.0.1:3000" + [ "${{ matrix.cache }}" = "true" ] && args="$args --http-cache-dir /tmp/lp-cache" + [ "${{ matrix.robotstxt }}" = "true" ] && args="$args --obey-robots" + [ "${{ matrix.wba }}" = "true" ] && args="$args --web-bot-auth-key-file private_key.pem" + [ "${{ matrix.wba }}" = "true" ] && args="$args --web-bot-auth-domain ${{ vars.WBA_DOMAIN }}" + [ "${{ matrix.wba }}" = "true" ] && args="$args --web-bot-auth-keyid ${{ vars.WBA_KEY_ID }}" + echo $args + echo "value=$args" >> "$GITHUB_OUTPUT" + + - run: | + ./lightpanda serve ${{ steps.args.outputs.value }} & echo $! > LPD.pid + + - run: | go run runner/main.go - URL=https://demo-browser.lightpanda.io/campfire-commerce/ node puppeteer/proxy_auth.js - kill `cat LPD.pid` `cat PROXY.id` - - name: run request interception through proxy and playwright + - run: | + kill `cat LPD.pid` + + - if: matrix.proxy == true run: | - export PROXY_USERNAME=username PROXY_PASSWORD=password - ./proxy/proxy & echo $! > PROXY.id - ./lightpanda serve & echo $! > LPD.pid - BASE_URL=https://demo-browser.lightpanda.io/ node playwright/proxy_auth.js - kill `cat LPD.pid` `cat PROXY.id` + pkill proxy - # e2e tests w/ web-bot-auth configuration on. - wba-demo-scripts: - name: wba-demo-scripts + proxy-auth: + strategy: + fail-fast: false + matrix: + wba: [true, false] + cache: [true, false] + robotstxt: [true, false] + + name: demo-runner needs: zig-build-release runs-on: ubuntu-latest @@ -135,47 +161,46 @@ jobs: - run: chmod a+x ./lightpanda - - run: echo "${{ secrets.WBA_PRIVATE_KEY_PEM }}" > private_key.pem - - - name: run end to end tests - run: | - ./lightpanda serve \ - --web-bot-auth-key-file private_key.pem \ - --web-bot-auth-keyid ${{ vars.WBA_KEY_ID }} \ - --web-bot-auth-domain ${{ vars.WBA_DOMAIN }} \ - & echo $! > LPD.pid - go run runner/main.go - kill `cat LPD.pid` - - - name: build proxy + - name: build and start proxy run: | cd proxy go build + ./proxy & echo $! > PROXY.id + + - if: matrix.cache == true + run: mkdir /tmp/lp-cache + + - if: matrix.wba == true + run: echo "${{ secrets.WBA_PRIVATE_KEY_PEM }}" > private_key.pem + + - id: args + name: build LP args + run: | + args="" + [ "${{ matrix.cache }}" = "true" ] && args="$args --http-cache-dir /tmp/lp-cache" + [ "${{ matrix.robotstxt }}" = "true" ] && args="$args --obey-robots" + [ "${{ matrix.wba }}" = "true" ] && args="$args --web-bot-auth-key-file private_key.pem" + [ "${{ matrix.wba }}" = "true" ] && args="$args --web-bot-auth-domain ${{ vars.WBA_DOMAIN }}" + [ "${{ matrix.wba }}" = "true" ] && args="$args --web-bot-auth-keyid ${{ vars.WBA_KEY_ID }}" + echo $args + echo "value=$args" >> "$GITHUB_OUTPUT" - name: run end to end tests through proxy run: | - ./proxy/proxy & echo $! > PROXY.id - ./lightpanda serve \ - --web-bot-auth-key-file private_key.pem \ - --web-bot-auth-keyid ${{ vars.WBA_KEY_ID }} \ - --web-bot-auth-domain ${{ vars.WBA_DOMAIN }} \ - --http-proxy 'http://127.0.0.1:3000' \ - & echo $! > LPD.pid + export PROXY_USERNAME=username PROXY_PASSWORD=password + ./lightpanda serve --http-proxy http://127.0.0.1:3000 ${{ steps.args.outputs.value }} & echo $! > LPD.pid go run runner/main.go URL=https://demo-browser.lightpanda.io/campfire-commerce/ node puppeteer/proxy_auth.js - kill `cat LPD.pid` `cat PROXY.id` + kill `cat LPD.pid` - name: run request interception through proxy and playwright run: | export PROXY_USERNAME=username PROXY_PASSWORD=password - ./proxy/proxy & echo $! > PROXY.id - ./lightpanda serve \ - --web-bot-auth-key-file private_key.pem \ - --web-bot-auth-keyid ${{ vars.WBA_KEY_ID }} \ - --web-bot-auth-domain ${{ vars.WBA_DOMAIN }} \ - & echo $! > LPD.pid + ./lightpanda serve ${{ steps.args.outputs.value }} & echo $! > LPD.pid BASE_URL=https://demo-browser.lightpanda.io/ node playwright/proxy_auth.js - kill `cat LPD.pid` `cat PROXY.id` + kill `cat LPD.pid` + + - run: pkill proxy wba-test: name: wba-test diff --git a/src/Config.zig b/src/Config.zig index 9c3023b8..f6bbd280 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -60,6 +60,18 @@ fn logFilterScopesValidator(allocator: Allocator, args: *std.process.ArgIterator } } +fn logLevelValidator(_: Allocator, args: *std.process.ArgIterator) !?log.Level { + const str = args.next() orelse return error.MissingArgument; + if (std.mem.eql(u8, str, "error")) { + return .err; + } + + return std.meta.stringToEnum(log.Level, str) orelse { + log.fatal(.app, "invalid option choice", .{ .arg = "--log-level", .value = str }); + return error.InvalidArgument; + }; +} + /// Common CLI args. const CommonOptions = .{ .{ .name = "obey_robots", .type = bool }, @@ -72,7 +84,7 @@ const CommonOptions = .{ .{ .name = "http_max_response_size", .type = ?usize }, .{ .name = "ws_max_concurrent", .type = ?u8 }, .{ .name = "insecure_disable_tls_host_verification", .type = bool }, - .{ .name = "log_level", .type = ?log.Level }, + .{ .name = "log_level", .type = ?log.Level, .validator = logLevelValidator }, .{ .name = "log_format", .type = ?log.Format }, .{ .name = "log_filter_scopes", .type = log.Scope, .multiple = true, .validator = logFilterScopesValidator }, .{ .name = "user_agent_suffix", .type = ?[]const u8 }, diff --git a/src/Notification.zig b/src/Notification.zig index d01d29ce..f429e7b9 100644 --- a/src/Notification.zig +++ b/src/Notification.zig @@ -21,6 +21,9 @@ const lp = @import("lightpanda"); const Frame = @import("browser/Frame.zig"); const Transfer = @import("browser/HttpClient.zig").Transfer; +const Request = @import("browser/HttpClient.zig").Request; +const Response = @import("browser/HttpClient.zig").Response; +const InterceptContext = @import("network/layer/InterceptionLayer.zig").InterceptContext; const log = lp.log; const List = std.DoublyLinkedList; @@ -162,11 +165,11 @@ pub const FrameLoaded = struct { }; pub const RequestStart = struct { - transfer: *Transfer, + request: *Request, }; pub const RequestIntercept = struct { - transfer: *Transfer, + request: *Request, wait_for_interception: *bool, }; @@ -177,19 +180,21 @@ pub const RequestAuthRequired = struct { pub const ResponseData = struct { data: []const u8, - transfer: *Transfer, + request: *Request, }; pub const ResponseHeaderDone = struct { - transfer: *Transfer, + request: *Request, + response: *const Response, }; pub const RequestDone = struct { - transfer: *Transfer, + request: *Request, + content_length: usize, }; pub const RequestFail = struct { - transfer: *Transfer, + request: *Request, err: anyerror, }; diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig index 0f819ecb..20f1332e 100644 --- a/src/browser/Frame.zig +++ b/src/browser/Frame.zig @@ -623,6 +623,10 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo if (opts.header) |hdr| { try headers.add(hdr); } + if (opts.referer) |ref| { + const ref_header = try std.mem.concatWithSentinel(self.arena, u8, &.{ "Referer: ", ref }, 0); + try headers.add(ref_header); + } // We dispatch frame_navigate event before sending the request. // It ensures the event frame_navigated is not dispatched before this one. session.notification.dispatch(.frame_navigate, &.{ @@ -644,16 +648,18 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo http_client.request(.{ .ctx = self, - .url = self.url, - .frame_id = self._frame_id, - .loader_id = self._loader_id, - .method = opts.method, - .headers = headers, - .body = opts.body, - .cookie_jar = &session.cookie_jar, - .cookie_origin = self.url, - .resource_type = .document, - .notification = self._session.notification, + .params = .{ + .url = self.url, + .frame_id = self._frame_id, + .loader_id = self._loader_id, + .method = opts.method, + .headers = headers, + .body = opts.body, + .cookie_jar = &session.cookie_jar, + .cookie_origin = self.url, + .resource_type = .document, + .notification = self._session.notification, + }, .header_callback = frameHeaderDoneCallback, .data_callback = frameDataCallback, .done_callback = frameDoneCallback, @@ -755,9 +761,18 @@ fn scheduleNavigationWithArena(originator: *Frame, arena: Allocator, request_url session.browser.http_client.abortFrame(target._frame_id); } + // Capture the originating frame's URL as the Referer for this + // navigation. The originator's frame may be torn down before navigate() + // runs (processRootQueuedNavigation rebuilds the Page in-place), so dup + // into the QueuedNavigation arena which outlives that tear-down. + var nav_opts = opts; + if (nav_opts.referer == null and std.mem.startsWith(u8, originator.url, "http")) { + nav_opts.referer = try arena.dupe(u8, originator.url); + } + const qn = try arena.create(QueuedNavigation); qn.* = .{ - .opts = opts, + .opts = nav_opts, .arena = arena, .url = resolved_url, .is_about_blank = is_about_blank, @@ -1280,7 +1295,12 @@ pub fn iframeAddedCallback(self: *Frame, iframe: *IFrame) !void { ); }; - new_frame.navigate(url, .{ .reason = .initialFrameNavigation }) catch |err| { + new_frame.navigate(url, .{ + .reason = .initialFrameNavigation, + // Iframe's initial src request carries the parent's URL as Referer. + // Parent frame outlives this navigate() call, so the slice is safe. + .referer = if (std.mem.startsWith(u8, self.url, "http")) self.url else null, + }) catch |err| { log.warn(.frame, "iframe navigate failure", .{ .url = url, .err = err }); self._pending_loads -= 1; iframe._window = null; @@ -3480,6 +3500,10 @@ pub const NavigateOpts = struct { method: HttpClient.Method = .GET, body: ?[]const u8 = null, header: ?[:0]const u8 = null, + // Set by scheduleNavigationWithArena from the originating frame's URL so + // anchor click / form submit / location.href navigations carry a Referer. + // null on CDP Page.navigate (address-bar) and Page.reload โ€” matches Chrome. + referer: ?[]const u8 = null, force: bool = false, kind: NavigationKind = .{ .push = null }, }; diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 0832db1a..a810c535 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -27,7 +27,6 @@ const CookieJar = @import("webapi/storage/Cookie.zig").Jar; const http = @import("../network/http.zig"); const Network = @import("../network/Network.zig"); const Robots = @import("../network/Robots.zig"); -const Cache = @import("../network/cache/Cache.zig"); const timestamp = @import("../datetime.zig").timestamp; const log = lp.log; @@ -40,8 +39,12 @@ pub const Method = http.Method; pub const Headers = http.Headers; pub const ResponseHead = http.ResponseHead; pub const HeaderIterator = http.HeaderIterator; -const CacheMetadata = Cache.CachedMetadata; -const CachedResponse = Cache.CachedResponse; +const CachedResponse = @import("../network/cache/Cache.zig").CachedResponse; + +pub const CacheLayer = @import("../network/layer/CacheLayer.zig"); +pub const RobotsLayer = @import("../network/layer/RobotsLayer.zig"); +pub const WebBotAuthLayer = @import("../network/layer/WebBotAuthLayer.zig"); +pub const InterceptionLayer = @import("../network/layer/InterceptionLayer.zig"); // This is loosely tied to a browser Page. Loading all the , doing // XHR requests, and loading imports all happens through here. Sine the app @@ -62,15 +65,6 @@ ws_active: usize = 0, // Count of active http requests http_active: usize = 0, -// Count of intercepted requests. This is to help deal with intercepted requests. -// The client doesn't track intercepted transfers. If a request is intercepted, -// the client forgets about it and requires the interceptor to continue or abort -// it. That works well, except if we only rely on active, we might think there's -// no more network activity when, with interecepted requests, there might be more -// in the future. (We really only need this to properly emit a 'networkIdle' and -// 'networkAlmostIdle' Page.lifecycleEvent in CDP). -intercepted: usize = 0, - // Our curl multi handle. handles: http.Handles, @@ -101,10 +95,6 @@ allocator: Allocator, network: *Network, -// Queue of requests that depend on a robots.txt. -// Allows us to fetch the robots.txt just once. -pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty, - // Once we have a handle/easy to process a request with, we create a Transfer // which contains the Request as well as any state we need to process the // request. These will come and go with each request. @@ -134,6 +124,30 @@ cdp_client: ?CDPClient = null, max_response_size: usize, +cache_layer: CacheLayer, +robots_layer: RobotsLayer, +web_bot_auth_layer: WebBotAuthLayer, +interception_layer: InterceptionLayer, +entry_layer: Layer, + +pub const Layer = struct { + ptr: *anyopaque, + vtable: *const VTable, + + pub const VTable = struct { + request: *const fn (*anyopaque, *Client, Request) anyerror!void, + }; + + pub fn request(self: Layer, client: *Client, req: Request) !void { + return self.vtable.request(self.ptr, client, req); + } +}; + +fn layerWith(self: anytype, next: Layer) Layer { + self.next = next; + return self.layer(); +} + // libcurl can monitor arbitrary sockets, this lets us use libcurl to poll // both HTTP data as well as messages from an CDP connection. // Furthermore, we have some tension between blocking scripts and request @@ -175,8 +189,32 @@ pub fn init(allocator: Allocator, network: *Network) !*Client { .tls_verify = network.config.tlsVerifyHost(), .obey_robots = network.config.obeyRobots(), .max_response_size = network.config.httpMaxResponseSize() orelse std.math.maxInt(u32), + + .cache_layer = .{}, + .robots_layer = .{ .allocator = allocator }, + .web_bot_auth_layer = .{}, + .interception_layer = .{}, + .entry_layer = undefined, }; + var next = client.layer(); + + if (network.config.obeyRobots()) { + next = layerWith(&client.robots_layer, next); + } + + if (network.config.httpCacheDir() != null) { + next = layerWith(&client.cache_layer, next); + } + + next = layerWith(&client.interception_layer, next); + + if (network.config.webBotAuth() != null) { + next = layerWith(&client.web_bot_auth_layer, next); + } + + client.entry_layer = next; + return client; } @@ -185,17 +223,20 @@ pub fn deinit(self: *Client) void { self.handles.deinit(); self.transfer_pool.deinit(); - - var robots_iter = self.pending_robots_queue.iterator(); - while (robots_iter.next()) |entry| { - entry.value_ptr.deinit(self.allocator); - } - self.pending_robots_queue.deinit(self.allocator); - self.clearUserAgentOverride(); + + self.robots_layer.deinit(self.allocator); + self.allocator.destroy(self); } +pub fn layer(self: *Client) Layer { + return .{ + .ptr = self, + .vtable = &.{ .request = _request }, + }; +} + // Set a user agent override. Both the raw UA string and the pre-formatted // "User-Agent: " header string are allocated from self.allocator. pub fn setUserAgentOverride(self: *Client, ua: []const u8) !void { @@ -285,7 +326,7 @@ fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32) void { const transfer: *Transfer = @fieldParentPtr("_node", node); if (comptime abort_all) { transfer.kill(); - } else if (transfer.req.frame_id == frame_id) { + } else if (transfer.req.params.frame_id == frame_id) { q.remove(node); transfer.kill(); } @@ -322,7 +363,7 @@ fn abortConnections(list: std.DoublyLinkedList, comptime abort_all: bool, frame_ const conn: *http.Connection = @fieldParentPtr("node", node); switch (conn.transport) { .http => |transfer| { - if ((comptime abort_all) or transfer.req.frame_id == frame_id) { + if ((comptime abort_all) or transfer.req.params.frame_id == frame_id) { transfer.kill(); } }, @@ -349,346 +390,104 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { return self.perform(@intCast(timeout_ms)); } -pub fn request(self: *Client, req: Request) !void { - if (self.obey_robots == false) { - return self.processRequest(req); - } - - const robots_url = try URL.getRobotsUrl(self.allocator, req.url); - errdefer self.allocator.free(robots_url); - - // If we have this robots cached, we can take a fast path. - if (self.network.robot_store.get(robots_url)) |robot_entry| { - defer self.allocator.free(robots_url); - - switch (robot_entry) { - // If we have a found robots entry, we check it. - .present => |robots| { - const path = URL.getPathname(req.url); - if (!robots.isAllowed(path)) { - req.error_callback(req.ctx, error.RobotsBlocked); - return; - } - }, - // Otherwise, we assume we won't find it again. - .absent => {}, - } - - return self.processRequest(req); - } - return self.fetchRobotsThenProcessRequest(robots_url, req); -} - -fn serveFromCache(req: Request, cached: *const CachedResponse) !void { - const response = Response.fromCached(req.ctx, cached); - defer switch (cached.data) { - .buffer => |_| {}, - .file => |f| f.file.close(), - }; - - if (req.start_callback) |cb| { - try cb(response); - } - - const proceed = try req.header_callback(response); - if (!proceed) { - req.error_callback(req.ctx, error.Abort); - return; - } - - switch (cached.data) { - .buffer => |data| { - if (data.len > 0) { - try req.data_callback(response, data); - } - }, - .file => |f| { - const file = f.file; - - var buf: [1024]u8 = undefined; - var file_reader = file.reader(&buf); - try file_reader.seekTo(f.offset); - const reader = &file_reader.interface; - - var read_buf: [1024]u8 = undefined; - var remaining = f.len; - - while (remaining > 0) { - const read_len = @min(read_buf.len, remaining); - const n = try reader.readSliceShort(read_buf[0..read_len]); - if (n == 0) break; - remaining -= n; - try req.data_callback(response, read_buf[0..n]); - } - }, - } - - try req.done_callback(req.ctx); -} - -fn processRequest(self: *Client, req: Request) !void { - if (self.network.cache) |*cache| { - if (req.method == .GET) { - // cache is only used to read the meta data - const arena = try self.network.app.arena_pool.acquire(.small, "HttpClient.cache"); - defer self.network.app.arena_pool.release(arena); - - var iter = req.headers.iterator(); - const req_header_list = try iter.collect(arena); - - if (cache.get(arena, .{ - .url = req.url, - .timestamp = std.time.timestamp(), - .request_headers = req_header_list.items, - })) |cached| { - defer req.headers.deinit(); - return serveFromCache(req, &cached); - } - } - } - +pub fn _request(ptr: *anyopaque, _: *Client, req: Request) !void { + const self: *Client = @ptrCast(@alignCast(ptr)); const transfer = try self.makeTransfer(req); - - transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); - - var wait_for_interception = false; - transfer.req.notification.dispatch(.http_request_intercept, &.{ - .transfer = transfer, - .wait_for_interception = &wait_for_interception, - }); - if (wait_for_interception == false) { - // request not intercepted, process it normally - return self.process(transfer); - } - - self.intercepted += 1; - if (comptime IS_DEBUG) { - log.debug(.http, "wait for interception", .{ .intercepted = self.intercepted }); - } - transfer._intercept_state = .pending; - - if (req.blocking == false) { - // The request was interecepted, but it isn't a blocking request, so we - // dont' need to block this call. The request will be unblocked - // asynchronously via either continueTransfer or abortTransfer - return; - } - - if (try self.waitForInterceptedResponse(transfer)) { - return self.process(transfer); - } + return self.process(transfer); } -const RobotsRequestContext = struct { - client: *Client, - req: Request, - robots_url: [:0]const u8, - buffer: std.ArrayList(u8), - status: u16 = 0, +pub fn request(self: *Client, req: Request) !void { + // Assign Request Id. + var our_req = req; + our_req.params.request_id = self.incrReqId(); - pub fn deinit(self: *RobotsRequestContext) void { - self.client.allocator.free(self.robots_url); - self.buffer.deinit(self.client.allocator); - self.client.allocator.destroy(self); + const arena = try self.network.app.arena_pool.acquire(.small, "Request.arena"); + our_req.params.arena = arena; + + return self.entry_layer.request(self, our_req) catch |err| { + our_req.error_callback(our_req.ctx, err); + self.deinitRequest(our_req); + return err; + }; +} + +const SyncContext = struct { + allocator: Allocator, + completion: union(enum) { + in_progress: void, + done: void, + err: anyerror, + shutdown: void, + } = .in_progress, + + status: u16 = 0, + body: std.ArrayList(u8), + + fn headerCallback(response: Response) anyerror!bool { + const self: *SyncContext = @ptrCast(@alignCast(response.ctx)); + lp.assert(response.status() != null, "HttpClient.SyncRequest.headerCallback", .{ .value = response.status() }); + self.status = response.status().?; + if (response.contentLength()) |cl| { + try self.body.ensureTotalCapacity(self.allocator, cl); + } + return true; + } + + fn dataCallback(response: Response, data: []const u8) anyerror!void { + const self: *SyncContext = @ptrCast(@alignCast(response.ctx)); + try self.body.appendSlice(self.allocator, data); + } + + fn doneCallback(ctx: *anyopaque) anyerror!void { + const self: *SyncContext = @ptrCast(@alignCast(ctx)); + self.completion = .done; + } + + fn errorCallback(ctx: *anyopaque, err: anyerror) void { + const self: *SyncContext = @ptrCast(@alignCast(ctx)); + self.completion = .{ .err = err }; + } + + fn shutdownCallback(ctx: *anyopaque) void { + const self: *SyncContext = @ptrCast(@alignCast(ctx)); + self.completion = .shutdown; } }; -fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: Request) !void { - const entry = try self.pending_robots_queue.getOrPut(self.allocator, robots_url); +pub fn syncRequest(self: *Client, allocator: Allocator, params: RequestParams) !SyncResponse { + var sync_ctx = SyncContext{ .allocator = allocator, .body = .empty }; + errdefer sync_ctx.body.deinit(allocator); - if (!entry.found_existing) { - errdefer self.allocator.free(robots_url); + try self.request(.{ + .params = params, + .ctx = &sync_ctx, + .header_callback = SyncContext.headerCallback, + .data_callback = SyncContext.dataCallback, + .done_callback = SyncContext.doneCallback, + .error_callback = SyncContext.errorCallback, + .shutdown_callback = SyncContext.shutdownCallback, + }); - // If we aren't already fetching this robots, - // we want to create a new queue for it and add this request into it. - entry.value_ptr.* = .empty; - - const ctx = try self.allocator.create(RobotsRequestContext); - errdefer self.allocator.destroy(ctx); - ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty }; - const headers = try self.newHeaders(); - - log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); - try self.processRequest(.{ - .ctx = ctx, - .url = robots_url, - .method = .GET, - .headers = headers, - .blocking = false, - .frame_id = req.frame_id, - .loader_id = req.loader_id, - .cookie_jar = req.cookie_jar, - .cookie_origin = req.cookie_origin, - .notification = req.notification, - .resource_type = .fetch, - .header_callback = robotsHeaderCallback, - .data_callback = robotsDataCallback, - .done_callback = robotsDoneCallback, - .error_callback = robotsErrorCallback, - .shutdown_callback = robotsShutdownCallback, - }); - } else { - // Not using our own robots URL, only using the one from the first request. - self.allocator.free(robots_url); - } - - try entry.value_ptr.append(self.allocator, req); -} - -fn robotsHeaderCallback(response: Response) !bool { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx)); - // Robots callbacks only happen on real live requests. - const transfer = response.inner.transfer; - - if (transfer.response_header) |hdr| { - log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = ctx.robots_url }); - ctx.status = hdr.status; - } - - if (transfer.getContentLength()) |cl| { - try ctx.buffer.ensureTotalCapacity(ctx.client.allocator, cl); - } - - return true; -} - -fn robotsDataCallback(response: Response, data: []const u8) !void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx)); - try ctx.buffer.appendSlice(ctx.client.allocator, data); -} - -fn robotsDoneCallback(ctx_ptr: *anyopaque) !void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.deinit(); - - var allowed = true; - - switch (ctx.status) { - 200 => { - if (ctx.buffer.items.len > 0) { - const robots: ?Robots = ctx.client.network.robot_store.robotsFromBytes( - ctx.client.getUserAgent(), - ctx.buffer.items, - ) catch blk: { - log.warn(.browser, "failed to parse robots", .{ .robots_url = ctx.robots_url }); - // If we fail to parse, we just insert it as absent and ignore. - try ctx.client.network.robot_store.putAbsent(ctx.robots_url); - break :blk null; - }; - - if (robots) |r| { - try ctx.client.network.robot_store.put(ctx.robots_url, r); - const path = URL.getPathname(ctx.req.url); - allowed = r.isAllowed(path); - } - } - }, - 404 => { - log.debug(.http, "robots not found", .{ .url = ctx.robots_url }); - // If we get a 404, we just insert it as absent. - try ctx.client.network.robot_store.putAbsent(ctx.robots_url); - }, - else => { - log.debug(.http, "unexpected status on robots", .{ .url = ctx.robots_url, .status = ctx.status }); - // If we get an unexpected status, we just insert as absent. - try ctx.client.network.robot_store.putAbsent(ctx.robots_url); - }, - } - - var queued = ctx.client.pending_robots_queue.fetchRemove( - ctx.robots_url, - ) orelse @panic("Client.robotsDoneCallbacke empty queue"); - defer queued.value.deinit(ctx.client.allocator); - - for (queued.value.items) |queued_req| { - if (!allowed) { - log.warn(.http, "blocked by robots", .{ .url = queued_req.url }); - queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); - } else { - ctx.client.processRequest(queued_req) catch |e| { - queued_req.error_callback(queued_req.ctx, e); - }; - } - } -} - -fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.deinit(); - - log.warn(.http, "robots fetch failed", .{ .err = err }); - - var queued = ctx.client.pending_robots_queue.fetchRemove( - ctx.robots_url, - ) orelse @panic("Client.robotsErrorCallback empty queue"); - defer queued.value.deinit(ctx.client.allocator); - - // On error, allow all queued requests to proceed - for (queued.value.items) |queued_req| { - ctx.client.processRequest(queued_req) catch |e| { - queued_req.error_callback(queued_req.ctx, e); - }; - } -} - -fn robotsShutdownCallback(ctx_ptr: *anyopaque) void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.deinit(); - - log.debug(.http, "robots fetch shutdown", .{}); - - var queued = ctx.client.pending_robots_queue.fetchRemove( - ctx.robots_url, - ) orelse @panic("Client.robotsErrorCallback empty queue"); - defer queued.value.deinit(ctx.client.allocator); - - for (queued.value.items) |queued_req| { - if (queued_req.shutdown_callback) |shutdown_cb| { - shutdown_cb(queued_req.ctx); - } - } -} - -fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { - // The request was intercepted and is blocking. This is messy, but our - // callers, the ScriptManager -> Page, don't have a great way to stop the - // parser and return control to the CDP server to wait for the interception - // response. We have some information on the CDPClient, so we'll do the - // blocking here. (This is a bit of a legacy thing. Initially the Client - // had a 'extra_socket' that it could monitor. It was named 'extra_socket' - // to appear generic, but really, that 'extra_socket' was always the CDP - // socket. Because we already had the "extra_socket" here, it was easier to - // make it even more CDP- aware and turn `extra_socket: socket_t` into the - // current CDPClient and do the blocking here). - const cdp_client = self.cdp_client.?; - const ctx = cdp_client.ctx; - - if (cdp_client.blocking_read_start(ctx) == false) { - return error.BlockingInterceptFailure; - } - - defer _ = cdp_client.blocking_read_end(ctx); - - while (true) { - if (cdp_client.blocking_read(ctx) == false) { - return error.BlockingInterceptFailure; - } - - switch (transfer._intercept_state) { - .pending => continue, // keep waiting - .@"continue" => return true, - .abort => |err| { - transfer.abort(err); - return false; + while (sync_ctx.completion == .in_progress) { + const status = try self.tick(200); + log.debug(.http, "sync request tick", .{ .status = status }); + switch (status) { + .cdp_socket => { + const cdp = self.cdp_client.?; + _ = cdp.blocking_read(cdp.ctx); }, - .fulfilled => { - // callbacks already called, just need to cleanups - transfer.deinit(); - return false; - }, - .not_intercepted => unreachable, + .normal => continue, } } + + switch (sync_ctx.completion) { + .in_progress => @panic("Impossible to be in progress here."), + .done, .shutdown => return .{ + .status = sync_ctx.status, + .body = sync_ctx.body, + }, + .err => |e| return e, + } } // Above, request will not process if there's an interception request. In such @@ -706,50 +505,6 @@ fn process(self: *Client, transfer: *Transfer) !void { self.queue.append(&transfer._node); } -// For an intercepted request -pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { - if (comptime IS_DEBUG) { - std.debug.assert(transfer._intercept_state != .not_intercepted); - log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); - } - self.intercepted -= 1; - - if (!transfer.req.blocking) { - return self.process(transfer); - } - transfer._intercept_state = .@"continue"; -} - -// For an intercepted request -pub fn abortTransfer(self: *Client, transfer: *Transfer) void { - if (comptime IS_DEBUG) { - std.debug.assert(transfer._intercept_state != .not_intercepted); - log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); - } - self.intercepted -= 1; - - if (!transfer.req.blocking) { - transfer.abort(error.Abort); - } - transfer._intercept_state = .{ .abort = error.Abort }; -} - -// For an intercepted request -pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { - if (comptime IS_DEBUG) { - std.debug.assert(transfer._intercept_state != .not_intercepted); - log.debug(.http, "filfull transfer", .{ .intercepted = self.intercepted }); - } - self.intercepted -= 1; - - try transfer.fulfill(status, headers, body); - if (!transfer.req.blocking) { - transfer.deinit(); - return; - } - transfer._intercept_state = .fulfilled; -} - pub fn nextReqId(self: *Client) u32 { return self.next_request_id +% 1; } @@ -761,17 +516,13 @@ pub fn incrReqId(self: *Client) u32 { } fn makeTransfer(self: *Client, req: Request) !*Transfer { - errdefer req.headers.deinit(); - const transfer = try self.transfer_pool.create(); errdefer self.transfer_pool.destroy(transfer); - const id = self.incrReqId(); transfer.* = .{ .start_time = timestamp(.monotonic), - .arena = ArenaAllocator.init(self.allocator), - .id = id, - .url = req.url, + .id = req.params.request_id, + .url = req.params.url, .req = req, .client = self, }; @@ -788,11 +539,6 @@ fn requestFailed(transfer: *Transfer, err: anyerror, comptime execute_callback: transfer._notified_fail = true; - transfer.req.notification.dispatch(.http_request_fail, &.{ - .transfer = transfer, - .err = err, - }); - if (execute_callback) { transfer.req.error_callback(transfer.req.ctx, err); } else if (transfer.req.shutdown_callback) |cb| { @@ -900,9 +646,6 @@ fn perform(self: *Client, timeout_ms: c_int) anyerror!PerformStatus { } fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *Transfer) !bool { - // Detect auth challenge from response headers. - // Also check on RecvError: proxy may send 407 with headers before - // closing the connection (CONNECT tunnel not yet established). if (msg.err == null or msg.err.? == error.RecvError) { transfer.detectAuthChallenge(msg.conn); } @@ -911,43 +654,21 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T // TODO give a way to configure the number of auth retries. if (transfer._auth_challenge != null and transfer._tries < 10) { var wait_for_interception = false; - transfer.req.notification.dispatch( + transfer.req.params.notification.dispatch( .http_request_auth_required, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }, ); if (wait_for_interception) { - self.intercepted += 1; + self.interception_layer.intercepted += 1; if (comptime IS_DEBUG) { - log.debug(.http, "wait for auth interception", .{ .intercepted = self.intercepted }); + log.debug(.http, "wait for auth interception", .{ .intercepted = self.interception_layer.intercepted }); } - transfer._intercept_state = .pending; // Whether or not this is a blocking request, we're not going // to process it now. We can end the transfer, which will // release the easy handle back into the pool. The transfer // is still valid/alive (just has no handle). transfer.releaseConn(); - if (!transfer.req.blocking) { - // In the case of an async request, we can just "forget" - // about this transfer until it gets updated asynchronously - // from some CDP command. - return false; - } - - // In the case of a sync request, we need to block until we - // get the CDP command for handling this case. - if (try self.waitForInterceptedResponse(transfer)) { - // we've been asked to continue with the request - // we can't process it here, since we're already inside - // a process, so we need to queue it and wait for the - // next tick (this is why it was safe to releaseConn - // above, because even in the "blocking" path, we still - // only process it on the next tick). - self.queue.append(&transfer._node); - } else { - // aborted, already cleaned up - } - return false; } } @@ -1017,34 +738,18 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T if (transfer._stream_buffer.items.len > 0) { try transfer.req.data_callback(Response.fromTransfer(transfer), body); - transfer.req.notification.dispatch(.http_response_data, &.{ - .data = body, - .transfer = transfer, - }); - if (transfer.aborted) { transfer.requestFailed(error.Abort, true); return true; } } - if (transfer._pending_cache_metadata) |metadata| { - const cache = &self.network.cache.?; - cache.put(metadata.*, body) catch |err| { - log.warn(.cache, "cache put failed", .{ .err = err }); - }; - } - // release conn ASAP so that it's available; some done_callbacks // will load more resources. transfer.releaseConn(); try transfer.req.done_callback(transfer.req.ctx); - transfer.req.notification.dispatch(.http_request_done, &.{ - .transfer = transfer, - }); - return true; } @@ -1154,7 +859,12 @@ fn ensureNoActiveConnection(self: *const Client) !void { } } -pub const Request = struct { +pub const RequestParams = struct { + /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. + arena: Allocator = undefined, + /// This is unsafe to access until you pass it to `Client.request()` where it gets assigned. + request_id: u32 = undefined, + frame_id: u32, loader_id: u32, method: Method, @@ -1168,23 +878,6 @@ pub const Request = struct { notification: *Notification, timeout_ms: u32 = 0, - // This is only relevant for intercepted requests. If a request is flagged - // as blocking AND is intercepted, then it'll be up to us to wait until - // we receive a response to the interception. This probably isn't ideal, - // but it's harder for our caller (ScriptManager) to deal with this. One - // reason for that is the Http Client is already a bit CDP-aware. - blocking: bool = false, - - // arbitrary data that can be associated with this request - ctx: *anyopaque = undefined, - - start_callback: ?*const fn (response: Response) anyerror!void = null, - header_callback: *const fn (response: Response) anyerror!bool, - data_callback: *const fn (response: Response, data: []const u8) anyerror!void, - done_callback: *const fn (ctx: *anyopaque) anyerror!void, - error_callback: *const fn (ctx: *anyopaque, err: anyerror) void, - shutdown_callback: ?*const fn (ctx: *anyopaque) void = null, - const ResourceType = enum { document, xhr, @@ -1204,6 +897,62 @@ pub const Request = struct { }; } }; + + pub fn deinit(self: *const RequestParams) void { + self.headers.deinit(); + } +}; + +pub const Request = struct { + pub const StartCallback = *const fn (response: Response) anyerror!void; + pub const HeaderCallback = *const fn (response: Response) anyerror!bool; + pub const DataCallback = *const fn (response: Response, data: []const u8) anyerror!void; + pub const DoneCallback = *const fn (ctx: *anyopaque) anyerror!void; + pub const ErrorCallback = *const fn (ctx: *anyopaque, err: anyerror) void; + pub const ShutdownCallback = *const fn (ctx: *anyopaque) void; + + params: RequestParams, + // arbitrary data that can be associated with this request + ctx: *anyopaque = undefined, + + start_callback: ?StartCallback = null, + header_callback: HeaderCallback, + data_callback: DataCallback, + done_callback: DoneCallback, + error_callback: ErrorCallback, + shutdown_callback: ?ShutdownCallback = null, + + pub fn getCookieString(self: *Request) !?[:0]const u8 { + const jar = self.params.cookie_jar orelse return null; + var aw: std.Io.Writer.Allocating = .init(self.params.arena); + try jar.forRequest(self.params.url, &aw.writer, .{ + .is_http = true, + .origin_url = self.params.cookie_origin, + .is_navigation = self.params.resource_type == .document, + }); + const written = aw.written(); + if (written.len == 0) return null; + try aw.writer.writeByte(0); + return written.ptr[0..written.len :0]; + } + + pub fn deinit(self: *const Request) void { + self.params.deinit(); + } +}; + +pub const FulfilledResponse = struct { + status: u16, + url: [:0]const u8, + headers: []const http.Header, + body: ?[]const u8, + + pub fn contentType(self: *const FulfilledResponse) ?[]const u8 { + for (self.headers) |hdr| { + if (std.ascii.eqlIgnoreCase(hdr.name, "content-type")) return hdr.value; + } + return null; + } }; pub const Response = struct { @@ -1211,6 +960,7 @@ pub const Response = struct { inner: union(enum) { transfer: *Transfer, cached: *const CachedResponse, + fulfilled: *const FulfilledResponse, }, pub fn fromTransfer(transfer: *Transfer) Response { @@ -1221,10 +971,15 @@ pub const Response = struct { return .{ .ctx = ctx, .inner = .{ .cached = resp } }; } + pub fn fromFulfilled(ctx: *anyopaque, fulfilled: *const FulfilledResponse) Response { + return .{ .ctx = ctx, .inner = .{ .fulfilled = fulfilled } }; + } + pub fn status(self: Response) ?u16 { return switch (self.inner) { .transfer => |t| if (t.response_header) |rh| rh.status else null, .cached => |c| c.metadata.status, + .fulfilled => |f| f.status, }; } @@ -1232,6 +987,7 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| if (t.response_header) |*rh| rh.contentType() else null, .cached => |c| c.metadata.content_type, + .fulfilled => |f| f.contentType(), }; } @@ -1242,13 +998,14 @@ pub const Response = struct { .buffer => |buf| @intCast(buf.len), .file => |f| @intCast(f.len), }, + .fulfilled => |f| if (f.body) |b| @intCast(b.len) else null, }; } pub fn redirectCount(self: Response) ?u32 { return switch (self.inner) { .transfer => |t| if (t.response_header) |rh| rh.redirect_count else null, - .cached => 0, + .cached, .fulfilled => 0, }; } @@ -1256,6 +1013,7 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| t.url, .cached => |c| c.metadata.url, + .fulfilled => |f| f.url, }; } @@ -1263,13 +1021,14 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| t.responseHeaderIterator(), .cached => |c| HeaderIterator{ .list = .{ .list = c.metadata.headers } }, + .fulfilled => |f| HeaderIterator{ .list = .{ .list = f.headers } }, }; } pub fn abort(self: Response, err: anyerror) void { switch (self.inner) { .transfer => |t| t.abort(err), - .cached => {}, + .cached, .fulfilled => {}, } } @@ -1277,12 +1036,21 @@ pub const Response = struct { return switch (self.inner) { .transfer => |t| try t.format(writer), .cached => |c| try c.format(writer), + .fulfilled => |f| try writer.print("fulfilled {s}", .{f.url}), }; } }; +pub const SyncResponse = struct { + status: u16, + body: std.ArrayList(u8), + + pub fn deinit(self: *SyncResponse, allocator: Allocator) void { + self.body.deinit(allocator); + } +}; + pub const Transfer = struct { - arena: ArenaAllocator, id: u32 = 0, req: Request, url: [:0]const u8, @@ -1290,7 +1058,6 @@ pub const Transfer = struct { // total bytes received in the response, including the response status line, // the headers, and the [encoded] body. bytes_received: usize = 0, - _pending_cache_metadata: ?*CacheMetadata = null, start_time: u64, aborted: bool = false, @@ -1327,15 +1094,6 @@ pub const Transfer = struct { // for when a Transfer is queued in the client.queue _node: std.DoublyLinkedList.Node = .{}, - _intercept_state: InterceptState = .not_intercepted, - - const InterceptState = union(enum) { - not_intercepted, - pending, - @"continue", - abort: anyerror, - fulfilled, - }; fn releaseConn(self: *Transfer) void { if (self._conn) |conn| { @@ -1350,8 +1108,7 @@ pub const Transfer = struct { self._conn = null; } - self.req.headers.deinit(); - self.arena.deinit(); + self.client.deinitRequest(self.req); self.client.transfer_pool.destroy(self); } @@ -1408,11 +1165,6 @@ pub const Transfer = struct { if (self._notified_fail) return; self._notified_fail = true; - self.req.notification.dispatch(.http_request_fail, &.{ - .transfer = self, - .err = err, - }); - if (execute_callback) { self.req.error_callback(self.req.ctx, err); } else if (self.req.shutdown_callback) |cb| { @@ -1430,38 +1182,32 @@ pub const Transfer = struct { try conn.setProxy(client.http_proxy); try conn.setTlsVerify(client.tls_verify, client.use_proxy); - try conn.setURL(req.url); - try conn.setMethod(req.method); - if (req.body) |b| { + try conn.setURL(req.params.url); + try conn.setMethod(req.params.method); + if (req.params.body) |b| { try conn.setBody(b); } else { try conn.setGetMode(); } - var header_list = req.headers; + var header_list = req.params.headers; try conn.secretHeaders(&header_list, &client.network.config.http_headers); try conn.setHeaders(&header_list); - // If we have WebBotAuth, sign our request. - if (client.network.web_bot_auth) |*wba| { - const authority = URL.getHost(req.url); - try wba.signRequest(self.arena.allocator(), &header_list, authority); - } - // Add cookies from cookie jar. - if (try self.getCookieString()) |cookies| { + if (try self.req.getCookieString()) |cookies| { try conn.setCookies(@ptrCast(cookies.ptr)); } conn.transport = .{ .http = self }; // Per-request timeout override (e.g. XHR timeout) - if (req.timeout_ms > 0) { - try conn.setTimeout(req.timeout_ms); + if (req.params.timeout_ms > 0) { + try conn.setTimeout(req.params.timeout_ms); } // add credentials - if (req.credentials) |creds| { + if (req.params.credentials) |creds| { if (self._auth_challenge != null and self._auth_challenge.?.source == .proxy) { try conn.setProxyCredentials(creds); } else { @@ -1510,23 +1256,9 @@ pub const Transfer = struct { } } - pub fn getCookieString(self: *Transfer) !?[:0]const u8 { - const jar = self.req.cookie_jar orelse return null; - var aw: std.Io.Writer.Allocating = .init(self.arena.allocator()); - try jar.forRequest(self.req.url, &aw.writer, .{ - .is_http = true, - .origin_url = self.req.cookie_origin, - .is_navigation = self.req.resource_type == .document, - }); - const written = aw.written(); - if (written.len == 0) return null; - try aw.writer.writeByte(0); - return written.ptr[0..written.len :0]; - } - pub fn format(self: *Transfer, writer: *std.Io.Writer) !void { const req = self.req; - return writer.print("{s} {s}", .{ @tagName(req.method), req.url }); + return writer.print("{s} {s}", .{ @tagName(req.params.method), req.params.url }); } pub fn updateURL(self: *Transfer, url: [:0]const u8) !void { @@ -1534,13 +1266,13 @@ pub const Transfer = struct { self.url = url; // for the request itself - self.req.url = url; + self.req.params.url = url; } fn handleRedirect(transfer: *Transfer) !void { const req = &transfer.req; const conn = transfer._conn.?; - const arena = transfer.arena.allocator(); + const arena = transfer.req.params.arena; transfer._redirect_count += 1; if (transfer._redirect_count > transfer.client.network.config.httpMaxRedirects()) { @@ -1548,7 +1280,7 @@ pub const Transfer = struct { } // retrieve cookies from the redirect's response. - if (req.cookie_jar) |jar| { + if (req.params.cookie_jar) |jar| { var i: usize = 0; while (conn.getResponseHeader("set-cookie", i)) |ct| : (i += 1) { try jar.populateFromResponse(transfer.url, ct.value); @@ -1592,8 +1324,8 @@ pub const Transfer = struct { // 307, 308 โ†’ keep method and body. const status = try conn.getResponseCode(); if (status == 301 or status == 302 or status == 303) { - req.method = .GET; - req.body = null; + req.params.method = .GET; + req.params.body = null; } } @@ -1620,11 +1352,11 @@ pub const Transfer = struct { } pub fn updateCredentials(self: *Transfer, userpwd: [:0]const u8) void { - self.req.credentials = userpwd; + self.req.params.credentials = userpwd; } pub fn replaceRequestHeaders(self: *Transfer, allocator: Allocator, headers: []const http.Header) !void { - self.req.headers.deinit(); + self.req.params.headers.deinit(); var buf: std.ArrayList(u8) = .empty; var new_headers = try self.client.newHeaders(); @@ -1636,7 +1368,7 @@ pub const Transfer = struct { try buf.append(allocator, 0); // null terminated try new_headers.add(buf.items[0 .. buf.items.len - 1 :0]); } - self.req.headers = new_headers; + self.req.params.headers = new_headers; } // abortAuthChallenge is called when an auth challenge interception is @@ -1644,15 +1376,12 @@ pub const Transfer = struct { // before interception process. pub fn abortAuthChallenge(self: *Transfer) void { if (comptime IS_DEBUG) { - std.debug.assert(self._intercept_state != .not_intercepted); - log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.intercepted }); + log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.interception_layer.intercepted }); } - self.client.intercepted -= 1; - if (!self.req.blocking) { - self.abort(error.AbortAuthChallenge); - return; - } - self._intercept_state = .{ .abort = error.AbortAuthChallenge }; + + self.client.interception_layer.intercepted -= 1; + self.abort(error.AbortAuthChallenge); + return; } // headerDoneCallback is called once the headers have been read. @@ -1664,7 +1393,7 @@ pub const Transfer = struct { try transfer.buildResponseHeader(conn); - if (transfer.req.cookie_jar) |jar| { + if (transfer.req.params.cookie_jar) |jar| { var i: usize = 0; while (true) { const ct = conn.getResponseHeader("set-cookie", i); @@ -1684,65 +1413,11 @@ pub const Transfer = struct { } } - transfer.req.notification.dispatch(.http_response_header_done, &.{ - .transfer = transfer, - }); - const proceed = transfer.req.header_callback(Response.fromTransfer(transfer)) catch |err| { log.err(.http, "header_callback", .{ .err = err, .req = transfer }); return err; }; - if (transfer.client.network.cache != null and transfer.req.method == .GET) { - const rh = &transfer.response_header.?; - const allocator = transfer.arena.allocator(); - - const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null; - - const maybe_cm = try Cache.tryCache( - allocator, - std.time.timestamp(), - transfer.url, - rh.status, - rh.contentType(), - if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null, - vary, - if (conn.getResponseHeader("age", 0)) |h| h.value else null, - conn.getResponseHeader("set-cookie", 0) != null, - conn.getResponseHeader("authorization", 0) != null, - ); - - if (maybe_cm) |cm| { - var iter = transfer.responseHeaderIterator(); - var header_list = try iter.collect(allocator); - const end_of_response = header_list.items.len; - - if (vary) |vary_str| { - var req_it = transfer.req.headers.iterator(); - - while (req_it.next()) |hdr| { - var vary_iter = std.mem.splitScalar(u8, vary_str, ','); - - while (vary_iter.next()) |part| { - const name = std.mem.trim(u8, part, &std.ascii.whitespace); - if (std.ascii.eqlIgnoreCase(hdr.name, name)) { - try header_list.append(allocator, .{ - .name = try allocator.dupe(u8, hdr.name), - .value = try allocator.dupe(u8, hdr.value), - }); - } - } - } - } - - const metadata = try transfer.arena.allocator().create(CacheMetadata); - metadata.* = cm; - metadata.headers = header_list.items[0..end_of_response]; - metadata.vary_headers = header_list.items[end_of_response..]; - transfer._pending_cache_metadata = metadata; - } - } - return proceed and transfer.aborted == false; } @@ -1774,7 +1449,7 @@ pub const Transfer = struct { transfer._callback_error = error.ResponseTooLarge; return http.writefunc_error; } - transfer._stream_buffer.ensureTotalCapacity(transfer.arena.allocator(), cl) catch {}; + transfer._stream_buffer.ensureTotalCapacity(transfer.req.params.arena, cl) catch {}; } } @@ -1787,7 +1462,7 @@ pub const Transfer = struct { } const chunk = buffer[0..chunk_len]; - transfer._stream_buffer.appendSlice(transfer.arena.allocator(), chunk) catch |err| { + transfer._stream_buffer.appendSlice(transfer.req.params.arena, chunk) catch |err| { transfer._callback_error = err; return http.writefunc_error; }; @@ -1800,64 +1475,13 @@ pub const Transfer = struct { } pub fn responseHeaderIterator(self: *Transfer) HeaderIterator { - if (self._conn) |conn| { - // If we have a connection, than this is a real curl request and we - // iterate through the header that curl maintains. - return .{ .curl = .{ .conn = conn } }; - } - // If there's no handle, it either means this is being called before - // the request is even being made (which would be a bug in the code) - // or when a response was injected via transfer.fulfill. The injected - // header should be iterated, since there is no handle/easy. - return .{ .list = .{ .list = self.response_header.?._injected_headers } }; - } + // We always have a real curl request here. We handle injection up in InterceptionLayer. + lp.assert(self._conn != null, "Transfer.responseHeaderIterator", .{ .value = self._conn != null }); + const conn = self._conn.?; - pub fn fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { - if (transfer._conn != null) { - // should never happen, should have been intercepted/paused, and then - // either continued, aborted or fulfilled once. - @branchHint(.unlikely); - return error.RequestInProgress; - } - - transfer._fulfill(status, headers, body) catch |err| { - transfer.req.error_callback(transfer.req.ctx, err); - return err; - }; - } - - fn _fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { - const req = &transfer.req; - if (req.start_callback) |cb| { - try cb(Response.fromTransfer(transfer)); - } - - transfer.response_header = .{ - .status = status, - .url = req.url, - .redirect_count = 0, - ._injected_headers = headers, - }; - for (headers) |hdr| { - if (std.ascii.eqlIgnoreCase(hdr.name, "content-type")) { - const len = @min(hdr.value.len, ResponseHead.MAX_CONTENT_TYPE_LEN); - @memcpy(transfer.response_header.?._content_type[0..len], hdr.value[0..len]); - transfer.response_header.?._content_type_len = len; - break; - } - } - - lp.assert(transfer._header_done_called == false, "Transfer.fulfill header_done_called", .{}); - if (try req.header_callback(Response.fromTransfer(transfer)) == false) { - transfer.abort(error.Abort); - return; - } - - if (body) |b| { - try req.data_callback(Response.fromTransfer(transfer), b); - } - - try req.done_callback(req.ctx); + // If we have a connection, than this is a real curl request and we + // iterate through the header that curl maintains. + return .{ .curl = .{ .conn = conn } }; } // This function should be called during the dataCallback. Calling it after @@ -1890,6 +1514,21 @@ pub const Transfer = struct { } }; +pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { + if (comptime IS_DEBUG) { + lp.assert(self.interception_layer.intercepted > 0, "HttpClient.continueTransfer", .{ .value = self.interception_layer.intercepted }); + log.debug(.http, "continue transfer", .{ .intercepted = self.interception_layer.intercepted }); + } + + self.interception_layer.intercepted -= 1; + return self.process(transfer); +} + +pub fn deinitRequest(self: *Client, req: Request) void { + req.deinit(); + self.network.app.arena_pool.release(req.params.arena); +} + const Noop = struct { fn headerCallback(_: Response) !bool { return true; diff --git a/src/browser/Runner.zig b/src/browser/Runner.zig index 68a7ad59..2489382e 100644 --- a/src/browser/Runner.zig +++ b/src/browser/Runner.zig @@ -185,7 +185,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { try frame.dispatchLoad(); const http_active = http_client.http_active; - const total_network_activity = http_active + http_client.intercepted; + const total_network_activity = http_active + http_client.interception_layer.intercepted; if (frame._notified_network_almost_idle.check(total_network_activity <= 2)) { frame.notifyNetworkAlmostIdle(); } @@ -211,7 +211,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { // because is_cdp is false, and that can only be // the case when interception isn't possible. if (comptime IS_DEBUG) { - std.debug.assert(http_client.intercepted == 0); + std.debug.assert(http_client.interception_layer.intercepted == 0); } if (browser.hasBackgroundTasks()) { diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 5f82b88e..12cc6cb5 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -265,13 +265,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e } if (remote_url) |url| { - errdefer { - if (is_blocking == false) { - self.scriptList(script).remove(&script.node); - } - // Let the outer errdefer handle releasing the arena if client.request fails - } - if (comptime IS_DEBUG) { var ls: js.Local.Scope = undefined; frame.js.localScope(&ls); @@ -285,23 +278,48 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e }); } - { - const was_evaluating = self.is_evaluating; - self.is_evaluating = true; - defer self.is_evaluating = was_evaluating; + const was_evaluating = self.is_evaluating; + self.is_evaluating = true; + defer self.is_evaluating = was_evaluating; - try self.client.request(.{ + const headers = try self.getHeaders(); + errdefer headers.deinit(); + + if (is_blocking) { + const response = try self.client.syncRequest(arena, .{ .url = url, - .ctx = script, .method = .GET, .frame_id = frame._frame_id, .loader_id = frame._loader_id, - .headers = try self.getHeaders(), - .blocking = is_blocking, + .headers = headers, .cookie_jar = &frame._session.cookie_jar, .cookie_origin = frame.url, .resource_type = .script, .notification = frame._session.notification, + }); + + script.source = .{ .remote = response.body }; + script.status = response.status; + script.complete = true; + } else { + errdefer { + self.scriptList(script).remove(&script.node); + // Let the outer errdefer handle releasing the arena if client.request fails + } + + try self.client.request(.{ + .ctx = script, + .params = .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = headers, + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .resource_type = .script, + .notification = frame._session.notification, + }, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, @@ -317,29 +335,21 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e return; } - // this is , it needs to block the caller - // until it's evaluated - var client = self.client; - while (true) { - if (!script.complete) { - _ = try client.tick(200); - continue; - } - if (script.status == 0) { - // an error (that we already logged) - script.deinit(); - return; - } - - // could have already been evaluating if this is dynamically added - const was_evaluating = self.is_evaluating; - self.is_evaluating = true; - defer { - self.is_evaluating = was_evaluating; - script.deinit(); - } - return script.eval(frame); + if (script.status == 0) { + // an error (that we already logged) + script.deinit(); + return; } + + // could have already been evaluating if this is dynamically added + const was_evaluating = self.is_evaluating; + self.is_evaluating = true; + defer { + self.is_evaluating = was_evaluating; + script.deinit(); + } + + script.eval(frame); } fn scriptList(self: *ScriptManager, script: *const Script) *std.DoublyLinkedList { @@ -407,16 +417,18 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const self.async_scripts.append(&script.node); self.client.request(.{ - .url = url, .ctx = script, - .method = .GET, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .headers = try self.getHeaders(), - .cookie_jar = &frame._session.cookie_jar, - .cookie_origin = frame.url, - .resource_type = .script, - .notification = frame._session.notification, + .params = .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = try self.getHeaders(), + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .resource_type = .script, + .notification = frame._session.notification, + }, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, @@ -513,16 +525,18 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C self.async_scripts.append(&script.node); self.client.request(.{ - .url = url, - .method = .GET, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .headers = try self.getHeaders(), .ctx = script, - .resource_type = .script, - .cookie_jar = &frame._session.cookie_jar, - .cookie_origin = frame.url, - .notification = frame._session.notification, + .params = .{ + .url = url, + .method = .GET, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .headers = try self.getHeaders(), + .resource_type = .script, + .cookie_jar = &frame._session.cookie_jar, + .cookie_origin = frame.url, + .notification = frame._session.notification, + }, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, @@ -664,7 +678,6 @@ pub const Script = struct { debug_transfer_aborted: bool = false, debug_transfer_bytes_received: usize = 0, debug_transfer_notified_fail: bool = false, - debug_transfer_intercept_state: u8 = 0, debug_transfer_auth_challenge: bool = false, debug_transfer_easy_id: usize = 0, @@ -740,7 +753,6 @@ pub const Script = struct { .a3 = self.debug_transfer_aborted, .a4 = self.debug_transfer_bytes_received, .a5 = self.debug_transfer_notified_fail, - .a7 = self.debug_transfer_intercept_state, .a8 = self.debug_transfer_auth_challenge, .a9 = self.debug_transfer_easy_id, .b1 = transfer.id, @@ -748,7 +760,6 @@ pub const Script = struct { .b3 = transfer.aborted, .b4 = transfer.bytes_received, .b5 = transfer._notified_fail, - .b7 = @intFromEnum(transfer._intercept_state), .b8 = transfer._auth_challenge != null, .b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0, }); @@ -758,7 +769,6 @@ pub const Script = struct { self.debug_transfer_aborted = transfer.aborted; self.debug_transfer_bytes_received = transfer.bytes_received; self.debug_transfer_notified_fail = transfer._notified_fail; - self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state); self.debug_transfer_auth_challenge = transfer._auth_challenge != null; self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0; }, diff --git a/src/browser/tests/event/keyboard.html b/src/browser/tests/event/keyboard.html index 223a9e0b..6f81a046 100644 --- a/src/browser/tests/event/keyboard.html +++ b/src/browser/tests/event/keyboard.html @@ -132,11 +132,33 @@ } - diff --git a/src/browser/webapi/Worker.zig b/src/browser/webapi/Worker.zig index 558d4cce..81d74116 100644 --- a/src/browser/webapi/Worker.zig +++ b/src/browser/webapi/Worker.zig @@ -95,15 +95,17 @@ pub fn init(url: []const u8, exec: *Execution) !*Worker { const http_client = session.browser.http_client; http_client.request(.{ .ctx = self, - .url = resolved_url, - .method = .GET, - .headers = try http_client.newHeaders(), - .frame_id = self._frame_id, - .loader_id = self._loader_id, - .resource_type = .script, - .cookie_jar = &session.cookie_jar, - .cookie_origin = resolved_url, - .notification = session.notification, + .params = .{ + .url = resolved_url, + .method = .GET, + .headers = try http_client.newHeaders(), + .frame_id = self._frame_id, + .loader_id = self._loader_id, + .resource_type = .script, + .cookie_jar = &session.cookie_jar, + .cookie_origin = resolved_url, + .notification = session.notification, + }, .header_callback = httpHeaderCallback, .data_callback = httpDataCallback, .done_callback = httpDoneCallback, diff --git a/src/browser/webapi/event/KeyboardEvent.zig b/src/browser/webapi/event/KeyboardEvent.zig index 979b5c29..387a5d23 100644 --- a/src/browser/webapi/event/KeyboardEvent.zig +++ b/src/browser/webapi/event/KeyboardEvent.zig @@ -160,6 +160,106 @@ pub const Key = union(enum) { else => |k| @tagName(k), }; } + + /// Legacy `KeyboardEvent.keyCode` value per UI Events spec ยง Annex C + /// (https://www.w3.org/TR/uievents/#legacy-key-attributes). Returns 0 for + /// keys without a defined fixed virtual key code. + /// + /// Spec note: for printable characters, `keyCode` is calculated from the + /// **unmodified** key value's uppercase ASCII. We don't track the + /// unmodified key, so we uppercase `_key` instead. This is exact for + /// letters (uppercase('t') == uppercase('T')) and digits, but for + /// shift-modified symbols (e.g., shift+1='!') it returns the modified + /// char's ASCII rather than the digit's keyCode. Callers needing + /// spec-strict behavior should pass `unmodifiedText` through CDP + /// `Input.dispatchKeyEvent` and use that instead. + pub fn keyCode(self: Key) u32 { + return switch (self) { + // Modifier keys + .Alt, .AltGraph => 18, + .CapsLock => 20, + .Control => 17, + .Meta, .Hyper, .Super => 91, + .NumLock => 144, + .ScrollLock => 145, + .Shift => 16, + + // Whitespace keys (Space hits the .standard path below) + .Enter => 13, + .Tab => 9, + + // Navigation keys + .ArrowDown => 40, + .ArrowLeft => 37, + .ArrowRight => 39, + .ArrowUp => 38, + .End => 35, + .Home => 36, + .PageDown => 34, + .PageUp => 33, + + // Editing keys + .Backspace => 8, + .Clear => 12, + .Delete => 46, + .Insert => 45, + + // UI keys + .Cancel => 3, + .ContextMenu => 93, + .Escape => 27, + .Execute => 43, + .Help => 47, + .Pause => 19, + .Select => 41, + + // Function keys + .F1 => 112, + .F2 => 113, + .F3 => 114, + .F4 => 115, + .F5 => 116, + .F6 => 117, + .F7 => 118, + .F8 => 119, + .F9 => 120, + .F10 => 121, + .F11 => 122, + .F12 => 123, + + .standard => |s| { + if (s.len == 0) return 0; + const c = s[0]; + // Letters: uppercase ASCII + if (c >= 'a' and c <= 'z') return c - 'a' + 'A'; + if (c >= 'A' and c <= 'Z') return c; + // Digits: ASCII + if (c >= '0' and c <= '9') return c; + // Space: 32 โ€” also returned via the ASCII fall-through below, + // but called out for clarity since it's the most common case. + if (c == ' ') return 32; + // Other ASCII chars (best-effort: legacy keyCode for symbols + // is platform-specific and depends on the unmodified key, + // which we don't track). + return c; + }, + + // Keys without a defined legacy keyCode + else => 0, + }; + } + + /// Legacy `KeyboardEvent.charCode` value per UI Events spec ยง Annex C + /// (https://www.w3.org/TR/uievents/#legacy-key-attributes). Returns the + /// Unicode code point of the character produced by the key. Only + /// meaningful inside a `keypress` event โ€” callers must gate accordingly. + pub fn charCode(self: Key) u32 { + return switch (self) { + .Enter => 13, + .standard => |s| if (s.len > 0) s[0] else 0, + else => 0, + }; + } }; pub const Location = enum(i32) { @@ -270,15 +370,27 @@ pub fn getShiftKey(self: *const KeyboardEvent) bool { return self._shift_key; } -// Deprecated: tracked as 0 since we don't synthesise legacy character codes. +// https://www.w3.org/TR/uievents/#dom-keyboardevent-charcode +// charCode is the Unicode code point of the character produced by the key, +// and is only meaningful on `keypress` events. For `keydown` and `keyup` it +// is 0. (Deprecated, but read by legacy event handlers.) +// +// Chrome returns 0 for synthetic events (those created via +// `new KeyboardEvent(...)` rather than dispatched by the user agent), so we +// gate on `_is_trusted` to match. pub fn getCharCode(self: *const KeyboardEvent) u32 { - _ = self; - return 0; + const event = self._proto._proto; + if (event._is_trusted == false) return 0; + if (event._type_string.eql(comptime .wrap("keypress")) == false) return 0; + return self._key.charCode(); } +// https://www.w3.org/TR/uievents/#dom-keyboardevent-keycode +// +// As with `charCode`, Chrome returns 0 for synthetic events. pub fn getKeyCode(self: *const KeyboardEvent) u32 { - _ = self; - return 0; + if (self._proto._proto._is_trusted == false) return 0; + return self._key.keyCode(); } pub fn initKeyboardEvent( @@ -367,3 +479,77 @@ const testing = @import("../../../testing.zig"); test "WebApi: KeyboardEvent" { try testing.htmlRunner("event/keyboard.html", .{}); } + +test "KeyboardEvent: Key.keyCode mapping" { + // Letters: uppercase ASCII regardless of case. + try testing.expectEqual(@as(u32, 65), Key.keyCode(.{ .standard = "a" })); + try testing.expectEqual(@as(u32, 65), Key.keyCode(.{ .standard = "A" })); + try testing.expectEqual(@as(u32, 84), Key.keyCode(.{ .standard = "T" })); + try testing.expectEqual(@as(u32, 90), Key.keyCode(.{ .standard = "z" })); + + // Digits. + try testing.expectEqual(@as(u32, 48), Key.keyCode(.{ .standard = "0" })); + try testing.expectEqual(@as(u32, 53), Key.keyCode(.{ .standard = "5" })); + try testing.expectEqual(@as(u32, 57), Key.keyCode(.{ .standard = "9" })); + + // Space. + try testing.expectEqual(@as(u32, 32), Key.keyCode(.{ .standard = " " })); + + // Modifier keys. + try testing.expectEqual(@as(u32, 16), Key.keyCode(.Shift)); + try testing.expectEqual(@as(u32, 17), Key.keyCode(.Control)); + try testing.expectEqual(@as(u32, 18), Key.keyCode(.Alt)); + try testing.expectEqual(@as(u32, 91), Key.keyCode(.Meta)); + try testing.expectEqual(@as(u32, 20), Key.keyCode(.CapsLock)); + + // Whitespace keys. + try testing.expectEqual(@as(u32, 13), Key.keyCode(.Enter)); + try testing.expectEqual(@as(u32, 9), Key.keyCode(.Tab)); + + // Navigation keys. + try testing.expectEqual(@as(u32, 37), Key.keyCode(.ArrowLeft)); + try testing.expectEqual(@as(u32, 38), Key.keyCode(.ArrowUp)); + try testing.expectEqual(@as(u32, 39), Key.keyCode(.ArrowRight)); + try testing.expectEqual(@as(u32, 40), Key.keyCode(.ArrowDown)); + try testing.expectEqual(@as(u32, 33), Key.keyCode(.PageUp)); + try testing.expectEqual(@as(u32, 34), Key.keyCode(.PageDown)); + try testing.expectEqual(@as(u32, 35), Key.keyCode(.End)); + try testing.expectEqual(@as(u32, 36), Key.keyCode(.Home)); + + // Editing keys. + try testing.expectEqual(@as(u32, 8), Key.keyCode(.Backspace)); + try testing.expectEqual(@as(u32, 46), Key.keyCode(.Delete)); + try testing.expectEqual(@as(u32, 45), Key.keyCode(.Insert)); + + // UI keys. + try testing.expectEqual(@as(u32, 27), Key.keyCode(.Escape)); + try testing.expectEqual(@as(u32, 19), Key.keyCode(.Pause)); + try testing.expectEqual(@as(u32, 93), Key.keyCode(.ContextMenu)); + + // Function keys. + try testing.expectEqual(@as(u32, 112), Key.keyCode(.F1)); + try testing.expectEqual(@as(u32, 123), Key.keyCode(.F12)); + + // Keys without a defined fixed virtual key code. + try testing.expectEqual(@as(u32, 0), Key.keyCode(.Dead)); + try testing.expectEqual(@as(u32, 0), Key.keyCode(.Unidentified)); + try testing.expectEqual(@as(u32, 0), Key.keyCode(.{ .standard = "" })); +} + +test "KeyboardEvent: Key.charCode mapping" { + // Printable characters: Unicode code point of the first byte. + try testing.expectEqual(@as(u32, 97), Key.charCode(.{ .standard = "a" })); + try testing.expectEqual(@as(u32, 65), Key.charCode(.{ .standard = "A" })); + try testing.expectEqual(@as(u32, 48), Key.charCode(.{ .standard = "0" })); + try testing.expectEqual(@as(u32, 32), Key.charCode(.{ .standard = " " })); + + // Enter is the one named key that produces a charCode (\r = 13). + try testing.expectEqual(@as(u32, 13), Key.charCode(.Enter)); + + // Other named keys and the empty standard key produce no character. + try testing.expectEqual(@as(u32, 0), Key.charCode(.Tab)); + try testing.expectEqual(@as(u32, 0), Key.charCode(.Escape)); + try testing.expectEqual(@as(u32, 0), Key.charCode(.ArrowLeft)); + try testing.expectEqual(@as(u32, 0), Key.charCode(.Shift)); + try testing.expectEqual(@as(u32, 0), Key.charCode(.{ .standard = "" })); +} diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig index 623037d6..777f8311 100644 --- a/src/browser/webapi/net/Fetch.zig +++ b/src/browser/webapi/net/Fetch.zig @@ -94,16 +94,18 @@ pub fn init(input: Input, options: ?InitOpts, frame: *Frame) !js.Promise { try http_client.request(.{ .ctx = fetch, - .url = request._url, - .method = request._method, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .body = request._body, - .headers = headers, - .resource_type = .fetch, - .cookie_jar = cookie_jar, - .cookie_origin = frame.url, - .notification = frame._session.notification, + .params = .{ + .url = request._url, + .method = request._method, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .body = request._body, + .headers = headers, + .resource_type = .fetch, + .cookie_jar = cookie_jar, + .cookie_origin = frame.url, + .notification = frame._session.notification, + }, .start_callback = httpStartCallback, .header_callback = httpHeaderDoneCallback, .data_callback = httpDataCallback, diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 35f7f7fa..19a7676b 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -257,17 +257,19 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void { http_client.request(.{ .ctx = self, - .url = self._url, - .method = self._method, - .headers = headers, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .body = self._request_body, - .cookie_jar = if (cookie_support) &frame._session.cookie_jar else null, - .cookie_origin = frame.url, - .resource_type = .xhr, - .timeout_ms = self._timeout, - .notification = frame._session.notification, + .params = .{ + .url = self._url, + .method = self._method, + .headers = headers, + .frame_id = frame._frame_id, + .loader_id = frame._loader_id, + .body = self._request_body, + .cookie_jar = if (cookie_support) &frame._session.cookie_jar else null, + .cookie_origin = frame.url, + .resource_type = .xhr, + .timeout_ms = self._timeout, + .notification = frame._session.notification, + }, .start_callback = httpStartCallback, .header_callback = httpHeaderDoneCallback, .data_callback = httpDataCallback, diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 16a08cc9..e646f4b8 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -450,8 +450,25 @@ pub const BrowserContext = struct { // abort all intercepted requests before closing the session/page // since some of these might callback into the page/scriptmanager - for (self.intercept_state.pendingTransfers()) |transfer| { - transfer.abort(error.ClientDisconnect); + const http_client = browser.http_client; + for (self.intercept_state.pendingIntercepts()) |intercept| { + defer { + lp.assert( + http_client.interception_layer.intercepted > 0, + "BrowserContext.deinit.intercepted", + .{ .value = http_client.interception_layer.intercepted }, + ); + http_client.interception_layer.intercepted -= 1; + } + switch (intercept) { + .transfer => |t| { + t.abort(error.ClientDisconnect); + }, + .request => |r| { + defer http_client.deinitRequest(r); + r.error_callback(r.ctx, error.ClientDisconnect); + }, + } } for (self.isolated_worlds.items) |world| { @@ -668,7 +685,7 @@ pub const BrowserContext = struct { const arena = self.frame_arena; // Prepare the captured response value. - const id = msg.transfer.id; + const id = msg.request.params.request_id; const gop = try self.captured_responses.getOrPut(arena, id); if (!gop.found_existing) { gop.value_ptr.* = .{ @@ -676,8 +693,8 @@ pub const BrowserContext = struct { // Encode the data in base64 by default, but don't encode // for well known content-type. .must_encode = blk: { - const transfer = msg.transfer; - if (transfer.response_header.?.contentType()) |ct| { + const response = msg.response; + if (response.contentType()) |ct| { const mime = try Mime.parse(ct); if (!mime.isText()) { @@ -705,7 +722,7 @@ pub const BrowserContext = struct { const self: *BrowserContext = @ptrCast(@alignCast(ctx)); const arena = self.frame_arena; - const id = msg.transfer.id; + const id = msg.request.params.request_id; const resp = self.captured_responses.getPtr(id) orelse lp.assert(false, "onHttpResponseData missinf captured response", .{}); return resp.data.appendSlice(arena, msg.data); diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index d0a1d1db..edff1761 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -54,7 +54,12 @@ pub fn processMessage(cmd: *CDP.Command) !void { // Stored in CDP pub const InterceptState = struct { allocator: Allocator, - waiting: std.AutoArrayHashMapUnmanaged(u32, *HttpClient.Transfer), + waiting: std.AutoArrayHashMapUnmanaged(u32, Pending), + + const Pending = union(enum) { + transfer: *HttpClient.Transfer, + request: HttpClient.Request, + }; pub fn init(allocator: Allocator) !InterceptState { return .{ @@ -67,11 +72,15 @@ pub const InterceptState = struct { return self.waiting.count() == 0; } - pub fn put(self: *InterceptState, transfer: *HttpClient.Transfer) !void { - return self.waiting.put(self.allocator, transfer.id, transfer); + pub fn putRequest(self: *InterceptState, request: HttpClient.Request) !void { + return self.waiting.put(self.allocator, request.params.request_id, .{ .request = request }); } - pub fn remove(self: *InterceptState, request_id: u32) ?*HttpClient.Transfer { + pub fn putTransfer(self: *InterceptState, transfer: *HttpClient.Transfer) !void { + return self.waiting.put(self.allocator, transfer.id, .{ .transfer = transfer }); + } + + pub fn remove(self: *InterceptState, request_id: u32) ?Pending { const entry = self.waiting.fetchSwapRemove(request_id) orelse return null; return entry.value; } @@ -80,7 +89,7 @@ pub const InterceptState = struct { self.waiting.deinit(self.allocator); } - pub fn pendingTransfers(self: *const InterceptState) []*HttpClient.Transfer { + pub fn pendingIntercepts(self: *const InterceptState) []Pending { return self.waiting.values(); } }; @@ -190,29 +199,28 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification. const session_id = bc.session_id orelse return; // We keep it around to wait for modifications to the request. - // NOTE: we assume whomever created the request created it with a lifetime of the Page. // TODO: What to do when receiving replies for a previous frame's requests? - const transfer = intercept.transfer; - try bc.intercept_state.put(transfer); + const request = intercept.request; + try bc.intercept_state.putRequest(request.*); try bc.cdp.sendEvent("Fetch.requestPaused", .{ - .requestId = &id.toInterceptId(transfer.id), - .frameId = &id.toFrameId(transfer.req.frame_id), - .request = network.TransferAsRequestWriter.init(transfer), - .resourceType = switch (transfer.req.resource_type) { + .requestId = &id.toInterceptId(request.params.request_id), + .frameId = &id.toFrameId(request.params.frame_id), + .request = network.RequestWriter.init(request), + .resourceType = switch (request.params.resource_type) { .script => "Script", .xhr => "XHR", .document => "Document", .fetch => "Fetch", }, - .networkId = &id.toRequestId(transfer), // matches the Network REQ-ID + .networkId = &id.toRequestId(request), // matches the Network REQ-ID }, .{ .session_id = session_id }); log.debug(.cdp, "request intercept", .{ .state = "paused", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, }); // Await either continueRequest, failRequest or fulfillRequest @@ -236,39 +244,50 @@ fn continueRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; + + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = pending.request; log.debug(.cdp, "request intercept", .{ .state = "continue", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, .new_url = params.url, }); - const arena = transfer.arena.allocator(); + const arena = request.params.arena; // Update the request with the new parameters if (params.url) |url| { - try transfer.updateURL(try arena.dupeZ(u8, url)); + request.params.url = try arena.dupeZ(u8, url); } if (params.method) |method| { - transfer.req.method = std.meta.stringToEnum(http.Method, method) orelse return error.InvalidParams; + request.params.method = std.meta.stringToEnum(http.Method, method) orelse return error.InvalidParams; } if (params.headers) |headers| { - // Not obvious, but cmd.arena is safe here, since the headers will get - // duped by libcurl. transfer.arena is more obvious/safe, but cmd.arena - // is more efficient (it's re-used) - try transfer.replaceRequestHeaders(cmd.arena, headers); + request.params.headers.deinit(); + + var buf: std.ArrayList(u8) = .empty; + var new_headers = try bc.cdp.browser.http_client.newHeaders(); + for (headers) |hdr| { + defer buf.clearRetainingCapacity(); + try std.fmt.format(buf.writer(cmd.arena), "{s}: {s}", .{ hdr.name, hdr.value }); + try buf.append(cmd.arena, 0); + try new_headers.add(buf.items[0 .. buf.items.len - 1 :0]); + } + request.params.headers = new_headers; } if (params.postData) |b| { const decoder = std.base64.standard.Decoder; const body = try arena.alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(body, b); - transfer.req.body = body; + request.params.body = body; } - try bc.cdp.browser.http_client.continueTransfer(transfer); + // todo: replace. + const client = bc.cdp.browser.http_client; + try client.interception_layer.continueRequest(client, request); return cmd.sendResult(null, .{}); } @@ -292,14 +311,18 @@ fn continueWithAuth(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const transfer = pending.transfer; + const request = transfer.req; log.debug(.cdp, "request intercept", .{ .state = "continue with auth", - .id = transfer.id, + .id = request.params.request_id, .response = params.authChallengeResponse.response, }); + const client = bc.cdp.browser.http_client; + if (params.authChallengeResponse.response != .ProvideCredentials) { transfer.abortAuthChallenge(); return cmd.sendResult(null, .{}); @@ -308,17 +331,18 @@ fn continueWithAuth(cmd: *CDP.Command) !void { // cancel the request, deinit the transfer on error. errdefer transfer.abortAuthChallenge(); - // restart the request with the provided credentials. - const arena = transfer.arena.allocator(); - transfer.updateCredentials( - try std.fmt.allocPrintSentinel(arena, "{s}:{s}", .{ + const arena = request.params.arena; + transfer.updateCredentials(try std.fmt.allocPrintSentinel( + arena, + "{s}:{s}", + .{ params.authChallengeResponse.username, params.authChallengeResponse.password, - }, 0), - ); + }, + 0, + )); - transfer.reset(); - try bc.cdp.browser.http_client.continueTransfer(transfer); + try client.continueTransfer(transfer); return cmd.sendResult(null, .{}); } @@ -341,12 +365,14 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; + + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + var request = pending.request; log.debug(.cdp, "request intercept", .{ .state = "fulfilled", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, .status = params.responseCode, .body = params.body != null, }); @@ -354,13 +380,13 @@ fn fulfillRequest(cmd: *CDP.Command) !void { var body: ?[]const u8 = null; if (params.body) |b| { const decoder = std.base64.standard.Decoder; - const buf = try transfer.arena.allocator().alloc(u8, try decoder.calcSizeForSlice(b)); + const buf = try request.params.arena.alloc(u8, try decoder.calcSizeForSlice(b)); try decoder.decode(buf, b); body = buf; } - try bc.cdp.browser.http_client.fulfillTransfer(transfer, params.responseCode, params.responseHeaders orelse &.{}, body); - + const client = bc.cdp.browser.http_client; + try client.interception_layer.fulfillRequest(client, request, params.responseCode, params.responseHeaders orelse &.{}, body); return cmd.sendResult(null, .{}); } @@ -374,13 +400,16 @@ fn failRequest(cmd: *CDP.Command) !void { var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); - const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; - defer bc.cdp.browser.http_client.abortTransfer(transfer); + const pending = intercept_state.remove(request_id) orelse return error.RequestNotFound; + const request = pending.request; + + const client = bc.cdp.browser.http_client; + defer client.interception_layer.abortRequest(client, request); log.info(.cdp, "request intercept", .{ .state = "fail", .id = request_id, - .url = transfer.url, + .url = request.params.url, .reason = params.errorReason, }); return cmd.sendResult(null, .{}); @@ -396,15 +425,16 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati // TODO: What to do when receiving replies for a previous frame's requests? const transfer = intercept.transfer; - try bc.intercept_state.put(transfer); + try bc.intercept_state.putTransfer(transfer); + var request = transfer.req; const challenge = transfer._auth_challenge orelse return error.NullAuthChallenge; try bc.cdp.sendEvent("Fetch.authRequired", .{ - .requestId = &id.toInterceptId(transfer.id), - .frameId = &id.toFrameId(transfer.req.frame_id), - .request = network.TransferAsRequestWriter.init(transfer), - .resourceType = switch (transfer.req.resource_type) { + .requestId = &id.toInterceptId(request.params.request_id), + .frameId = &id.toFrameId(request.params.frame_id), + .request = network.RequestWriter.init(&request), + .resourceType = switch (request.params.resource_type) { .script => "Script", .xhr => "XHR", .document => "Document", @@ -416,13 +446,13 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati .scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "", .realm = challenge.realm orelse "", }, - .networkId = &id.toRequestId(transfer), + .networkId = &id.toRequestId(&request), }, .{ .session_id = session_id }); log.debug(.cdp, "request auth required", .{ .state = "paused", - .id = transfer.id, - .url = transfer.url, + .id = request.params.request_id, + .url = request.params.url, }); // Await continueWithAuth diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index ff5778e9..0554681a 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -27,6 +27,8 @@ const Mime = @import("../../browser/Mime.zig"); const Notification = @import("../../Notification.zig"); const timestamp = @import("../../datetime.zig").timestamp; const Transfer = @import("../../browser/HttpClient.zig").Transfer; +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; const CdpStorage = @import("storage.zig"); @@ -260,7 +262,7 @@ pub fn httpRequestFail(bc: *CDP.BrowserContext, msg: *const Notification.Request // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.loadingFailed", .{ - .requestId = &id.toRequestId(msg.transfer), + .requestId = &id.toRequestId(msg.request), // Seems to be what chrome answers with. I assume it depends on the type of error? .type = "Ping", .errorText = msg.err, @@ -273,24 +275,23 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques // things, but no session. const session_id = bc.session_id orelse return; - const transfer = msg.transfer; - const req = &transfer.req; - const frame_id = req.frame_id; + const req = msg.request; + const frame_id = req.params.frame_id; const frame = bc.session.findFrameByFrameId(frame_id) orelse return; // Modify request with extra CDP headers for (bc.extra_headers.items) |extra| { - try req.headers.add(extra); + try req.params.headers.add(extra); } // We're missing a bunch of fields, but, for now, this eems like enough try bc.cdp.sendEvent("Network.requestWillBeSent", .{ .frameId = &id.toFrameId(frame_id), - .requestId = &id.toRequestId(transfer), - .loaderId = &id.toLoaderId(req.loader_id), - .type = req.resource_type.string(), + .requestId = &id.toRequestId(req), + .loaderId = &id.toLoaderId(req.params.loader_id), + .type = req.params.resource_type.string(), .documentURL = frame.url, - .request = TransferAsRequestWriter.init(transfer), + .request = RequestWriter.init(req), .initiator = .{ .type = "other" }, .redirectHasExtraInfo = false, // TODO change after adding Network.requestWillBeSentExtraInfo .hasUserGesture = false, @@ -304,15 +305,14 @@ pub fn httpResponseHeaderDone(arena: Allocator, bc: *CDP.BrowserContext, msg: *c // things, but no session. const session_id = bc.session_id orelse return; - const transfer = msg.transfer; - const req = &transfer.req; + const req = msg.request; // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.responseReceived", .{ - .frameId = &id.toFrameId(req.frame_id), - .requestId = &id.toRequestId(transfer), - .loaderId = &id.toLoaderId(req.loader_id), - .response = TransferAsResponseWriter.init(arena, transfer), + .frameId = &id.toFrameId(req.params.frame_id), + .requestId = &id.toRequestId(req), + .loaderId = &id.toLoaderId(req.params.loader_id), + .response = ResponseWriter.init(arena, msg.response), .hasExtraInfo = false, // TODO change after adding Network.responseReceivedExtraInfo }, .{ .session_id = session_id }); } @@ -321,36 +321,37 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request // detachTarget could be called, in which case, we still have a frame doing // things, but no session. const session_id = bc.session_id orelse return; - const transfer = msg.transfer; + const req = msg.request; try bc.cdp.sendEvent("Network.loadingFinished", .{ - .requestId = &id.toRequestId(transfer), - .encodedDataLength = transfer.bytes_received, + .requestId = &id.toRequestId(req), + .encodedDataLength = msg.content_length, }, .{ .session_id = session_id }); } -pub const TransferAsRequestWriter = struct { - transfer: *Transfer, +pub const RequestWriter = struct { + request: *Request, - pub fn init(transfer: *Transfer) TransferAsRequestWriter { + pub fn init(request: *Request) RequestWriter { return .{ - .transfer = transfer, + .request = request, }; } - pub fn jsonStringify(self: *const TransferAsRequestWriter, jws: anytype) !void { + pub fn jsonStringify(self: *const RequestWriter, jws: anytype) !void { self._jsonStringify(jws) catch return error.WriteFailed; } - fn _jsonStringify(self: *const TransferAsRequestWriter, jws: anytype) !void { - const transfer = self.transfer; + + fn _jsonStringify(self: *const RequestWriter, jws: anytype) !void { + const request = self.request; try jws.beginObject(); { try jws.objectField("url"); - try jws.write(transfer.url); + try jws.write(request.params.url); } { - const frag = URL.getHash(transfer.url); + const frag = URL.getHash(request.params.url); if (frag.len > 0) { try jws.objectField("urlFragment"); try jws.write(frag); @@ -359,23 +360,23 @@ pub const TransferAsRequestWriter = struct { { try jws.objectField("method"); - try jws.write(@tagName(transfer.req.method)); + try jws.write(@tagName(request.params.method)); } { try jws.objectField("hasPostData"); - try jws.write(transfer.req.body != null); + try jws.write(request.params.body != null); } { try jws.objectField("headers"); try jws.beginObject(); - var it = transfer.req.headers.iterator(); + var it = request.params.headers.iterator(); while (it.next()) |hdr| { try jws.objectField(hdr.name); try jws.write(hdr.value); } - if (try transfer.getCookieString()) |cookies| { + if (try request.getCookieString()) |cookies| { try jws.objectField("Cookie"); try jws.write(cookies[0 .. cookies.len - 1]); } @@ -385,34 +386,31 @@ pub const TransferAsRequestWriter = struct { } }; -const TransferAsResponseWriter = struct { +const ResponseWriter = struct { arena: Allocator, - transfer: *Transfer, + response: *const Response, - fn init(arena: Allocator, transfer: *Transfer) TransferAsResponseWriter { + fn init(arena: Allocator, response: *const Response) ResponseWriter { return .{ .arena = arena, - .transfer = transfer, + .response = response, }; } - pub fn jsonStringify(self: *const TransferAsResponseWriter, jws: anytype) !void { + pub fn jsonStringify(self: *const ResponseWriter, jws: anytype) !void { self._jsonStringify(jws) catch return error.WriteFailed; } - fn _jsonStringify(self: *const TransferAsResponseWriter, jws: anytype) !void { - const transfer = self.transfer; + fn _jsonStringify(self: *const ResponseWriter, jws: anytype) !void { + const response = self.response; try jws.beginObject(); { try jws.objectField("url"); - try jws.write(transfer.url); + try jws.write(response.url()); } - if (transfer.response_header) |*rh| { - // it should not be possible for this to be false, but I'm not - // feeling brave today. - const status = rh.status; + if (response.status()) |status| { try jws.objectField("status"); try jws.write(status); @@ -422,7 +420,7 @@ const TransferAsResponseWriter = struct { { const mime: Mime = blk: { - if (transfer.response_header.?.contentType()) |ct| { + if (response.contentType()) |ct| { break :blk try Mime.parse(ct); } break :blk .unknown; @@ -437,7 +435,8 @@ const TransferAsResponseWriter = struct { { try jws.objectField("timing"); try jws.write(.{ - .requestTime = transfer.start_time, + // TODO: fix + .requestTime = -1, .connectEnd = -1, .connectStart = -1, .dnsEnd = -1, @@ -458,7 +457,7 @@ const TransferAsResponseWriter = struct { // common to get these from a server (e.g. for Cache-Control), but // Chrome joins these. So we have to too. const arena = self.arena; - var it = transfer.responseHeaderIterator(); + var it = response.headerIterator(); var map: std.StringArrayHashMapUnmanaged([]const u8) = .empty; while (it.next()) |hdr| { const gop = try map.getOrPut(arena, hdr.name); diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index ee6f0990..239b5122 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -145,7 +145,7 @@ fn setLifecycleEventsEnabled(cmd: *CDP.Command) !void { const http_client = frame._session.browser.http_client; const http_active = http_client.http_active; - const total_network_activity = http_active + http_client.intercepted; + const total_network_activity = http_active + http_client.interception_layer.intercepted; if (frame._notified_network_almost_idle.check(total_network_activity <= 2)) { try sendPageLifecycle(bc, "networkAlmostIdle", now, frame_id, loader_id); } @@ -1182,6 +1182,89 @@ test "cdp.frame: navigate inherits original fragment across redirect" { } } +test "cdp.frame: anchor click sends Referer matching the originating page" { + // HTML Living Standard "navigate" algorithm + Fetch ยง4.5 "request's referrer": + // when a navigation is initiated by a hyperlink click (or form submit, or + // location.href assignment), the resulting request carries a Referer + // header equal to the originating document's URL. + var ctx = try testing.context(); + defer ctx.deinit(); + + const cdp_inst = ctx.cdp(); + _ = try cdp_inst.createBrowserContext(); + var bc = &cdp_inst.browser_context.?; + bc.id = "BID-A18"; + bc.session_id = "SID-A18"; + bc.target_id = "TID-A18-000000".*; + + // Initial navigation to the page hosting the anchor โ€” driven directly via + // Frame.navigate(.address_bar), so this request itself has no Referer. + { + const f = try bc.session.createPage(); + try f.navigate("http://127.0.0.1:9582/referer_link.html", .{}); + var runner = try bc.session.runner(.{}); + try runner.wait(.{ .ms = 2000 }); + } + + // Click the anchor via JS. The click goes through Frame.scheduleNavigation + // (.reason = .script), which must capture the originating frame's URL as + // the Referer for the queued navigation. + { + const f = bc.session.currentFrame() orelse unreachable; + var ls: js.Local.Scope = undefined; + f.js.localScope(&ls); + defer ls.deinit(); + _ = try ls.local.exec("document.getElementById('link').click()", null); + var runner = try bc.session.runner(.{}); + try runner.wait(.{ .ms = 2000 }); + } + + // After the click navigation completes, the loaded page is /echo_referer + // and its body echoes the Referer header the server actually saw. + { + const f = bc.session.currentFrame() orelse unreachable; + var ls: js.Local.Scope = undefined; + f.js.localScope(&ls); + defer ls.deinit(); + const v = try ls.local.exec( + "document.body.innerText.includes('referer=http://127.0.0.1:9582/referer_link.html')", + null, + ); + try testing.expect(v.toBool()); + } +} + +test "cdp.frame: address-bar Page.navigate sends no Referer" { + // Regression guard: navigations initiated by the user agent itself (CDP + // Page.navigate, address-bar typed URLs, Page.reload) must not leak the + // previous page's URL as Referer. Matches Chrome. + var ctx = try testing.context(); + defer ctx.deinit(); + + const cdp_inst = ctx.cdp(); + _ = try cdp_inst.createBrowserContext(); + var bc = &cdp_inst.browser_context.?; + bc.id = "BID-A18B"; + bc.session_id = "SID-A18B"; + bc.target_id = "TID-A18B-00000".*; + + { + const f = try bc.session.createPage(); + try f.navigate("http://127.0.0.1:9582/echo_referer", .{}); + var runner = try bc.session.runner(.{}); + try runner.wait(.{ .ms = 2000 }); + } + + { + const f = bc.session.currentFrame() orelse unreachable; + var ls: js.Local.Scope = undefined; + f.js.localScope(&ls); + defer ls.deinit(); + const v = try ls.local.exec("document.body.innerText.includes('referer=NONE')", null); + try testing.expect(v.toBool()); + } +} + test "cdp.frame: addScriptToEvaluateOnNewDocument" { var ctx = try testing.context(); defer ctx.deinit(); diff --git a/src/cdp/id.zig b/src/cdp/id.zig index a5d1286f..f6889d24 100644 --- a/src/cdp/id.zig +++ b/src/cdp/id.zig @@ -40,15 +40,14 @@ pub fn toLoaderId(id: u32) [14]u8 { // requestId has special requirements. If it's the main document navigation, // then it should match the loader id. -const Transfer = @import("../browser/HttpClient.zig").Transfer; -pub fn toRequestId(transfer: *const Transfer) [14]u8 { - const req = transfer.req; - if (req.resource_type == .document) { - return toLoaderId(req.loader_id); +const Request = @import("../browser/HttpClient.zig").Request; +pub fn toRequestId(req: *const Request) [14]u8 { + if (req.params.resource_type == .document) { + return toLoaderId(req.params.loader_id); } var buf: [14]u8 = undefined; - _ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{transfer.id}) catch unreachable; + _ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{req.params.request_id}) catch unreachable; return buf; } diff --git a/src/network/layer/CacheLayer.zig b/src/network/layer/CacheLayer.zig new file mode 100644 index 00000000..fd8a5e9f --- /dev/null +++ b/src/network/layer/CacheLayer.zig @@ -0,0 +1,232 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const lp = @import("lightpanda"); +const log = lp.log; + +const http = @import("../http.zig"); +const Client = @import("../../browser/HttpClient.zig").Client; +const Transfer = @import("../../browser/HttpClient.zig").Transfer; +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; +const Layer = @import("../../browser/HttpClient.zig").Layer; + +const Cache = @import("../cache/Cache.zig"); +const CachedMetadata = @import("../cache/Cache.zig").CachedMetadata; +const CachedResponse = @import("../cache/Cache.zig").CachedResponse; +const Forward = @import("Forward.zig"); + +const CacheLayer = @This(); + +next: Layer = undefined, + +pub fn layer(self: *CacheLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ + .request = request, + }, + }; +} + +fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { + const self: *CacheLayer = @ptrCast(@alignCast(ptr)); + const network = client.network; + + if (req.params.method != .GET) { + return self.next.request(client, req); + } + + const arena = req.params.arena; + + var iter = req.params.headers.iterator(); + const req_header_list = try iter.collect(arena); + + if (network.cache.?.get(arena, .{ + .url = req.params.url, + .timestamp = std.time.timestamp(), + .request_headers = req_header_list.items, + })) |cached| { + return serveFromCache(req, &cached); + } + + const cache_ctx = try arena.create(CacheContext); + cache_ctx.* = .{ + .arena = arena, + .client = client, + .forward = Forward.fromRequest(req), + .req_url = req.params.url, + .req_headers = req.params.headers, + }; + + const wrapped = cache_ctx.forward.wrapRequest( + req, + cache_ctx, + .{ + .start = CacheContext.startCallback, + .header = CacheContext.headerCallback, + .done = CacheContext.doneCallback, + .shutdown = CacheContext.shutdownCallback, + .err = CacheContext.errorCallback, + }, + ); + + return self.next.request(client, wrapped); +} + +fn serveFromCache(req: Request, cached: *const CachedResponse) !void { + const response = Response.fromCached(req.ctx, cached); + defer switch (cached.data) { + .buffer => |_| {}, + .file => |f| f.file.close(), + }; + + if (req.start_callback) |cb| { + try cb(response); + } + + const proceed = try req.header_callback(response); + if (!proceed) { + return error.Abort; + } + + switch (cached.data) { + .buffer => |data| { + if (data.len > 0) { + try req.data_callback(response, data); + } + }, + .file => |f| { + const file = f.file; + var buf: [1024]u8 = undefined; + var file_reader = file.reader(&buf); + try file_reader.seekTo(f.offset); + const reader = &file_reader.interface; + var read_buf: [1024]u8 = undefined; + var remaining = f.len; + while (remaining > 0) { + const read_len = @min(read_buf.len, remaining); + const n = try reader.readSliceShort(read_buf[0..read_len]); + if (n == 0) break; + remaining -= n; + try req.data_callback(response, read_buf[0..n]); + } + }, + } + + try req.done_callback(req.ctx); +} + +const CacheContext = struct { + arena: std.mem.Allocator, + client: *Client, + transfer: ?*Transfer = null, + forward: Forward, + req_url: [:0]const u8, + req_headers: http.Headers, + pending_metadata: ?*CachedMetadata = null, + + fn startCallback(response: Response) anyerror!void { + const self: *CacheContext = @ptrCast(@alignCast(response.ctx)); + self.transfer = response.inner.transfer; + return self.forward.forwardStart(response); + } + + fn headerCallback(response: Response) anyerror!bool { + const self: *CacheContext = @ptrCast(@alignCast(response.ctx)); + const allocator = self.arena; + + const transfer = response.inner.transfer; + var rh = &transfer.response_header.?; + + const conn = transfer._conn.?; + + const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null; + + const maybe_cm = try Cache.tryCache( + allocator, + std.time.timestamp(), + transfer.url, + rh.status, + rh.contentType(), + if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null, + vary, + if (conn.getResponseHeader("age", 0)) |h| h.value else null, + conn.getResponseHeader("set-cookie", 0) != null, + conn.getResponseHeader("authorization", 0) != null, + ); + + if (maybe_cm) |cm| { + var iter = transfer.responseHeaderIterator(); + var header_list = try iter.collect(allocator); + const end_of_response = header_list.items.len; + + if (vary) |vary_str| { + var req_it = self.req_headers.iterator(); + while (req_it.next()) |hdr| { + var vary_iter = std.mem.splitScalar(u8, vary_str, ','); + while (vary_iter.next()) |part| { + const name = std.mem.trim(u8, part, &std.ascii.whitespace); + if (std.ascii.eqlIgnoreCase(hdr.name, name)) { + try header_list.append(allocator, .{ + .name = try allocator.dupe(u8, hdr.name), + .value = try allocator.dupe(u8, hdr.value), + }); + } + } + } + } + + const metadata = try allocator.create(CachedMetadata); + metadata.* = cm; + metadata.headers = header_list.items[0..end_of_response]; + metadata.vary_headers = header_list.items[end_of_response..]; + self.pending_metadata = metadata; + } + + return self.forward.forwardHeader(response); + } + + fn doneCallback(ctx: *anyopaque) anyerror!void { + const self: *CacheContext = @ptrCast(@alignCast(ctx)); + const transfer = self.transfer orelse @panic("Start Callback didn't set CacheLayer.transfer"); + + if (self.pending_metadata) |metadata| { + const cache = &self.client.network.cache.?; + + log.debug(.browser, "http cache", .{ .key = self.req_url, .metadata = metadata }); + cache.put(metadata.*, transfer._stream_buffer.items) catch |err| { + log.warn(.http, "cache put failed", .{ .err = err }); + }; + log.debug(.browser, "http.cache.put", .{ .url = self.req_url }); + } + + return self.forward.forwardDone(); + } + + fn shutdownCallback(ctx: *anyopaque) void { + const self: *CacheContext = @ptrCast(@alignCast(ctx)); + self.forward.forwardShutdown(); + } + + fn errorCallback(ctx: *anyopaque, e: anyerror) void { + const self: *CacheContext = @ptrCast(@alignCast(ctx)); + self.forward.forwardErr(e); + } +}; diff --git a/src/network/layer/Forward.zig b/src/network/layer/Forward.zig new file mode 100644 index 00000000..b11ff23f --- /dev/null +++ b/src/network/layer/Forward.zig @@ -0,0 +1,134 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; + +const Forward = @This(); + +ctx: *anyopaque, +start: ?Request.StartCallback, +header: Request.HeaderCallback, +data: Request.DataCallback, +done: Request.DoneCallback, +err: Request.ErrorCallback, +shutdown: ?Request.ShutdownCallback, + +pub fn fromRequest(req: Request) Forward { + return .{ + .ctx = req.ctx, + .start = req.start_callback, + .header = req.header_callback, + .data = req.data_callback, + .done = req.done_callback, + .err = req.error_callback, + .shutdown = req.shutdown_callback, + }; +} + +pub const Overrides = struct { + start: ?Request.StartCallback = null, + header: ?Request.HeaderCallback = null, + data: ?Request.DataCallback = null, + done: ?Request.DoneCallback = null, + err: ?Request.ErrorCallback = null, + shutdown: ?Request.ShutdownCallback = null, +}; + +pub fn wrapRequest( + self: *Forward, + req: Request, + new_ctx: anytype, + overrides: Overrides, +) Request { + const T = @TypeOf(new_ctx.*); + const PassthroughT = makePassthrough(T, "forward"); + var wrapped = req; + wrapped.ctx = new_ctx; + wrapped.start_callback = overrides.start orelse if (self.start != null) PassthroughT.start else null; + wrapped.header_callback = overrides.header orelse PassthroughT.header; + wrapped.data_callback = overrides.data orelse PassthroughT.data; + wrapped.done_callback = overrides.done orelse PassthroughT.done; + wrapped.error_callback = overrides.err orelse PassthroughT.err; + wrapped.shutdown_callback = overrides.shutdown orelse if (self.shutdown != null) PassthroughT.shutdown else null; + return wrapped; +} + +fn makePassthrough(comptime T: type, comptime field: []const u8) type { + return struct { + pub fn start(response: Response) anyerror!void { + const self: *T = @ptrCast(@alignCast(response.ctx)); + return @field(self, field).forwardStart(response); + } + + pub fn header(response: Response) anyerror!bool { + const self: *T = @ptrCast(@alignCast(response.ctx)); + return @field(self, field).forwardHeader(response); + } + + pub fn data(response: Response, chunk: []const u8) anyerror!void { + const self: *T = @ptrCast(@alignCast(response.ctx)); + return @field(self, field).forwardData(response, chunk); + } + + pub fn done(ctx_ptr: *anyopaque) anyerror!void { + const self: *T = @ptrCast(@alignCast(ctx_ptr)); + return @field(self, field).forwardDone(); + } + + pub fn err(ctx_ptr: *anyopaque, e: anyerror) void { + const self: *T = @ptrCast(@alignCast(ctx_ptr)); + @field(self, field).forwardErr(e); + } + + pub fn shutdown(ctx_ptr: *anyopaque) void { + const self: *T = @ptrCast(@alignCast(ctx_ptr)); + @field(self, field).forwardShutdown(); + } + }; +} + +pub fn forwardStart(self: Forward, response: Response) anyerror!void { + var fwd = response; + fwd.ctx = self.ctx; + if (self.start) |cb| try cb(fwd); +} + +pub fn forwardHeader(self: Forward, response: Response) anyerror!bool { + var fwd = response; + fwd.ctx = self.ctx; + return self.header(fwd); +} + +pub fn forwardData(self: Forward, response: Response, chunk: []const u8) anyerror!void { + var fwd = response; + fwd.ctx = self.ctx; + return self.data(fwd, chunk); +} + +pub fn forwardDone(self: Forward) anyerror!void { + return self.done(self.ctx); +} + +pub fn forwardErr(self: Forward, e: anyerror) void { + self.err(self.ctx, e); +} + +pub fn forwardShutdown(self: Forward) void { + if (self.shutdown) |cb| cb(self.ctx); +} diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig new file mode 100644 index 00000000..165cd254 --- /dev/null +++ b/src/network/layer/InterceptionLayer.zig @@ -0,0 +1,270 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const builtin = @import("builtin"); +const lp = @import("lightpanda"); +const log = lp.log; + +const IS_DEBUG = builtin.mode == .Debug; + +const http = @import("../http.zig"); +const Client = @import("../../browser/HttpClient.zig").Client; +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; +const FulfilledResponse = @import("../../browser/HttpClient.zig").FulfilledResponse; +const Layer = @import("../../browser/HttpClient.zig").Layer; +const Forward = @import("Forward.zig"); + +const InterceptionLayer = @This(); + +// Count of intercepted requests. This is to help deal with intercepted requests. +// The client doesn't track intercepted transfers. If a request is intercepted, +// the client forgets about it and requires the interceptor to continue or abort +// it. That works well, except if we only rely on active, we might think there's +// no more network activity when, with interecepted requests, there might be more +// in the future. (We really only need this to properly emit a 'networkIdle' and +// 'networkAlmostIdle' Page.lifecycleEvent in CDP). +intercepted: usize = 0, + +next: Layer = undefined, + +pub fn layer(self: *InterceptionLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ .request = request }, + }; +} + +fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void { + const self: *InterceptionLayer = @ptrCast(@alignCast(ptr)); + + const intercept_ctx = try in_req.params.arena.create(InterceptContext); + intercept_ctx.* = .{ + .client = client, + .forward = Forward.fromRequest(in_req), + .layer = self, + .request = in_req, + }; + + var req = intercept_ctx.forward.wrapRequest( + in_req, + intercept_ctx, + .{ + .start = InterceptContext.startCallback, + .header = InterceptContext.headerCallback, + .data = InterceptContext.dataCallback, + .done = InterceptContext.doneCallback, + .err = InterceptContext.errorCallback, + .shutdown = InterceptContext.shutdownCallback, + }, + ); + + req.params.notification.dispatch(.http_request_start, &.{ .request = &req }); + + var wait_for_interception = false; + req.params.notification.dispatch(.http_request_intercept, &.{ + .request = &req, + .wait_for_interception = &wait_for_interception, + }); + + log.debug(.http, "interception check", .{ + .wait_for_interception = wait_for_interception, + .intercepted = self.intercepted, + .url = req.params.url, + }); + + if (!wait_for_interception) { + return self.next.request(client, req); + } + + self.intercepted += 1; + if (comptime IS_DEBUG) { + log.debug(.http, "wait for interception", .{ .intercepted = self.intercepted }); + } +} + +pub const InterceptContext = struct { + client: *Client, + forward: Forward, + layer: *InterceptionLayer, + request: Request, + content_length: usize = 0, + + fn startCallback(response: Response) anyerror!void { + const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + log.debug(.http, "intercept start", .{ .url = self.request.params.url }); + return self.forward.forwardStart(response); + } + + fn headerCallback(response: Response) anyerror!bool { + const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + log.debug(.http, "intercept header", .{ + .url = self.request.params.url, + .status = response.status(), + .content_length = response.contentLength(), + }); + + self.content_length = response.contentLength() orelse 0; + + self.request.params.notification.dispatch(.http_response_header_done, &.{ + .request = &self.request, + .response = &response, + }); + + return self.forward.forwardHeader(response); + } + + fn dataCallback(response: Response, chunk: []const u8) anyerror!void { + const self: *InterceptContext = @ptrCast(@alignCast(response.ctx)); + log.debug(.http, "intercept data", .{ + .url = self.request.params.url, + .len = chunk.len, + }); + + self.request.params.notification.dispatch(.http_response_data, &.{ + .data = chunk, + .request = &self.request, + }); + + return self.forward.forwardData(response, chunk); + } + + fn doneCallback(ctx: *anyopaque) anyerror!void { + const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + + log.debug(.http, "intercept done", .{ + .url = self.request.params.url, + .content_length = self.content_length, + }); + + self.request.params.notification.dispatch(.http_request_done, &.{ + .request = &self.request, + .content_length = self.content_length, + }); + return self.forward.forwardDone(); + } + + fn errorCallback(ctx: *anyopaque, err: anyerror) void { + const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + + log.debug(.http, "intercept error", .{ + .url = self.request.params.url, + .err = err, + }); + self.request.params.notification.dispatch(.http_request_fail, &.{ + .request = &self.request, + .err = err, + }); + self.forward.forwardErr(err); + } + + fn shutdownCallback(ctx: *anyopaque) void { + const self: *InterceptContext = @ptrCast(@alignCast(ctx)); + + log.debug(.http, "intercept shutdown", .{ .url = self.request.params.url }); + self.request.params.notification.dispatch(.http_request_fail, &.{ + .request = &self.request, + .err = error.Shutdown, + }); + self.forward.forwardShutdown(); + } +}; + +// CDP Callbacks +// These handle their own clean up on errors with `self.next.request`. +// This is because they don't pass their error up the chain as they are async callbacks. + +pub fn continueRequest(self: *InterceptionLayer, client: *Client, req: Request) anyerror!void { + if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.continueRequest", .{ .value = self.intercepted }); + log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); + } + + self.intercepted -= 1; + self.next.request(client, req) catch |err| { + const ctx: *InterceptContext = @ptrCast(@alignCast(req.ctx)); + req.error_callback(req.ctx, err); + ctx.client.deinitRequest(req); + return err; + }; +} + +pub fn abortRequest(self: *InterceptionLayer, client: *Client, req: Request) void { + if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.abortRequest", .{ .value = self.intercepted }); + log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); + } + self.intercepted -= 1; + + req.error_callback(req.ctx, error.Abort); + client.deinitRequest(req); +} + +fn fulfillInner( + req: Request, + status: u16, + headers: []const http.Header, + body: ?[]const u8, +) !void { + const fulfilled = FulfilledResponse{ + .status = status, + .url = req.params.url, + .headers = headers, + .body = body, + }; + + const response = Response.fromFulfilled(req.ctx, &fulfilled); + + if (req.start_callback) |cb| { + try cb(response); + } + + const proceed = try req.header_callback(response); + if (!proceed) { + return error.Abort; + } + + if (body) |b| { + try req.data_callback(response, b); + } + + try req.done_callback(req.ctx); +} + +pub fn fulfillRequest( + self: *InterceptionLayer, + client: *Client, + req: Request, + status: u16, + headers: []const http.Header, + body: ?[]const u8, +) !void { + if (comptime IS_DEBUG) { + lp.assert(self.intercepted > 0, "InterceptionLayer.fulfillRequest", .{ .value = self.intercepted }); + log.debug(.http, "fulfill transfer", .{ .intercepted = self.intercepted }); + } + + self.intercepted -= 1; + defer client.deinitRequest(req); + + fulfillInner(req, status, headers, body) catch |err| { + req.error_callback(req.ctx, err); + return err; + }; +} diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig new file mode 100644 index 00000000..1bfae1b6 --- /dev/null +++ b/src/network/layer/RobotsLayer.zig @@ -0,0 +1,261 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const lp = @import("lightpanda"); +const log = lp.log; + +const URL = @import("../../browser/URL.zig"); +const Robots = @import("../Robots.zig"); +const Client = @import("../../browser/HttpClient.zig").Client; +const Request = @import("../../browser/HttpClient.zig").Request; +const Response = @import("../../browser/HttpClient.zig").Response; +const Layer = @import("../../browser/HttpClient.zig").Layer; +const Forward = @import("Forward.zig"); + +const RobotsLayer = @This(); + +next: Layer = undefined, +allocator: std.mem.Allocator, +pending: std.StringHashMapUnmanaged(std.ArrayListUnmanaged(Request)) = .empty, + +pub fn layer(self: *RobotsLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ + .request = request, + }, + }; +} + +pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void { + var it = self.pending.iterator(); + while (it.next()) |entry| { + entry.value_ptr.deinit(allocator); + } + self.pending.deinit(allocator); +} + +fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { + const self: *RobotsLayer = @ptrCast(@alignCast(ptr)); + + const arena = req.params.arena; + const robots_url = try URL.getRobotsUrl(arena, req.params.url); + + if (client.network.robot_store.get(robots_url)) |robot_entry| { + switch (robot_entry) { + .present => |robots| { + const path = URL.getPathname(req.params.url); + + if (!robots.isAllowed(path)) { + log.warn(.http, "blocked by robots", .{ .url = req.params.url }); + return error.RobotsBlocked; + } + }, + .absent => {}, + } + return self.next.request(client, req); + } + + return self.fetchRobotsThenRequest(client, robots_url, req); +} + +fn fetchRobotsThenRequest( + self: *RobotsLayer, + client: *Client, + robots_url: [:0]const u8, + req: Request, +) !void { + const entry = try self.pending.getOrPut(self.allocator, robots_url); + + if (!entry.found_existing) { + errdefer std.debug.assert(self.pending.remove(robots_url)); + entry.value_ptr.* = .empty; + + // This arena is later owned by the Request. It does not need to be cleaned up by us because + // it will be cleaned up by the `Transfer.deinit()` or any `Request.deinit()` called on any sublayers. + const new_arena = try client.network.app.arena_pool.acquire(.small, "RobotsLayer.RobotsContext"); + errdefer client.network.app.arena_pool.release(new_arena); + + const robots_ctx = try new_arena.create(RobotsContext); + robots_ctx.* = .{ + .layer = self, + .client = client, + .arena = new_arena, + .robots_url = robots_url, + .buffer = .empty, + }; + + const headers = try client.newHeaders(); + log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); + + try self.next.request(client, .{ + .ctx = robots_ctx, + .params = .{ + // We have to do this ourselves because we are not going through the top level `request`. + .arena = new_arena, + .request_id = client.incrReqId(), + .url = robots_url, + .method = .GET, + .headers = headers, + .frame_id = req.params.frame_id, + .loader_id = req.params.loader_id, + .cookie_jar = req.params.cookie_jar, + .cookie_origin = req.params.cookie_origin, + .notification = req.params.notification, + .resource_type = .fetch, + }, + .header_callback = RobotsContext.headerCallback, + .data_callback = RobotsContext.dataCallback, + .done_callback = RobotsContext.doneCallback, + .error_callback = RobotsContext.errorCallback, + .shutdown_callback = RobotsContext.shutdownCallback, + }); + } + + try entry.value_ptr.append(self.allocator, req); +} + +fn flushPending(self: *RobotsLayer, client: *Client, robots_url: [:0]const u8, allowed: bool) void { + var queued = self.pending.fetchRemove(robots_url) orelse + @panic("RobotsLayer.flushPending: missing queue"); + defer queued.value.deinit(self.allocator); + + for (queued.value.items) |queued_req| { + if (!allowed) { + log.warn(.http, "blocked by robots", .{ .url = queued_req.params.url }); + defer client.deinitRequest(queued_req); + queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); + } else { + self.next.request(client, queued_req) catch |e| { + defer client.deinitRequest(queued_req); + queued_req.error_callback(queued_req.ctx, e); + }; + } + } +} + +fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8, client: *Client) void { + var queued = self.pending.fetchRemove(robots_url) orelse + @panic("RobotsLayer.flushPendingShutdown: missing queue"); + defer queued.value.deinit(self.allocator); + + for (queued.value.items) |queued_req| { + defer client.deinitRequest(queued_req); + if (queued_req.shutdown_callback) |cb| cb(queued_req.ctx); + } +} + +const RobotsContext = struct { + layer: *RobotsLayer, + arena: std.mem.Allocator, + client: *Client, + robots_url: [:0]const u8, + buffer: std.ArrayListUnmanaged(u8), + status: u16 = 0, + + fn deinit(self: *RobotsContext) void { + self.buffer.deinit(self.layer.allocator); + self.layer.allocator.destroy(self); + } + + fn headerCallback(response: Response) anyerror!bool { + const self: *RobotsContext = @ptrCast(@alignCast(response.ctx)); + switch (response.inner) { + .transfer => |t| { + if (t.response_header) |hdr| { + log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = self.robots_url }); + self.status = hdr.status; + } + if (t.getContentLength()) |cl| { + try self.buffer.ensureTotalCapacity(self.arena, cl); + } + }, + else => {}, + } + return true; + } + + fn dataCallback(response: Response, data: []const u8) anyerror!void { + const self: *RobotsContext = @ptrCast(@alignCast(response.ctx)); + try self.buffer.appendSlice(self.arena, data); + } + + fn doneCallback(ctx_ptr: *anyopaque) anyerror!void { + const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); + const l = self.layer; + const client = self.client; + const robots_url = self.robots_url; + + var allowed = true; + const network = client.network; + + switch (self.status) { + 200 => { + if (self.buffer.items.len > 0) { + const robots: ?Robots = network.robot_store.robotsFromBytes( + network.config.http_headers.user_agent, + self.buffer.items, + ) catch blk: { + log.warn(.browser, "failed to parse robots", .{ .robots_url = robots_url }); + try network.robot_store.putAbsent(robots_url); + break :blk null; + }; + if (robots) |r| { + try network.robot_store.put(robots_url, r); + const path = URL.getPathname(l.pending.get(robots_url).?.items[0].params.url); + allowed = r.isAllowed(path); + } + } + }, + 404 => { + log.debug(.http, "robots not found", .{ .url = robots_url }); + try network.robot_store.putAbsent(robots_url); + }, + else => { + log.debug(.http, "unexpected status on robots", .{ + .url = robots_url, + .status = self.status, + }); + try network.robot_store.putAbsent(robots_url); + }, + } + + l.flushPending(client, robots_url, allowed); + } + + fn errorCallback(ctx_ptr: *anyopaque, err: anyerror) void { + const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); + const l = self.layer; + const client = self.client; + const robots_url = self.robots_url; + + log.warn(.http, "robots fetch failed", .{ .err = err }); + l.flushPending(client, robots_url, true); + } + + fn shutdownCallback(ctx_ptr: *anyopaque) void { + const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr)); + const l = self.layer; + const client = self.client; + const robots_url = self.robots_url; + + log.debug(.http, "robots fetch shutdown", .{}); + l.flushPendingShutdown(robots_url, client); + } +}; diff --git a/src/network/layer/WebBotAuthLayer.zig b/src/network/layer/WebBotAuthLayer.zig new file mode 100644 index 00000000..7e67af49 --- /dev/null +++ b/src/network/layer/WebBotAuthLayer.zig @@ -0,0 +1,51 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const lp = @import("lightpanda"); +const log = lp.log; + +const URL = @import("../../browser/URL.zig"); +const WebBotAuth = @import("../WebBotAuth.zig"); +const Client = @import("../../browser/HttpClient.zig").Client; +const Request = @import("../../browser/HttpClient.zig").Request; +const Layer = @import("../../browser/HttpClient.zig").Layer; + +const WebBotAuthLayer = @This(); + +next: Layer = undefined, + +pub fn layer(self: *WebBotAuthLayer) Layer { + return .{ + .ptr = self, + .vtable = &.{ .request = request }, + }; +} + +fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void { + const self: *WebBotAuthLayer = @ptrCast(@alignCast(ptr)); + var our_req = req; + + const wba = client.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth"); + + const arena = req.params.arena; + const authority = URL.getHost(req.params.url); + try wba.signRequest(arena, &our_req.params.headers, authority); + + return self.next.request(client, our_req); +} diff --git a/src/testing.zig b/src/testing.zig index 2fb04668..1ea704b0 100644 --- a/src/testing.zig +++ b/src/testing.zig @@ -662,6 +662,39 @@ fn testHTTPHandler(req: *std.http.Server.Request) !void { }); } + if (std.mem.eql(u8, path, "/echo_referer")) { + // Echo the request's Referer header back as HTML so tests can assert + // what Referer the navigation sent. Used by the cross-page Referer test. + var it = req.iterateHeaders(); + var referer: []const u8 = "NONE"; + while (it.next()) |h| { + if (std.ascii.eqlIgnoreCase(h.name, "Referer")) { + referer = h.value; + break; + } + } + var html_buf: [512]u8 = undefined; + const html = try std.fmt.bufPrint(&html_buf, "referer={s}", .{referer}); + return req.respond(html, .{ + .extra_headers = &.{ + .{ .name = "Content-Type", .value = "text/html; charset=utf-8" }, + }, + }); + } + + if (std.mem.eql(u8, path, "/referer_link.html")) { + // Page with an anchor link to /echo_referer. The test clicks the link + // via JS and asserts the resulting page reports Referer = this page. + return req.respond( + "go", + .{ + .extra_headers = &.{ + .{ .name = "Content-Type", .value = "text/html; charset=utf-8" }, + }, + }, + ); + } + if (std.mem.eql(u8, path, "/echo_method")) { // Echo the request method back as HTML so tests can assert on what // method the navigation used. Used by the Page.reload-replays-POST test.