// Copyright (C) 2023-2024 Lightpanda (Selecy SAS) // // Francis Bouvier // Pierre Tachoire // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as // published by the Free Software Foundation, either version 3 of the // License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . const std = @import("std"); const lp = @import("lightpanda"); const App = @import("../App.zig"); const Inbox = @import("../Inbox.zig"); const Network = @import("../network/Network.zig"); const Notification = @import("../Notification.zig"); const WS = @import("../network/WS.zig"); const js = @import("../browser/js/js.zig"); const Browser = @import("../browser/Browser.zig"); const Session = @import("../browser/Session.zig"); const Frame = @import("../browser/Frame.zig"); const Mime = @import("../browser/Mime.zig"); const Element = @import("../browser/webapi/Element.zig"); const Label = @import("../browser/webapi/element/html/Label.zig"); const Transfer = @import("../browser/HttpClient.zig").Transfer; const Connection = @import("Connection.zig"); const Incrementing = @import("id.zig").Incrementing; const InterceptState = @import("domains/fetch.zig").InterceptState; const log = lp.log; const json = std.json; const posix = std.posix; const Allocator = std.mem.Allocator; pub const URL_BASE = "chrome://newtab/"; const IS_DEBUG = @import("builtin").mode == .Debug; const SessionIdGen = Incrementing(u32, "SID"); const BrowserContextIdGen = Incrementing(u32, "BID"); // webmcp tool invocation pub const InvocationIdGen = Incrementing(u32, "INV"); // Generic so 
that we can inject mocks into it. const CDP = @This(); app: *App, conn: Connection, browser: Browser, allocator: Allocator, // Network-thread read-side handle for the CDP socket. Populated in // init; Server.handleConnection calls network.registerCdp(&cdp.link) // after the worker-side handshake completes, and unregisterCdp before // teardown. link: Network.CdpLink, // when true, any target creation must be attached. target_auto_attach: bool = false, session_id_gen: SessionIdGen = .{}, browser_context_id_gen: BrowserContextIdGen = .{}, browser_context: ?BrowserContext, // Re-used arena for processing a message. We're assuming that we're getting // 1 message at a time. message_arena: std.heap.ArenaAllocator, // Used for processing notifications within a browser context. notification_arena: std.heap.ArenaAllocator, // Valid for 1 frame navigation (what CDP calls a "renderer") frame_arena: std.heap.ArenaAllocator, // Valid for the entire lifetime of the BrowserContext. Should minimize // (or altogether eliminate) our use of this. 
browser_context_arena: std.heap.ArenaAllocator,

// Initializes `self` in place for a CDP client connected on `socket`.
//
// `self` must point at stable memory: `browser.init` receives `self`, and
// `link.cdp` stores it, so the CDP value must not be moved afterwards.
//
// Order of operations:
//   1. Fill in every field; `link`, `conn` and `browser` start out
//      `undefined` and are initialized by the steps below.
//   2. Initialize the browser (with the inspector enabled).
//   3. Initialize the connection, wiring it to the http client's inbox and
//      the app's arena pool.
//   4. Populate `link` with the socket and the http client's handle.
//
// On error, everything that was successfully initialized is released again,
// so the caller must NOT call `deinit` after a failed `init`.
pub fn init(
    self: *CDP,
    app: *App,
    socket: posix.socket_t,
    json_version_response: []const u8,
) !void {
    const allocator = app.allocator;

    self.* = .{
        .app = app,
        .link = undefined,
        .conn = undefined,
        .browser = undefined,
        .allocator = allocator,
        .browser_context = null,
        .frame_arena = std.heap.ArenaAllocator.init(allocator),
        .message_arena = std.heap.ArenaAllocator.init(allocator),
        .notification_arena = std.heap.ArenaAllocator.init(allocator),
        .browser_context_arena = std.heap.ArenaAllocator.init(allocator),
    };

    try self.browser.init(app, .{ .env = .{ .with_inspector = true } }, self);
    // Without this, a failure in `conn.init` below would leak the browser:
    // the caller cannot run `deinit` on a half-initialized CDP because
    // `conn` would still be `undefined`.
    errdefer self.browser.deinit();

    const http_client = &self.browser.http_client;
    try self.conn.init(allocator, socket, json_version_response, &http_client.inbox, &app.arena_pool);
    // Nothing below can fail today; kept so that any fallible step added
    // later is covered automatically.
    errdefer self.conn.deinit();

    self.link = .{
        .cdp = self,
        .state = .live,
        .socket = socket,
        .handle = http_client.handle,
    };
}

// Releases everything owned by a successfully initialized CDP.
//
// The browser context (if any) is torn down before the browser, the four
// arenas are released after the browser, and the connection goes last.
pub fn deinit(self: *CDP) void {
    if (self.browser_context) |*bc| {
        bc.deinit();
    }
    self.browser.deinit();
    self.frame_arena.deinit();
    self.message_arena.deinit();
    self.notification_arena.deinit();
    self.browser_context_arena.deinit();
    self.conn.deinit();
}

// Called by Network when readable bytes arrive on the CDP socket.
// Feeds them through the WS framer and pushes each parsed frame into
// the worker's inbox. Returns false if a close frame was seen (or a
// fatal frame error) so Network drops the link from its poll set.
//
// One network read can carry more bytes than the reader's current
// free space — large CDP messages (Page.addScriptToEvaluateOnNewDocument,
// Runtime evaluation results, etc.) routinely exceed 16 KB, and a
// single read can contain multiple messages, or part of messages. We loop: feed
// what fits, run processMessages (which extracts complete frames, compacts the
// reader, and grows the buffer if it sees a frame header larger than current
// capacity), repeat until the chunk is drained.
pub fn onData(self: *CDP, data: []const u8) anyerror!bool {
    // `pending` is the tail of `data` that the reader has not accepted yet.
    var pending = data;
    while (pending.len != 0) {
        // Hand over as much as the reader takes, then drop that prefix.
        const accepted = self.conn.feedBytes(pending);
        pending = pending[accepted..];

        // Drain whatever frames are now complete; `false` means stop
        // reading from this link.
        const keep_reading = try self.conn.processMessages();
        if (!keep_reading) {
            return false;
        }
    }
    return true;
}

// Invoked by Network when it drops the link on its own (peer EOF, read
// error, poll HUP/ERR). Pushes a `disconnect` message, carrying `err`,
// into the http client's inbox so that the worker's drainInbox surfaces
// error.ClientDisconnected. Running out of memory while acquiring the
// arena for that message is treated as fatal.
pub fn onLinkDisconnect(self: *CDP, err: ?anyerror) void {
    const browser = &self.browser;
    const msg_arena = browser.arena_pool.acquire(.tiny, "cdp disconnect") catch |acquire_err| switch (acquire_err) {
        error.OutOfMemory => @panic("OOM"),
    };
    browser.http_client.inbox.push(msg_arena, .{ .disconnect = err });
}

// Worker-side entry point for one CDP message bubbled up by
// HttpClient.drainInbox. The JSON was already parsed on the Network
// thread when the message was pushed to the inbox, so this goes straight
// to dispatchParsed instead of parsing again. `c.raw` and `c.arena` are
// owned by the inbox Message, which drainInbox frees only after we
// return, so the string slices inside `c.input` remain valid for the
// whole dispatch.
pub fn onMessage(self: *CDP, c: *Inbox.Message.Cdp) anyerror!void {
    // The per-message arena is recycled once dispatch is finished.
    defer _ = self.message_arena.reset(.{ .retain_with_limit = 1024 * 16 });

    const scratch = self.message_arena.allocator();
    return self.dispatchParsed(scratch, .{ .cdp = self }, c.raw, c.input);
}

// Parse + dispatch a raw JSON CDP message. Used by tests (which
// don't go through the Network thread / inbox pipeline) and by any
// caller that has bytes rather than a pre-parsed InputMessage.
pub fn processMessage(self: *CDP, msg: []const u8) !void {
    const arena = &self.message_arena;
    // The per-message arena is recycled once dispatch is finished.
    defer _ = arena.reset(.{ .retain_with_limit = 1024 * 16 });
    return self.dispatch(arena.allocator(), .{ .cdp = self }, msg);
}

// Called in the worker when a PING message is received. Replies with a
// pong carrying the same body; a send failure is logged, not propagated.
pub fn onPing(self: *CDP, body: []const u8) void {
    self.conn.sendPong(body) catch |err| {
        log.warn(.app, "CDP pong", .{ .err = err });
    };
}

// Called in the Worker when a peer-initiated close with CLOSE_NORMAL is
// received. Replies with CLOSE_NORMAL (a send failure is only logged). The
// worker loop tears down immediately after; drainInbox returns
// error.ClientDisconnected once we return.
pub fn onClose(self: *CDP) void {
    self.conn.send(&WS.CLOSE_NORMAL) catch |err| {
        log.warn(.app, "CDP close reply", .{ .err = err });
    };
}

// Called in the Worker when the peer disconnected (peer EOF, fatal frame error,
// or right after a peer close was replied to). drainInbox returns
// error.ClientDisconnected, which the worker loop catches to tear down.
//
// If `err` is a recognized WS framing error, send the matching close
// frame back to the peer before tearing down — that's how clients
// observe protocol violations (close code 1002 / 1009 / etc.).
pub fn onDisconnect(self: *CDP, err: ?anyerror) void {
    if (err) |e| {
        if (WS.errorReply(e)) |close_frame| {
            // Best effort: the link is going away regardless, so a failed
            // send is deliberately ignored here.
            self.conn.send(close_frame) catch {};
        }
    }
    log.info(.cdp, "CDP disconnect", .{ .err = err });
}

// Serializes `message` as JSON and sends it on the connection. Optional
// fields that are null are omitted from the output.
pub fn sendJSON(self: *CDP, message: anytype) !void {
    try self.conn.sendJSON(message, .{ .emit_null_optional_fields = false });
}

// Runs one iteration of the worker loop, waiting up to one second.
//
// Returns true to keep going and false when the loop should stop (the
// client disconnected, or the fallback http tick failed). Any other error
// from waiting on the page is returned to the caller.
pub fn tick(self: *CDP) !bool {
    // Liveness is enforced by TCP keepalive configured in
    // Server.configureSocket; the wakeup lets V8 run or terminate.
    const wait_ms: u32 = 1000; // 1s
    self.pageWait(wait_ms) catch |wait_err| switch (wait_err) {
        error.NoPage => {
            // No active page yet (or a teardown is in flight). Fall
            // back to ticking the http client directly so CDP messages
            // still get dispatched.
            self.browser.http_client.tick(wait_ms, .all) catch |err| switch (err) {
                error.ClientDisconnected => return false,
                else => {
                    log.err(.app, "http tick", .{ .err = err });
                    return false;
                },
            };
        },
        error.ClientDisconnected => return false,
        else => return wait_err,
    };
    return true;
}

// Waits up to `ms` milliseconds on the current session's runner.
// Returns error.NoPage when the browser has no session.
fn pageWait(self: *CDP, ms: u32) !void {
    const session = &(self.browser.session orelse return error.NoPage);
    var runner = try session.runner(.{});
    return runner.waitCDP(.{ .ms = ms });
}

// Parse-then-dispatch entry point. Used by:
//   - CDP.processMessage (tests, and any other caller that hands us
//     raw JSON bytes).
//   - Target.sendMessageToTarget (a CDP command that wraps another
//     CDP message as a string parameter and forwards it through the
//     dispatch table).
// The normal Network-thread path doesn't go through here — it parses
// once on the Network thread and reaches dispatchParsed directly via
// onMessage.
//
// Any parse failure is reported as error.InvalidJSON. The parsed message is
// allocated from `arena` and is not freed individually.
pub fn dispatch(self: *CDP, arena: Allocator, sender: Command.Sender, str: []const u8) !void {
    const input = json.parseFromSliceLeaky(InputMessage, arena, str, .{
        .ignore_unknown_fields = true,
    }) catch return error.InvalidJSON;
    return self.dispatchParsed(arena, sender, str, input);
}

// Dispatch a pre-parsed CDP message. The caller is responsible for
// keeping `str` and the backing storage for `input`'s string slices
// alive for the duration of the call.
//
// A sessionId other than "STARTUP" that doesn't match the current one is
// answered with error -32001. Errors raised by a handler are reported to the
// client (-31999 for startup commands, -31998 otherwise) using the error's
// name; the handler error is only returned if that report itself can't be
// sent.
fn dispatchParsed(self: *CDP, arena: Allocator, sender: Command.Sender, str: []const u8, input: InputMessage) !void {
    var command = Command{
        .input = .{
            .json = str,
            .id = input.id,
            // Filled in by dispatchCommand once the method is split.
            .action = "",
            .params = input.params,
            .session_id = input.sessionId,
        },
        .cdp = self,
        .arena = arena,
        .sender = sender,
        .browser_context = if (self.browser_context) |*bc| bc else null,
    };

    // See dispatchStartupCommand for more info on this.
    var is_startup = false;
    if (input.sessionId) |input_session_id| {
        if (std.mem.eql(u8, input_session_id, "STARTUP")) {
            is_startup = true;
        } else if (self.isValidSessionId(input_session_id) == false) {
            return command.sendError(-32001, "Unknown sessionId", .{});
        }
    }

    if (is_startup) {
        dispatchStartupCommand(&command, input.method) catch |err| {
            command.sendError(-31999, @errorName(err), .{}) catch return err;
        };
    } else {
        dispatchCommand(&command, input.method) catch |err| {
            command.sendError(-31998, @errorName(err), .{}) catch return err;
        };
    }
}

// A CDP session isn't 100% fully driven by the driver. There are
// independent actions that the browser is expected to take. For example
// Puppeteer expects the browser to startup a tab and thus have existing
// targets.
// To this end, we create a [very] dummy BrowserContext, Target and
// Session. There isn't actually a BrowserContext, just a special id.
// When messages are received with the "STARTUP" sessionId, we do
// "special" handling - the bare minimum we need to do until the driver
// switches to a real BrowserContext.
// (I can imagine this logic will become driver-specific)
fn dispatchStartupCommand(command: *Command, method: []const u8) !void {
    // Stagehand parses the response and errors if we don't return a
    // correct one for Page.getFrameTree on startup call.
    if (std.mem.eql(u8, method, "Page.getFrameTree")) {
        // The Page.getFrameTree handles startup response gracefully.
        return dispatchCommand(command, method);
    }

    // Everything else gets an empty result.
    return command.sendResult(null, .{});
}

// Routes `method` ("Domain.action") to the matching domain module.
//
// The text after the first '.' is stored in `command.input.action`; the text
// before it selects the domain. Domains are matched by length first and then
// by comparing the name as a single integer of that width.
//
// Errors: error.InvalidMethod when `method` contains no '.',
// error.UnknownDomain when no domain matches, plus whatever the domain
// handler returns.
fn dispatchCommand(command: *Command, method: []const u8) !void {
    const domain = blk: {
        const i = std.mem.indexOfScalarPos(u8, method, 0, '.') orelse {
            return error.InvalidMethod;
        };
        command.input.action = method[i + 1 ..];
        break :blk method[0..i];
    };

    switch (domain.len) {
        2 => switch (@as(u16, @bitCast(domain[0..2].*))) {
            asUint(u16, "LP") => return @import("domains/lp.zig").processMessage(command),
            else => {},
        },
        3 => switch (@as(u24, @bitCast(domain[0..3].*))) {
            asUint(u24, "DOM") => return @import("domains/dom.zig").processMessage(command),
            asUint(u24, "Log") => return @import("domains/log.zig").processMessage(command),
            asUint(u24, "CSS") => return @import("domains/css.zig").processMessage(command),
            else => {},
        },
        4 => switch (@as(u32, @bitCast(domain[0..4].*))) {
            asUint(u32, "Page") => return @import("domains/page.zig").processMessage(command),
            else => {},
        },
        5 => switch (@as(u40, @bitCast(domain[0..5].*))) {
            asUint(u40, "Fetch") => return @import("domains/fetch.zig").processMessage(command),
            asUint(u40, "Input") => return @import("domains/input.zig").processMessage(command),
            else => {},
        },
        6 => switch (@as(u48, @bitCast(domain[0..6].*))) {
            asUint(u48, "Target") => return @import("domains/target.zig").processMessage(command),
            asUint(u48, "Audits") => return @import("domains/audits.zig").processMessage(command),
            asUint(u48, "WebMCP") => return @import("domains/webmcp.zig").processMessage(command),
            else => {},
        },
        7 => switch (@as(u56, @bitCast(domain[0..7].*))) {
            asUint(u56, "Browser") => return @import("domains/browser.zig").processMessage(command),
            asUint(u56, "Runtime") => return @import("domains/runtime.zig").processMessage(command),
            asUint(u56, "Network") => return @import("domains/network.zig").processMessage(command),
            asUint(u56, "Storage") => return @import("domains/storage.zig").processMessage(command),
            asUint(u56, "Console") => return @import("domains/console.zig").processMessage(command),
            else => {},
        },
        8 => switch (@as(u64, @bitCast(domain[0..8].*))) {
            asUint(u64, "Security") => return @import("domains/security.zig").processMessage(command),
            else => {},
        },
        9 => switch (@as(u72, @bitCast(domain[0..9].*))) {
            asUint(u72, "Emulation") => return @import("domains/emulation.zig").processMessage(command),
            asUint(u72, "Inspector") => return @import("domains/inspector.zig").processMessage(command),
            else => {},
        },
        11 => switch (@as(u88, @bitCast(domain[0..11].*))) {
            asUint(u88, "Performance") => return @import("domains/performance.zig").processMessage(command),
            else => {},
        },
        13 => switch (@as(u104, @bitCast(domain[0..13].*))) {
            asUint(u104, "Accessibility") => return @import("domains/accessibility.zig").processMessage(command),
            else => {},
        },
        else => {},
    }

    return error.UnknownDomain;
}

// True only when a browser context exists, it has a session_id, and that
// session_id equals `input_session_id`.
fn isValidSessionId(self: *const CDP, input_session_id: []const u8) bool {
    const browser_context = &(self.browser_context orelse return false);
    const session_id = browser_context.session_id orelse return false;
    return std.mem.eql(u8, session_id, input_session_id);
}

// Creates the (single) browser context and returns its id.
//
// Returns error.AlreadyExists when a browser context is already present.
// If BrowserContext.init fails, `browser_context` is put back to null so
// that later dispatches and `deinit` never see a half-initialized context.
pub fn createBrowserContext(self: *CDP) ![]const u8 {
    if (self.browser_context != null) {
        return error.AlreadyExists;
    }
    const id = self.browser_context_id_gen.next();

    // The context is initialized in place, so the optional has to be made
    // non-null (with undefined contents) before init can be handed a pointer.
    self.browser_context = @as(BrowserContext, undefined);
    // Undo the line above on failure; otherwise the optional would stay
    // non-null while holding undefined / already-released state.
    errdefer self.browser_context = null;

    const browser_context = &self.browser_context.?;
    try BrowserContext.init(browser_context, id, self);
    return id;
}

// Tears down the browser context identified by `browser_context_id`.
// Returns false when there is no browser context or the id doesn't match;
// otherwise releases it, closes the browser session, resets the
// browser-context arena and returns true.
pub fn disposeBrowserContext(self: *CDP, browser_context_id: []const u8) bool {
    const bc = &(self.browser_context orelse return false);
    if (std.mem.eql(u8, bc.id, browser_context_id) == false) {
        return false;
    }
    bc.deinit();
    self.browser.closeSession();
    self.browser_context = null;
    _ = self.browser_context_arena.reset(.{ .retain_with_limit = 1024 * 16 });
    return true;
}

// Options for sendEvent.
const SendEventOpts = struct {
    // Emitted as "sessionId" on the event; left out when null (sendJSON
    // does not emit null optional fields).
    session_id: ?[]const u8 = null,
};

// Sends a CDP event named `method` with parameters `p`. Passing `null` for
// `p` sends an empty params object.
pub fn sendEvent(self: *CDP, method: []const u8, p: anytype, opts: SendEventOpts) !void {
    return self.sendJSON(.{
        .method = method,
        .params = if (comptime @typeInfo(@TypeOf(p)) == .null) struct {}{} else p,
        .sessionId = opts.session_id,
    });
}

pub const BrowserContext = struct {
    const Node = @import("Node.zig");
    const AXNode = @import("AXNode.zig");

    const CapturedResponse = struct {
        must_encode: bool,
        data: std.ArrayList(u8),
    };

    // Key for `captured_responses`. Documents are keyed by `loader_id`,
    // everything else by `request_id` — the two id-spaces are independent
    // counters and overlap numerically (loader 1 / request 1, loader 2 /
    // request 2, ...), so the map key has to carry the namespace or
    // entries collide. The wire-format prefix (`LID-` / `REQ-`) provides
    // the same disambiguation on lookup; see `idFromRequestId` in
    // domains/network.zig.
    pub const CapturedResponseKey = struct {
        kind: enum { request, loader },
        id: u32,
    };

    id: []const u8,
    cdp: *CDP,

    // Represents the browser session. There is no equivalent in CDP. For
    // all intents and purposes, from CDP's point of view our Browser and
    // our Session more or less map to a BrowserContext. THIS HAS ZERO
    // RELATION TO SESSION_ID
    session: *Session,

    // Tied to the lifetime of the BrowserContext
    arena: Allocator,

    // Tied to the lifetime of 1 page rendered in the BrowserContext.
    frame_arena: Allocator,

    // From the parent's notification_arena.allocator(). Most of the CDP
    // code paths deal with a cmd which has its own arena (from the
    // message_arena). But notifications happen outside of the typical CDP
    // request->response, and thus don't have a cmd and don't have an arena.
    notification_arena: Allocator,

    // Maps to our Page. (There are other types of targets, but we only
    // deal with "pages" for now). Since we only allow 1 open page at a
    // time, we only have 1 target_id.
    target_id: ?[14]u8,

    // The CDP session_id. After the target/page is created, the client
    // "attaches" to it (either explicitly or automatically). We return a
    // "sessionId" which identifies this link. `sessionId` is how
    // the CDP client informs us what it's trying to manipulate. Because we
    // only support 1 BrowserContext at a time, and 1 page at a time, this
    // is all pretty straightforward, but it still needs to be enforced, i.e.
    // if we get a request with a sessionId that doesn't match the current one
    // we should reject it.
    session_id: ?[]const u8,

    security_origin: []const u8,
    page_life_cycle_events: bool,
    secure_context_type: []const u8,
    node_registry: Node.Registry,
    node_search_list: Node.Search.List,

    inspector_session: *js.Inspector.Session,
    isolated_worlds: std.ArrayList(*IsolatedWorld),

    // Scripts registered via Page.addScriptToEvaluateOnNewDocument.
    // Evaluated in each new document after navigation completes.
    scripts_on_new_document: std.ArrayList(ScriptOnNewDocument) = .empty,
    next_script_id: u32 = 1,

    http_proxy_changed: bool = false,
    user_agent_changed: bool = false,

    // Extra headers to add to all requests.
    extra_headers: std.ArrayList([*c]const u8) = .empty,

    intercept_state: InterceptState,

    // When network is enabled, we'll capture the transfer.id -> body
    // This is awfully memory intensive, but our underlying http client and
    // its users (script manager and frame) correctly do not hold the body
    // memory longer than they have to. In fact, the main request is only
    // ever streamed. So if CDP is the only thing that needs bodies in
    // memory for an arbitrary amount of time, then that's where we're going
    // to store them.
    captured_responses: std.AutoHashMapUnmanaged(CapturedResponseKey, CapturedResponse),

    notification: *Notification,

    // Pre-armed response for the next JS dialog (alert/confirm/prompt).
    // Set by Page.handleJavaScriptDialog; consumed (and cleared) when the
    // next javascript_dialog_opening notification is dispatched. Strings
    // are duplicated into self.arena so they outlive the CDP command's
    // own message arena.
pending_dialog_response: ?Notification.DialogResponse = null, // webmcp tool invocation invocation_id_gen: InvocationIdGen = .{}, // WebMCP domain state. Populated when `WebMCP.enable` is received. webmcp_invocations: std.AutoHashMapUnmanaged(u32, *@import("domains/webmcp.zig").Invocation) = .empty, fn init(self: *BrowserContext, id: []const u8, cdp: *CDP) !void { const allocator = cdp.allocator; // Create notification for this BrowserContext const notification = try Notification.init(allocator); errdefer notification.deinit(); const session = try cdp.browser.newSession(notification); if (cdp.app.config.cookieFile()) |cookie_path| { lp.cookies.loadFromFile(session, cookie_path); } const browser = &cdp.browser; const inspector_session = browser.env.inspector.?.startSession(self); errdefer browser.env.inspector.?.stopSession(); var registry = Node.Registry.init(allocator); errdefer registry.deinit(); self.* = .{ .id = id, .cdp = cdp, .target_id = null, .session_id = null, .session = session, .security_origin = URL_BASE, .secure_context_type = "Secure", // TODO = enum .page_life_cycle_events = false, // TODO; Target based value .node_registry = registry, .node_search_list = undefined, .isolated_worlds = .empty, .inspector_session = inspector_session, .frame_arena = cdp.frame_arena.allocator(), .arena = cdp.browser_context_arena.allocator(), .notification_arena = cdp.notification_arena.allocator(), .intercept_state = try InterceptState.init(allocator), .captured_responses = .empty, .notification = notification, }; self.node_search_list = Node.Search.List.init(allocator, &self.node_registry); errdefer self.deinit(); try notification.register(.frame_remove, self, onFrameRemove); try notification.register(.frame_created, self, onFrameCreated); try notification.register(.frame_navigate, self, onFrameNavigate); try notification.register(.frame_navigated, self, onFrameNavigated); try notification.register(.frame_child_frame_created, self, onFrameChildFrameCreated); try 
notification.register(.frame_dom_content_loaded, self, onFrameDOMContentLoaded); try notification.register(.frame_loaded, self, onFrameLoaded); try notification.register(.javascript_dialog_opening, self, onJavascriptDialogOpening); } pub fn deinit(self: *BrowserContext) void { const browser = &self.cdp.browser; const env = &browser.env; // resetContextGroup detach the inspector from all contexts. // It appends async tasks, so we make sure we run the message loop // before deinit it. env.inspector.?.resetContextGroup(); env.inspector.?.stopSession(); // abort all intercepted requests before closing the session/page // since some of these might callback into the page/scriptmanager. // intercept_state stores ids — look each one up; if it's already // gone (out-of-band destroy), there's nothing to abort, but the // intercepted counter still needs decrementing because we // incremented it on pause. const http_client = &browser.http_client; for (self.intercept_state.pendingIntercepts()) |transfer_id| { lp.assert( http_client.interception_layer.intercepted > 0, "BrowserContext.deinit.intercepted", .{ .value = http_client.interception_layer.intercepted }, ); http_client.interception_layer.intercepted -= 1; if (http_client.findTransfer(transfer_id)) |transfer| { transfer.abort(error.ClientDisconnect); } } // Notify any CDP client waiting on an in-flight WebMCP invocation // before the V8 context (and its promise callbacks) get torn down // by browser.closeSession below. @import("domains/webmcp.zig").cancelAllPending(self); for (self.isolated_worlds.items) |world| { world.deinit(); } self.isolated_worlds.clearRetainingCapacity(); // do this before closeSession, since we don't want to process any // new notification (Or maybe, instead of the deinit above, we just // rely on those notifications to do our normal cleanup?) self.notification.unregisterAll(self); // If the session has a frame, we need to clear it first. 
The page // context is always nested inside of the isolated world context, // so we need to shutdown the page one first. browser.closeSession(); self.node_registry.deinit(); self.node_search_list.deinit(); self.notification.deinit(); if (self.http_proxy_changed) { // has to be called after browser.closeSession, since it won't // work if there are active connections. browser.http_client.changeProxy(null) catch |err| { log.warn(.http, "changeProxy", .{ .err = err }); }; } if (self.user_agent_changed) { browser.http_client.clearUserAgentOverride(); } self.intercept_state.deinit(); } pub fn reset(self: *BrowserContext) void { self.node_registry.reset(); self.node_search_list.reset(); } pub fn createIsolatedWorld(self: *BrowserContext, world_name: []const u8, grant_universal_access: bool) !*IsolatedWorld { const browser = &self.cdp.browser; const arena = try browser.arena_pool.acquire(.small, "IsolatedWorld"); errdefer browser.arena_pool.release(arena); const call_arena = try browser.arena_pool.acquire(.tiny, "IsolatedWorld.call_arena"); errdefer browser.arena_pool.release(call_arena); const world = try arena.create(IsolatedWorld); world.* = .{ .arena = arena, .call_arena = call_arena, .context = null, .browser = browser, .name = try arena.dupe(u8, world_name), .grant_universal_access = grant_universal_access, }; try self.isolated_worlds.append(self.arena, world); return world; } pub fn nodeWriter(self: *BrowserContext, root: *const Node, opts: Node.Writer.Opts) Node.Writer { return .{ .root = root, .depth = opts.depth, .exclude_root = opts.exclude_root, .registry = &self.node_registry, }; } pub fn axnodeWriter(self: *BrowserContext, temp_arena: Allocator, root: *const Node, opts: AXNode.Writer.Opts) !AXNode.Writer { // Bind the writer to the frame that owns the root node, not whatever // happens to be `currentFrame`. 
Name resolution (`Label.findLabelByFor` // against `ownerDocument`) and visibility checks (`frame._style_manager`) // are per-frame; getting this wrong on cross-frame queries produces // names/visibility from the wrong document. const fallback = self.session.currentFrame() orelse return error.FrameNotLoaded; const frame = root.dom.ownerFrame(fallback); const cache = try frame.call_arena.create(Element.VisibilityCache); cache.* = .empty; const label_index = try frame.call_arena.create(Label.LabelByForIndex); label_index.* = .{}; return .{ .frame = frame, .root = root, .registry = &self.node_registry, .visibility_cache = cache, .label_index = label_index, .temp_arena = temp_arena, .filter = opts.filter, }; } pub fn getURL(self: *const BrowserContext) ?[:0]const u8 { const frame = self.session.currentFrame() orelse return null; const url = frame.url; return if (url.len == 0) null else url; } pub fn getTitle(self: *const BrowserContext) ?[]const u8 { const frame = self.session.currentFrame() orelse return null; return frame.getTitle() catch |err| { log.err(.cdp, "page title", .{ .err = err }); return null; }; } pub fn networkEnable(self: *BrowserContext) !void { try self.notification.register(.http_request_fail, self, onHttpRequestFail); try self.notification.register(.http_request_start, self, onHttpRequestStart); try self.notification.register(.http_request_done, self, onHttpRequestDone); try self.notification.register(.http_response_data, self, onHttpResponseData); try self.notification.register(.http_response_header_done, self, onHttpResponseHeadersDone); try self.notification.register(.http_request_served_from_cache, self, onHttpRequestServedFromCache); } pub fn networkDisable(self: *BrowserContext) void { self.notification.unregister(.http_request_fail, self); self.notification.unregister(.http_request_start, self); self.notification.unregister(.http_request_done, self); self.notification.unregister(.http_response_data, self); 
self.notification.unregister(.http_response_header_done, self); self.notification.unregister(.http_request_served_from_cache, self); } pub fn fetchEnable(self: *BrowserContext, authRequests: bool) !void { try self.notification.register(.http_request_intercept, self, onHttpRequestIntercept); if (authRequests) { try self.notification.register(.http_request_auth_required, self, onHttpRequestAuthRequired); } } pub fn fetchDisable(self: *BrowserContext) void { self.notification.unregister(.http_request_intercept, self); self.notification.unregister(.http_request_auth_required, self); } pub fn lifecycleEventsEnable(self: *BrowserContext) !void { self.page_life_cycle_events = true; try self.notification.register(.frame_network_idle, self, onFrameNetworkIdle); try self.notification.register(.frame_network_almost_idle, self, onFrameNetworkAlmostIdle); } pub fn lifecycleEventsDisable(self: *BrowserContext) void { self.page_life_cycle_events = false; self.notification.unregister(.frame_network_idle, self); self.notification.unregister(.frame_network_almost_idle, self); } pub fn consoleEnable(self: *BrowserContext) !void { try self.notification.register(.console_message, self, onConsoleMessage); } pub fn consoleDisable(self: *BrowserContext) void { self.notification.unregister(.console_message, self); } pub fn runtimeEnable(self: *BrowserContext) !void { try self.notification.register(.runtime_console_message, self, onRuntimeConsoleMessage); } pub fn runtimeDisable(self: *BrowserContext) void { self.notification.unregister(.runtime_console_message, self); } pub fn webmcpEnable(self: *BrowserContext) !void { try self.notification.register(.model_context_tool_added, self, onModelContextToolAdded); try self.notification.register(.model_context_tool_removed, self, onModelContextToolRemoved); } pub fn webmcpDisable(self: *BrowserContext) void { self.notification.unregister(.model_context_tool_added, self); self.notification.unregister(.model_context_tool_removed, self); } pub fn 
onModelContextToolAdded(ctx: *anyopaque, event: *const Notification.ModelContextToolEvent) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/webmcp.zig").onToolAdded(self, event);
    }

    /// Forwards a tool-removed notification to the webmcp domain.
    pub fn onModelContextToolRemoved(ctx: *anyopaque, event: *const Notification.ModelContextToolEvent) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/webmcp.zig").onToolRemoved(self, event);
    }

    /// Forwards a frame-remove notification to the page domain. The
    /// notification payload itself is not used.
    pub fn onFrameRemove(ctx: *anyopaque, _: Notification.FrameRemove) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        @import("domains/page.zig").frameRemove(self);
    }

    /// Forwards a frame-created notification to the page domain.
    pub fn onFrameCreated(ctx: *anyopaque, frame: *Frame) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/page.zig").frameCreated(self, frame);
    }

    /// Forwards a frame-navigate notification to the page domain.
    pub fn onFrameNavigate(ctx: *anyopaque, msg: *const Notification.FrameNavigate) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/page.zig").frameNavigate(self, msg);
    }

    /// Forwards a frame-navigated notification to the page domain, lending it
    /// the notification arena, which is reset once the handler returns.
    pub fn onFrameNavigated(ctx: *anyopaque, msg: *const Notification.FrameNavigated) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        defer self.resetNotificationArena();
        return @import("domains/page.zig").frameNavigated(self.notification_arena, self, msg);
    }

    /// Forwards a child-frame-created notification to the page domain.
    pub fn onFrameChildFrameCreated(ctx: *anyopaque, msg: *const Notification.FrameChildFrameCreated) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/page.zig").frameChildFrameCreated(self, msg);
    }

    /// Forwards a network-idle notification to the page domain.
    pub fn onFrameNetworkIdle(ctx: *anyopaque, msg: *const Notification.FrameNetworkIdle) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/page.zig").frameNetworkIdle(self, msg);
    }

    /// Forwards a network-almost-idle notification to the page domain.
    pub fn onFrameNetworkAlmostIdle(ctx: *anyopaque, msg: *const Notification.FrameNetworkAlmostIdle) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/page.zig").frameNetworkAlmostIdle(self, msg);
    }

    /// Forwards a request-start notification to the network domain.
    pub fn onHttpRequestStart(ctx: *anyopaque, msg: *const Notification.RequestStart) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        try @import("domains/network.zig").httpRequestStart(self, msg);
    }

    /// Forwards a request-intercept notification to the fetch domain.
    pub fn onHttpRequestIntercept(ctx: *anyopaque, msg: *const Notification.RequestIntercept) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        try @import("domains/fetch.zig").requestIntercept(self, msg);
    }

    /// Forwards a request-fail notification to the network domain.
    pub fn onHttpRequestFail(ctx: *anyopaque, msg: *const Notification.RequestFail) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/network.zig").httpRequestFail(self, msg);
    }

    /// Forwards a DOMContentLoaded notification to the page domain.
    pub fn onFrameDOMContentLoaded(ctx: *anyopaque, msg: *const Notification.FrameDOMContentLoaded) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/page.zig").frameDOMContentLoaded(self, msg);
    }

    /// Forwards a frame-loaded notification to the page domain.
    pub fn onFrameLoaded(ctx: *anyopaque, msg: *const Notification.FrameLoaded) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/page.zig").frameLoaded(self, msg);
    }

    /// Forwards a JavaScript-dialog-opening notification to the page domain.
    pub fn onJavascriptDialogOpening(ctx: *anyopaque, msg: *const Notification.JavascriptDialogOpening) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/page.zig").javascriptDialogOpening(self, msg);
    }

    /// Builds the `captured_responses` key for a transfer: document requests
    /// are keyed by their loader id, every other request by the transfer id.
    fn keyFromTransfer(transfer: *const Transfer) CDP.BrowserContext.CapturedResponseKey {
        return if (transfer.req.resource_type == .document)
            .{ .kind = .loader, .id = transfer.req.loader_id }
        else
            .{ .kind = .request, .id = transfer.id };
    }

    /// Registers an (initially empty) captured response for the transfer, if
    /// there isn't one already, then forwards the notification to the network
    /// domain. The notification arena is reset once the handler returns.
    pub fn onHttpResponseHeadersDone(ctx: *anyopaque, msg: *const Notification.ResponseHeaderDone) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        defer self.resetNotificationArena();

        const arena = self.frame_arena;

        // Prepare the captured response value.
        const key = keyFromTransfer(msg.transfer);
        const gop = try self.captured_responses.getOrPut(arena, key);
        if (!gop.found_existing) {
            // getOrPut has already inserted the key, and the value is
            // undefined until the assignment below completes. Nothing in the
            // initializer may return an error, or we'd leave an entry with an
            // undefined value behind for onHttpResponseData to append to.
            gop.value_ptr.* = .{
                .data = .empty,
                // Encode the data in base64 by default, but don't encode
                // for well known content-type.
                .must_encode = blk: {
                    const response = msg.response;
                    const ct = response.contentType() orelse break :blk true;
                    // The header comes from the remote server; when it can't
                    // be parsed, treat the body as opaque bytes.
                    const mime = Mime.parse(ct) catch |err| {
                        log.warn(.cdp, "invalid content type", .{ .err = err });
                        break :blk true;
                    };
                    if (!mime.isText()) {
                        break :blk true;
                    }
                    // Charset names are case-insensitive ("utf-8" == "UTF-8").
                    break :blk !std.ascii.eqlIgnoreCase("UTF-8", mime.charsetString());
                },
            };
        }

        return @import("domains/network.zig").httpResponseHeaderDone(self.notification_arena, self, msg);
    }

    /// Forwards a request-done notification to the network domain.
    pub fn onHttpRequestDone(ctx: *anyopaque, msg: *const Notification.RequestDone) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/network.zig").httpRequestDone(self, msg);
    }

    /// Appends a chunk of response body to the captured response registered
    /// for this transfer. Asserts that the entry exists.
    pub fn onHttpResponseData(ctx: *anyopaque, msg: *const Notification.ResponseData) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        const arena = self.frame_arena;

        const key = keyFromTransfer(msg.transfer);
        const resp = self.captured_responses.getPtr(key) orelse lp.assert(false, "onHttpResponseData missing captured response", .{});

        return resp.data.appendSlice(arena, msg.data);
    }

    /// Forwards an auth-required notification to the fetch domain. The
    /// notification arena is reset once the handler returns.
    pub fn onHttpRequestAuthRequired(ctx: *anyopaque, data: *const Notification.RequestAuthRequired) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        defer self.resetNotificationArena();
        try @import("domains/fetch.zig").requestAuthRequired(self, data);
    }

    /// Forwards a served-from-cache notification to the network domain.
    pub fn onHttpRequestServedFromCache(ctx: *anyopaque, msg: *const Notification.RequestServedFromCache) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        return @import("domains/network.zig").httpServedFromCache(self, msg);
    }

    /// Forwards a console message to the console domain, lending it the
    /// notification arena, which is reset once the handler returns.
    pub fn onConsoleMessage(ctx: *anyopaque, msg: *const Notification.ConsoleMessage) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        defer self.resetNotificationArena();
        return @import("domains/console.zig").consoleMessage(self.notification_arena, self, msg);
    }

    /// Forwards a console message to the runtime domain, lending it the
    /// notification arena, which is reset once the handler returns.
    pub fn onRuntimeConsoleMessage(ctx: *anyopaque, msg: *const Notification.ConsoleMessage) !void {
        const self: *BrowserContext = @ptrCast(@alignCast(ctx));
        defer self.resetNotificationArena();
        return @import("domains/runtime.zig").consoleMessage(self.notification_arena, self, msg);
    }

    /// Resets the CDP notification arena, retaining at most 64KB of capacity.
    fn resetNotificationArena(self: *BrowserContext) void {
        _ = self.cdp.notification_arena.reset(.{ .retain_with_limit = 1024 * 64 });
    }

    /// Sends `msg` to the inspector session, then runs microtasks on the
    /// browser's JS environment.
    pub fn callInspector(self: *const BrowserContext, msg: []const u8) void {
        self.inspector_session.send(msg);
        self.session.browser.env.runMicrotasks();
    }

    /// Inspector callback for command responses. The second argument is
    /// ignored. Send failures are logged, not propagated.
    pub fn onInspectorResponse(ctx: *anyopaque, _: u32, msg: []const u8) void {
        sendInspectorMessage(@ptrCast(@alignCast(ctx)), msg) catch |err| {
            log.err(.cdp, "send inspector response", .{ .err = err });
        };
    }

    /// Inspector callback for events. Send failures are logged, not
    /// propagated.
    pub fn onInspectorEvent(ctx: *anyopaque, msg: []const u8) void {
        if (log.enabled(.cdp, .debug)) {
            // msg should be {"method":<method>,...
            lp.assert(std.mem.startsWith(u8, msg, "{\"method\":"), "onInspectorEvent prefix", .{});
            // Logging is diagnostic only: whether or not we manage to extract
            // the method name, the event is still forwarded below, so that
            // delivery doesn't depend on the log level.
            if (std.mem.indexOfScalar(u8, msg, ',')) |method_end| {
                const method = msg[10..method_end];
                log.debug(.cdp, "inspector event", .{ .method = method });
            } else {
                log.err(.cdp, "invalid inspector event", .{ .msg = msg });
            }
        }

        sendInspectorMessage(@ptrCast(@alignCast(ctx)), msg) catch |err| {
            log.err(.cdp, "send inspector event", .{ .err = err });
        };
    }

    // This is hacky x 2. First, we create the JSON payload by gluing our
    // session_id onto it. Second, we're much more client/websocket aware than
    // we should be.
    fn sendInspectorMessage(self: *BrowserContext, msg: []const u8) !void {
        const session_id = self.session_id orelse {
            // We no longer have an active session. What should we do
            // in this case?
            return;
        };

        const cdp = self.cdp;
        const allocator = cdp.conn.send_arena.allocator();

        const field = ",\"sessionId\":\"";

        // + 1 for the closing quote after the session id
        // + 10 for the max websocket header
        const message_len = msg.len + session_id.len + 1 + field.len + 10;

        var buf: std.ArrayList(u8) = .empty;
        buf.ensureTotalCapacity(allocator, message_len) catch |err| {
            log.err(.cdp, "inspector buffer", .{ .err = err });
            return;
        };

        // reserve 10 bytes for websocket header
        buf.appendSliceAssumeCapacity(&.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });

        // -1 because we don't want the closing brace '}'
        buf.appendSliceAssumeCapacity(msg[0 .. msg.len - 1]);
        buf.appendSliceAssumeCapacity(field);
        buf.appendSliceAssumeCapacity(session_id);
        buf.appendSliceAssumeCapacity("\"}");
        if (comptime IS_DEBUG) {
            std.debug.assert(buf.items.len == message_len);
        }

        try cdp.conn.sendJSONRaw(buf);
    }
};

// A script source paired with a numeric identifier.
// NOTE(review): presumably backs Page.addScriptToEvaluateOnNewDocument - confirm.
const ScriptOnNewDocument = struct {
    identifier: u32,
    source: []const u8,
};

/// see: https://chromium.googlesource.com/chromium/src/+/master/third_party/blink/renderer/bindings/core/v8/V8BindingDesign.md#world
/// The current understanding. An isolated world lives in the same isolate, but a separated context.
/// Clients create this to be able to create variables and run code without interfering with the
/// normal namespace and values of the webframe. Similar to the main context we need to pretend to recreate it after
/// an executionContextsCleared event which happens when navigating to a new frame. A client can have a command be executed
/// in the isolated world by using its Context ID or the worldName.
/// grantUniversalAccess Indicates whether the isolated world can reference objects like the DOM or other JS Objects.
/// An isolated world has its own instance of globals like Window.
/// Generally the client needs to resolve a node into the isolated world to be able to work with it.
/// An object id is unique across all contexts, different object ids can refer to the same Node in different contexts.
const IsolatedWorld = struct {
    arena: Allocator,
    call_arena: Allocator,
    browser: *Browser,
    name: []const u8,
    context: ?*js.Context = null,
    grant_universal_access: bool,

    // This world's own identity tracking, kept apart from the main world's
    // so that CDP inspector contexts don't share v8::Globals with it.
    identity: js.Identity = .{},

    /// Tears down the context (if any) and hands both arenas back to the
    /// browser's arena pool.
    pub fn deinit(self: *IsolatedWorld) void {
        self.removeContext();

        const pool = &self.browser.arena_pool;
        pool.release(self.call_arena);
        pool.release(self.arena);
    }

    /// Destroys the JS context, if there is one, and resets the identity.
    pub fn removeContext(self: *IsolatedWorld) void {
        if (self.context) |js_context| {
            self.browser.env.destroyContext(js_context);
            self.context = null;
        }

        // An identity without a context probably can't happen, but clearing
        // it unconditionally costs nothing.
        self.identity.deinit();
        self.identity = .{};
    }

    // The isolated world has to share at least some state with the related
    // frame, specifically the DocumentHTML (assuming grantUniversalAccess
    // will be set to True!).
    // We just created the world and the frame. The frame's state lives in the
    // session, but is updated on navigation.
    // This also means this pointer becomes invalid after removePage until a
    // new frame is created.
    // Currently we have only 1 frame and thus also only 1 state in the
    // isolated world.
    pub fn createContext(self: *IsolatedWorld, frame: *Frame) !*js.Context {
        if (self.context) |existing| {
            log.warn(.cdp, "not implemented", .{
                .feature = "createContext: Not implemented second isolated context creation",
                .info = "reuse existing context",
            });
            return existing;
        }

        const created = try self.browser.env.createContext(frame, .{
            .identity = &self.identity,
            .identity_arena = self.arena,
            .call_arena = self.call_arena,
            .debug_name = "IsolatedContext",
        });
        self.context = created;
        return created;
    }
};

// Sending a result has two different behaviors. Normally the result goes to
// the client, but in some cases we want to capture it instead. Command routes
// its results and errors through `sender` so that both are possible.
pub const Command = struct {
    // Scratch arena, usable for any allocation made while processing the
    // message
    arena: Allocator,

    // Our CDP instance
    cdp: *CDP,

    // The browser context targeted by this command
    browser_context: ?*BrowserContext,

    // What the client sent us (the id, optional session_id, params, ...)
    input: Input,

    // Most of the time the Sender is cdp itself: sender.sendJSON() is called
    // and CDP sends the message to the client. Some commands are dispatched
    // internally though, and then the Sender is code which captures the data
    // that we were "sending".
    sender: Sender,

    const Sender = union(enum) {
        cdp: *CDP,
        capture: *std.Io.Writer,

        /// Sends `message` through the CDP instance, or serializes it as JSON
        /// into the capture writer.
        pub fn sendJSON(self: Sender, message: anytype) !void {
            switch (self) {
                .cdp => |target| return target.sendJSON(message),
                .capture => |out| return json.Stringify.value(
                    message,
                    .{ .emit_null_optional_fields = false },
                    out,
                ),
            }
        }
    };

    /// Parses the raw params of the command as a `T`, allocating from the
    /// command arena and ignoring unknown fields. Returns null when the
    /// command has no params.
    pub fn params(self: *const Command, comptime T: type) !?T {
        const raw_params = self.input.params orelse return null;
        return try json.parseFromSliceLeaky(
            T,
            self.arena,
            raw_params.raw,
            .{ .ignore_unknown_fields = true },
        );
    }

    /// Creates a browser context on the CDP instance and makes it the one
    /// this command targets.
    pub fn createBrowserContext(self: *Command) !*BrowserContext {
        _ = try self.cdp.createBrowserContext();
        const created = &(self.cdp.browser_context.?);
        self.browser_context = created;
        return self.browser_context.?;
    }

    const SendResultOpts = struct {
        include_session_id: bool = true,
    };

    /// Replies to the command with `result`. A null result is sent as an
    /// empty object.
    pub fn sendResult(self: *Command, result: anytype, opts: SendResultOpts) !void {
        const session_id = if (opts.include_session_id) self.input.session_id else null;
        return self.sender.sendJSON(.{
            .id = self.input.id,
            .result = if (comptime @typeInfo(@TypeOf(result)) == .null) struct {}{} else result,
            .sessionId = session_id,
        });
    }

    /// Emits an event. Events ALWAYS go to the client, so this deliberately
    /// bypasses self.sender.
    pub fn sendEvent(self: *Command, method: []const u8, p: anytype, opts: SendEventOpts) !void {
        return self.cdp.sendEvent(method, p, opts);
    }

    const SendErrorOpts = struct {
        include_session_id: bool = true,
    };

    /// Replies to the command with an error made of `code` and `message`.
    pub fn sendError(self: *Command, code: i32, message: []const u8, opts: SendErrorOpts) !void {
        const session_id = if (opts.include_session_id) self.input.session_id else null;
        return self.sender.sendJSON(.{
            .id = self.input.id,
            .@"error" = .{ .code = code, .message = message },
            .sessionId = session_id,
        });
    }

    const Input = struct {
        // The message id, echoed back when we reply
        id: ?i64,

        // The "action" of the message. Given a method of "LOG.enable", the
        // action is "enable"
        action: []const u8,

        // See notes in BrowserContext about session_id
        session_id: ?[]const u8,

        // Unparsed / untyped input.params.
        params: ?InputParams,

        // The full raw json input
        json: []const u8,
    };
};

// The structure we always expect when parsing a JSON message from the
// client. Parsed on the Network thread inside Connection.handleMessage; the
// slices reference the raw JSON bytes (or arena allocations for fields that
// needed unescaping). Both outlive the InputMessage for the inbox message's
// lifetime.
pub const InputMessage = struct {
    id: ?i64 = null,
    method: []const u8,
    params: ?InputParams = null,
    sessionId: ?[]const u8 = null,
};

// The JSON "params" field changes based on the "method". Initially, we just
// capture the raw json object (including the opening and closing braces).
// Then, when we're processing the message, and we know what type it is, we
// can parse it (in Command.params).
pub const InputParams = struct {
    raw: []const u8,

    /// Custom std.json hook: rather than decoding the object, remember the
    /// slice of scanner input it occupies. Anything other than an object is
    /// rejected with error.UnexpectedToken.
    pub fn jsonParse(
        _: Allocator,
        scanner: *json.Scanner,
        _: json.ParseOptions,
    ) !InputParams {
        const depth = scanner.stackHeight();
        const begin = scanner.cursor;

        const first_token = try scanner.next();
        if (first_token != .object_begin) {
            return error.UnexpectedToken;
        }

        // Consume tokens until we're back at the depth we started from.
        try scanner.skipUntilStackHeight(depth);

        return .{ .raw = scanner.input[begin..scanner.cursor] };
    }
};

/// Reinterprets the bytes of a comptime-known string as a value of type `T`.
fn asUint(comptime T: type, comptime string: []const u8) T {
    const bytes = string[0..string.len].*;
    return @bitCast(bytes);
}

const testing = @import("testing.zig");
test "cdp: invalid json" {
    var ctx = try testing.context();
    defer ctx.deinit();

    try testing.expectError(error.InvalidJSON, ctx.processMessage("invalid"));

    // a message without a method is rejected
    try testing.expectError(error.InvalidJSON, ctx.processMessage(.{}));

    try ctx.processMessage(.{ .method = "Target" });
    try ctx.expectSentError(-31998, "InvalidMethod", .{});

    try ctx.processMessage(.{ .method = "Unknown.domain" });
    try ctx.expectSentError(-31998, "UnknownDomain", .{});

    try ctx.processMessage(.{ .method = "Target.over9000" });
    try ctx.expectSentError(-31998, "UnknownMethod", .{});
}

test "cdp: invalid sessionId" {
    var ctx = try testing.context();
    defer ctx.deinit();

    {
        // no browser context at all
        try ctx.processMessage(.{ .method = "Hi", .sessionId = "nope" });
        try ctx.expectSentError(-32001, "Unknown sessionId", .{});
    }

    {
        // a browser context, but one without a session_id
        _ = try ctx.loadBrowserContext(.{});
        try ctx.processMessage(.{ .method = "Hi", .sessionId = "BC-Has-No-SessionId" });
        try ctx.expectSentError(-32001, "Unknown sessionId", .{});
    }

    {
        // a browser context whose session_id is a different one
        _ = try ctx.loadBrowserContext(.{ .session_id = "SESS-2" });
        try ctx.processMessage(.{ .method = "Hi", .sessionId = "SESS-1" });
        try ctx.expectSentError(-32001, "Unknown sessionId", .{});
    }
}

test "cdp: STARTUP sessionId" {
    var ctx = try testing.context();
    defer ctx.deinit();

    {
        // no browser context at all
        try ctx.processMessage(.{ .id = 2, .method = "Hi", .sessionId = "STARTUP" });
        try ctx.expectSentResult(null, .{ .id = 2, .index = 0, .session_id = "STARTUP" });
    }

    {
        // a browser context, but one without a session_id
        _ = try ctx.loadBrowserContext(.{});
        try ctx.processMessage(.{ .id = 3, .method = "Hi", .sessionId = "STARTUP" });
        try ctx.expectSentResult(null, .{ .id = 3, .index = 1, .session_id = "STARTUP" });
    }

    {
        // a browser context whose session_id is a different one
        _ = try ctx.loadBrowserContext(.{ .session_id = "SESS-2" });
        try ctx.processMessage(.{ .id = 4, .method = "Hi", .sessionId = "STARTUP" });
        try ctx.expectSentResult(null, .{ .id = 4, .index = 2, .session_id = "STARTUP" });
    }
}