Files
browser/src/cdp/CDP.zig
2026-05-26 15:28:54 +01:00

1324 lines
53 KiB
Zig

// Copyright (C) 2023-2024 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const lp = @import("lightpanda");
const App = @import("../App.zig");
const Inbox = @import("../Inbox.zig");
const Network = @import("../network/Network.zig");
const Notification = @import("../Notification.zig");
const WS = @import("../network/WS.zig");
const js = @import("../browser/js/js.zig");
const Browser = @import("../browser/Browser.zig");
const Session = @import("../browser/Session.zig");
const Frame = @import("../browser/Frame.zig");
const Mime = @import("../browser/Mime.zig");
const Element = @import("../browser/webapi/Element.zig");
const Label = @import("../browser/webapi/element/html/Label.zig");
const Transfer = @import("../browser/HttpClient.zig").Transfer;
const Connection = @import("Connection.zig");
const Incrementing = @import("id.zig").Incrementing;
const InterceptState = @import("domains/fetch.zig").InterceptState;
const log = lp.log;
const json = std.json;
const posix = std.posix;
const Allocator = std.mem.Allocator;
pub const URL_BASE = "chrome://newtab/";
const IS_DEBUG = @import("builtin").mode == .Debug;
const SessionIdGen = Incrementing(u32, "SID");
const BrowserContextIdGen = Incrementing(u32, "BID");
// webmcp tool invocation
pub const InvocationIdGen = Incrementing(u32, "INV");
// Generic so that we can inject mocks into it.
const CDP = @This();
app: *App,
conn: Connection,
browser: Browser,
allocator: Allocator,
// Network-thread read-side handle for the CDP socket. Populated in
// init; Server.handleConnection calls network.registerCdp(&cdp.link)
// after the worker-side handshake completes, and unregisterCdp before
// teardown.
link: Network.CdpLink,
// when true, any target creation must be attached.
target_auto_attach: bool = false,
session_id_gen: SessionIdGen = .{},
browser_context_id_gen: BrowserContextIdGen = .{},
browser_context: ?BrowserContext,
// Re-used arena for processing a message. We're assuming that we're getting
// 1 message at a time.
message_arena: std.heap.ArenaAllocator,
// Used for processing notifications within a browser context.
notification_arena: std.heap.ArenaAllocator,
// Valid for 1 frame navigation (what CDP calls a "renderer")
frame_arena: std.heap.ArenaAllocator,
// Valid for the entire lifetime of the BrowserContext. Should minimize
// (or altogether eliminate) our use of this.
browser_context_arena: std.heap.ArenaAllocator,
pub fn init(
self: *CDP,
app: *App,
socket: posix.socket_t,
json_version_response: []const u8,
) !void {
const allocator = app.allocator;
self.* = .{
.app = app,
.link = undefined,
.conn = undefined,
.browser = undefined,
.allocator = allocator,
.browser_context = null,
.frame_arena = std.heap.ArenaAllocator.init(allocator),
.message_arena = std.heap.ArenaAllocator.init(allocator),
.notification_arena = std.heap.ArenaAllocator.init(allocator),
.browser_context_arena = std.heap.ArenaAllocator.init(allocator),
};
try self.browser.init(app, .{ .env = .{ .with_inspector = true } }, self);
const http_client = &self.browser.http_client;
try self.conn.init(allocator, socket, json_version_response, &http_client.inbox, &app.arena_pool);
errdefer self.conn.deinit();
self.link = .{
.cdp = self,
.state = .live,
.socket = socket,
.handle = http_client.handle,
};
}
pub fn deinit(self: *CDP) void {
if (self.browser_context) |*bc| {
bc.deinit();
}
self.browser.deinit();
self.frame_arena.deinit();
self.message_arena.deinit();
self.notification_arena.deinit();
self.browser_context_arena.deinit();
self.conn.deinit();
}
// Called by Network when readable bytes arrive on the CDP socket.
// Feeds them through the WS framer and pushes each parsed frame into
// the worker's inbox. Returns false if a close frame was seen (or a
// fatal frame error) so Network drops the link from its poll set.
//
// One network read can carry more bytes than the reader's current
// free space — large CDP messages (Page.addScriptToEvaluateOnNewDocument,
// Runtime evaluation results, etc.) routinely exceed 16 KB, and a
// single read can contain multiple messages, or part of messages. We loop: feed
// what fits, run processMessages (which extracts complete frames, compacts the
// reader, and grows the buffer if it sees a frame header larger than current
// capacity), repeat until the chunk is drained.
pub fn onData(self: *CDP, data: []const u8) anyerror!bool {
var remaining = data;
while (remaining.len > 0) {
const n = self.conn.feedBytes(remaining);
remaining = remaining[n..];
if ((try self.conn.processMessages()) == false) {
return false;
}
}
return true;
}
// Called by Network when it drops the link unsolicited (peer EOF, read
// error, poll HUP/ERR). Push a disconnect into the inbox so the
// worker's drainInbox surfaces error.ClientDisconnected.
pub fn onLinkDisconnect(self: *CDP, err: ?anyerror) void {
const arena = self.browser.arena_pool.acquire(.tiny, "cdp disconnect") catch |e| switch (e) {
error.OutOfMemory => @panic("OOM"),
};
self.browser.http_client.inbox.push(arena, .{ .disconnect = err });
}
// Called in the Worker to dispatch a single CDP message bubbled up by
// HttpClient.drainInbox. The Network thread already parsed the JSON
// when it pushed the message to the inbox, so we skip straight to
// dispatchParsed without re-parsing. `c.raw` and `c.arena` outlive
// this call (they're owned by the inbox Message which drainInbox
// frees right after we return), so `c.input`'s string slices stay
// valid for the duration of dispatch.
pub fn onMessage(self: *CDP, c: *Inbox.Message.Cdp) anyerror!void {
const arena = &self.message_arena;
defer _ = arena.reset(.{ .retain_with_limit = 1024 * 16 });
return self.dispatchParsed(arena.allocator(), .{ .cdp = self }, c.raw, c.input);
}
// Parse + dispatch a raw JSON CDP message. Used by tests (which
// don't go through the Network thread / inbox pipeline) and by any
// caller that has bytes rather than a pre-parsed InputMessage.
pub fn processMessage(self: *CDP, msg: []const u8) !void {
const arena = &self.message_arena;
defer _ = arena.reset(.{ .retain_with_limit = 1024 * 16 });
return self.dispatch(arena.allocator(), .{ .cdp = self }, msg);
}
// Called in the worker when a PING message is received
pub fn onPing(self: *CDP, body: []const u8) void {
self.conn.sendPong(body) catch |err| {
log.warn(.app, "CDP pong", .{ .err = err });
};
}
// Called in the Worker when a peer-initiated close with CLOSE_NORMAL. The worker
// loop tears down immediately after; drainInbox returns
// error.ClientDisconnected once we return.
pub fn onClose(self: *CDP) void {
self.conn.send(&WS.CLOSE_NORMAL) catch |err| {
log.warn(.app, "CDP close reply", .{ .err = err });
};
}
// Called in the Worker when the peer disconnected (peer EOF, fatal frame error,
// or right after a peer close was replied to). drainInbox returns
// error.ClientDisconnected, which the worker loop catches to tear down.
//
// If `err` is a recognized WS framing error, send the matching close
// frame back to the peer before tearing down — that's how clients
// observe protocol violations (close code 1002 / 1009 / etc.).
pub fn onDisconnect(self: *CDP, err: ?anyerror) void {
if (err) |e| {
if (WS.errorReply(e)) |close_frame| {
self.conn.send(close_frame) catch {};
}
}
log.info(.cdp, "CDP disconnect", .{ .err = err });
}
pub fn sendJSON(self: *CDP, message: anytype) !void {
try self.conn.sendJSON(message, .{ .emit_null_optional_fields = false });
}
pub fn tick(self: *CDP) !bool {
// Liveness is enforced by TCP keepalive configured in
// Server.configureSocket; the wakeup lets V8 run or terminate.
const wait_ms: u32 = 1000; // 1s
self.pageWait(wait_ms) catch |wait_err| switch (wait_err) {
error.NoPage => {
// No active page yet (or a teardown is in flight). Fall
// back to ticking the http client directly so CDP messages
// still get dispatched.
self.browser.http_client.tick(wait_ms, .all) catch |err| switch (err) {
error.ClientDisconnected => return false,
else => {
log.err(.app, "http tick", .{ .err = err });
return false;
},
};
},
error.ClientDisconnected => return false,
else => return wait_err,
};
return true;
}
fn pageWait(self: *CDP, ms: u32) !void {
const session = &(self.browser.session orelse return error.NoPage);
var runner = try session.runner(.{});
return runner.waitCDP(.{ .ms = ms });
}
// Parse-then-dispatch entry point. Used by:
// - CDP.processMessage (tests, and any other caller that hands us
// raw JSON bytes).
// - Target.sendMessageToTarget (a CDP command that wraps another
// CDP message as a string parameter and forwards it through the
// dispatch table).
// The normal Network-thread path doesn't go through here — it parses
// once on the Network thread and reaches dispatchParsed directly via
// onMessage.
pub fn dispatch(self: *CDP, arena: Allocator, sender: Command.Sender, str: []const u8) !void {
const input = json.parseFromSliceLeaky(InputMessage, arena, str, .{
.ignore_unknown_fields = true,
}) catch return error.InvalidJSON;
return self.dispatchParsed(arena, sender, str, input);
}
// Dispatch a pre-parsed CDP message. The caller is responsible for
// keeping `str` and the backing storage for `input`'s string slices
// alive for the duration of the call.
fn dispatchParsed(self: *CDP, arena: Allocator, sender: Command.Sender, str: []const u8, input: InputMessage) !void {
var command = Command{
.input = .{
.json = str,
.id = input.id,
.action = "",
.params = input.params,
.session_id = input.sessionId,
},
.cdp = self,
.arena = arena,
.sender = sender,
.browser_context = if (self.browser_context) |*bc| bc else null,
};
// See dispatchStartupCommand for more info on this.
var is_startup = false;
if (input.sessionId) |input_session_id| {
if (std.mem.eql(u8, input_session_id, "STARTUP")) {
is_startup = true;
} else if (self.isValidSessionId(input_session_id) == false) {
return command.sendError(-32001, "Unknown sessionId", .{});
}
}
if (is_startup) {
dispatchStartupCommand(&command, input.method) catch |err| {
command.sendError(-31999, @errorName(err), .{}) catch return err;
};
} else {
dispatchCommand(&command, input.method) catch |err| {
command.sendError(-31998, @errorName(err), .{}) catch return err;
};
}
}
// A CDP session isn't 100% fully driven by the driver. There's are
// independent actions that the browser is expected to take. For example
// Puppeteer expects the browser to startup a tab and thus have existing
// targets.
// To this end, we create a [very] dummy BrowserContext, Target and
// Session. There isn't actually a BrowserContext, just a special id.
// When messages are received with the "STARTUP" sessionId, we do
// "special" handling - the bare minimum we need to do until the driver
// switches to a real BrowserContext.
// (I can imagine this logic will become driver-specific)
fn dispatchStartupCommand(command: *Command, method: []const u8) !void {
// Stagehand parses the response and error if we don't return a
// correct one for Page.getFrameTree on startup call.
if (std.mem.eql(u8, method, "Page.getFrameTree")) {
// The Page.getFrameTree handles startup response gracefully.
return dispatchCommand(command, method);
}
return command.sendResult(null, .{});
}
fn dispatchCommand(command: *Command, method: []const u8) !void {
const domain = blk: {
const i = std.mem.indexOfScalarPos(u8, method, 0, '.') orelse {
return error.InvalidMethod;
};
command.input.action = method[i + 1 ..];
break :blk method[0..i];
};
switch (domain.len) {
2 => switch (@as(u16, @bitCast(domain[0..2].*))) {
asUint(u16, "LP") => return @import("domains/lp.zig").processMessage(command),
else => {},
},
3 => switch (@as(u24, @bitCast(domain[0..3].*))) {
asUint(u24, "DOM") => return @import("domains/dom.zig").processMessage(command),
asUint(u24, "Log") => return @import("domains/log.zig").processMessage(command),
asUint(u24, "CSS") => return @import("domains/css.zig").processMessage(command),
else => {},
},
4 => switch (@as(u32, @bitCast(domain[0..4].*))) {
asUint(u32, "Page") => return @import("domains/page.zig").processMessage(command),
else => {},
},
5 => switch (@as(u40, @bitCast(domain[0..5].*))) {
asUint(u40, "Fetch") => return @import("domains/fetch.zig").processMessage(command),
asUint(u40, "Input") => return @import("domains/input.zig").processMessage(command),
else => {},
},
6 => switch (@as(u48, @bitCast(domain[0..6].*))) {
asUint(u48, "Target") => return @import("domains/target.zig").processMessage(command),
asUint(u48, "Audits") => return @import("domains/audits.zig").processMessage(command),
asUint(u48, "WebMCP") => return @import("domains/webmcp.zig").processMessage(command),
else => {},
},
7 => switch (@as(u56, @bitCast(domain[0..7].*))) {
asUint(u56, "Browser") => return @import("domains/browser.zig").processMessage(command),
asUint(u56, "Runtime") => return @import("domains/runtime.zig").processMessage(command),
asUint(u56, "Network") => return @import("domains/network.zig").processMessage(command),
asUint(u56, "Storage") => return @import("domains/storage.zig").processMessage(command),
asUint(u56, "Console") => return @import("domains/console.zig").processMessage(command),
else => {},
},
8 => switch (@as(u64, @bitCast(domain[0..8].*))) {
asUint(u64, "Security") => return @import("domains/security.zig").processMessage(command),
else => {},
},
9 => switch (@as(u72, @bitCast(domain[0..9].*))) {
asUint(u72, "Emulation") => return @import("domains/emulation.zig").processMessage(command),
asUint(u72, "Inspector") => return @import("domains/inspector.zig").processMessage(command),
else => {},
},
11 => switch (@as(u88, @bitCast(domain[0..11].*))) {
asUint(u88, "Performance") => return @import("domains/performance.zig").processMessage(command),
else => {},
},
13 => switch (@as(u104, @bitCast(domain[0..13].*))) {
asUint(u104, "Accessibility") => return @import("domains/accessibility.zig").processMessage(command),
else => {},
},
else => {},
}
return error.UnknownDomain;
}
fn isValidSessionId(self: *const CDP, input_session_id: []const u8) bool {
const browser_context = &(self.browser_context orelse return false);
const session_id = browser_context.session_id orelse return false;
return std.mem.eql(u8, session_id, input_session_id);
}
pub fn createBrowserContext(self: *CDP) ![]const u8 {
if (self.browser_context != null) {
return error.AlreadyExists;
}
const id = self.browser_context_id_gen.next();
self.browser_context = @as(BrowserContext, undefined);
const browser_context = &self.browser_context.?;
try BrowserContext.init(browser_context, id, self);
return id;
}
pub fn disposeBrowserContext(self: *CDP, browser_context_id: []const u8) bool {
const bc = &(self.browser_context orelse return false);
if (std.mem.eql(u8, bc.id, browser_context_id) == false) {
return false;
}
bc.deinit();
self.browser.closeSession();
self.browser_context = null;
_ = self.browser_context_arena.reset(.{ .retain_with_limit = 1024 * 16 });
return true;
}
const SendEventOpts = struct {
session_id: ?[]const u8 = null,
};
pub fn sendEvent(self: *CDP, method: []const u8, p: anytype, opts: SendEventOpts) !void {
return self.sendJSON(.{
.method = method,
.params = if (comptime @typeInfo(@TypeOf(p)) == .null) struct {}{} else p,
.sessionId = opts.session_id,
});
}
pub const BrowserContext = struct {
const Node = @import("Node.zig");
const AXNode = @import("AXNode.zig");
const CapturedResponse = struct {
must_encode: bool,
data: std.ArrayList(u8),
};
// Key for `captured_responses`. Documents are keyed by `loader_id`,
// everything else by `request_id` — the two id-spaces are independent
// counters and overlap numerically (loader 1 / request 1, loader 2 /
// request 2, ...), so the map key has to carry the namespace or
// entries collide. The wire-format prefix (`LID-` / `REQ-`) provides
// the same disambiguation on lookup; see `idFromRequestId` in
// domains/network.zig.
pub const CapturedResponseKey = struct {
kind: enum { request, loader },
id: u32,
};
id: []const u8,
cdp: *CDP,
// Represents the browser session. There is no equivalent in CDP. For
// all intents and purpose, from CDP's point of view our Browser and
// our Session more or less maps to a BrowserContext. THIS HAS ZERO
// RELATION TO SESSION_ID
session: *Session,
// Tied to the lifetime of the BrowserContext
arena: Allocator,
// Tied to the lifetime of 1 page rendered in the BrowserContext.
frame_arena: Allocator,
// From the parent's notification_arena.allocator(). Most of the CDP
// code paths deal with a cmd which has its own arena (from the
// message_arena). But notifications happen outside of the typical CDP
// request->response, and thus don't have a cmd and don't have an arena.
notification_arena: Allocator,
// Maps to our Page. (There are other types of targets, but we only
// deal with "pages" for now). Since we only allow 1 open page at a
// time, we only have 1 target_id.
target_id: ?[14]u8,
// The CDP session_id. After the target/page is created, the client
// "attaches" to it (either explicitly or automatically). We return a
// "sessionId" which identifies this link. `sessionId` is the how
// the CDP client informs us what it's trying to manipulate. Because we
// only support 1 BrowserContext at a time, and 1 page at a time, this
// is all pretty straightforward, but it still needs to be enforced, i.e.
// if we get a request with a sessionId that doesn't match the current one
// we should reject it.
session_id: ?[]const u8,
security_origin: []const u8,
page_life_cycle_events: bool,
secure_context_type: []const u8,
node_registry: Node.Registry,
node_search_list: Node.Search.List,
inspector_session: *js.Inspector.Session,
isolated_worlds: std.ArrayList(*IsolatedWorld),
// Scripts registered via Page.addScriptToEvaluateOnNewDocument.
// Evaluated in each new document after navigation completes.
scripts_on_new_document: std.ArrayList(ScriptOnNewDocument) = .empty,
next_script_id: u32 = 1,
http_proxy_changed: bool = false,
user_agent_changed: bool = false,
// Extra headers to add to all requests.
extra_headers: std.ArrayList([*c]const u8) = .empty,
intercept_state: InterceptState,
// When network is enabled, we'll capture the transfer.id -> body
// This is awfully memory intensive, but our underlying http client and
// its users (script manager and frame) correctly do not hold the body
// memory longer than they have to. In fact, the main request is only
// ever streamed. So if CDP is the only thing that needs bodies in
// memory for an arbitrary amount of time, then that's where we're going
// to store the,
captured_responses: std.AutoHashMapUnmanaged(CapturedResponseKey, CapturedResponse),
notification: *Notification,
// Pre-armed response for the next JS dialog (alert/confirm/prompt).
// Set by Page.handleJavaScriptDialog; consumed (and cleared) when the
// next javascript_dialog_opening notification is dispatched. Strings
// are duplicated into self.arena so they outlive the CDP command's
// own message arena.
pending_dialog_response: ?Notification.DialogResponse = null,
// webmcp tool invocation
invocation_id_gen: InvocationIdGen = .{},
// WebMCP domain state. Populated when `WebMCP.enable` is received.
webmcp_invocations: std.AutoHashMapUnmanaged(u32, *@import("domains/webmcp.zig").Invocation) = .empty,
fn init(self: *BrowserContext, id: []const u8, cdp: *CDP) !void {
const allocator = cdp.allocator;
// Create notification for this BrowserContext
const notification = try Notification.init(allocator);
errdefer notification.deinit();
const session = try cdp.browser.newSession(notification);
if (cdp.app.config.cookieFile()) |cookie_path| {
lp.cookies.loadFromFile(session, cookie_path);
}
const browser = &cdp.browser;
const inspector_session = browser.env.inspector.?.startSession(self);
errdefer browser.env.inspector.?.stopSession();
var registry = Node.Registry.init(allocator);
errdefer registry.deinit();
self.* = .{
.id = id,
.cdp = cdp,
.target_id = null,
.session_id = null,
.session = session,
.security_origin = URL_BASE,
.secure_context_type = "Secure", // TODO = enum
.page_life_cycle_events = false, // TODO; Target based value
.node_registry = registry,
.node_search_list = undefined,
.isolated_worlds = .empty,
.inspector_session = inspector_session,
.frame_arena = cdp.frame_arena.allocator(),
.arena = cdp.browser_context_arena.allocator(),
.notification_arena = cdp.notification_arena.allocator(),
.intercept_state = try InterceptState.init(allocator),
.captured_responses = .empty,
.notification = notification,
};
self.node_search_list = Node.Search.List.init(allocator, &self.node_registry);
errdefer self.deinit();
try notification.register(.frame_remove, self, onFrameRemove);
try notification.register(.frame_created, self, onFrameCreated);
try notification.register(.frame_navigate, self, onFrameNavigate);
try notification.register(.frame_navigated, self, onFrameNavigated);
try notification.register(.frame_child_frame_created, self, onFrameChildFrameCreated);
try notification.register(.frame_dom_content_loaded, self, onFrameDOMContentLoaded);
try notification.register(.frame_loaded, self, onFrameLoaded);
try notification.register(.javascript_dialog_opening, self, onJavascriptDialogOpening);
}
pub fn deinit(self: *BrowserContext) void {
const browser = &self.cdp.browser;
const env = &browser.env;
// resetContextGroup detach the inspector from all contexts.
// It appends async tasks, so we make sure we run the message loop
// before deinit it.
env.inspector.?.resetContextGroup();
env.inspector.?.stopSession();
// abort all intercepted requests before closing the session/page
// since some of these might callback into the page/scriptmanager.
// intercept_state stores ids — look each one up; if it's already
// gone (out-of-band destroy), there's nothing to abort, but the
// intercepted counter still needs decrementing because we
// incremented it on pause.
const http_client = &browser.http_client;
for (self.intercept_state.pendingIntercepts()) |transfer_id| {
lp.assert(
http_client.interception_layer.intercepted > 0,
"BrowserContext.deinit.intercepted",
.{ .value = http_client.interception_layer.intercepted },
);
http_client.interception_layer.intercepted -= 1;
if (http_client.findTransfer(transfer_id)) |transfer| {
transfer.abort(error.ClientDisconnect);
}
}
// Notify any CDP client waiting on an in-flight WebMCP invocation
// before the V8 context (and its promise callbacks) get torn down
// by browser.closeSession below.
@import("domains/webmcp.zig").cancelAllPending(self);
for (self.isolated_worlds.items) |world| {
world.deinit();
}
self.isolated_worlds.clearRetainingCapacity();
// do this before closeSession, since we don't want to process any
// new notification (Or maybe, instead of the deinit above, we just
// rely on those notifications to do our normal cleanup?)
self.notification.unregisterAll(self);
// If the session has a frame, we need to clear it first. The page
// context is always nested inside of the isolated world context,
// so we need to shutdown the page one first.
browser.closeSession();
self.node_registry.deinit();
self.node_search_list.deinit();
self.notification.deinit();
if (self.http_proxy_changed) {
// has to be called after browser.closeSession, since it won't
// work if there are active connections.
browser.http_client.changeProxy(null) catch |err| {
log.warn(.http, "changeProxy", .{ .err = err });
};
}
if (self.user_agent_changed) {
browser.http_client.clearUserAgentOverride();
}
self.intercept_state.deinit();
}
pub fn reset(self: *BrowserContext) void {
self.node_registry.reset();
self.node_search_list.reset();
}
pub fn createIsolatedWorld(self: *BrowserContext, world_name: []const u8, grant_universal_access: bool) !*IsolatedWorld {
const browser = &self.cdp.browser;
const arena = try browser.arena_pool.acquire(.small, "IsolatedWorld");
errdefer browser.arena_pool.release(arena);
const call_arena = try browser.arena_pool.acquire(.tiny, "IsolatedWorld.call_arena");
errdefer browser.arena_pool.release(call_arena);
const world = try arena.create(IsolatedWorld);
world.* = .{
.arena = arena,
.call_arena = call_arena,
.context = null,
.browser = browser,
.name = try arena.dupe(u8, world_name),
.grant_universal_access = grant_universal_access,
};
try self.isolated_worlds.append(self.arena, world);
return world;
}
pub fn nodeWriter(self: *BrowserContext, root: *const Node, opts: Node.Writer.Opts) Node.Writer {
return .{
.root = root,
.depth = opts.depth,
.exclude_root = opts.exclude_root,
.registry = &self.node_registry,
};
}
pub fn axnodeWriter(self: *BrowserContext, temp_arena: Allocator, root: *const Node, opts: AXNode.Writer.Opts) !AXNode.Writer {
// Bind the writer to the frame that owns the root node, not whatever
// happens to be `currentFrame`. Name resolution (`Label.findLabelByFor`
// against `ownerDocument`) and visibility checks (`frame._style_manager`)
// are per-frame; getting this wrong on cross-frame queries produces
// names/visibility from the wrong document.
const fallback = self.session.currentFrame() orelse return error.FrameNotLoaded;
const frame = root.dom.ownerFrame(fallback);
const cache = try frame.call_arena.create(Element.VisibilityCache);
cache.* = .empty;
const label_index = try frame.call_arena.create(Label.LabelByForIndex);
label_index.* = .{};
return .{
.frame = frame,
.root = root,
.registry = &self.node_registry,
.visibility_cache = cache,
.label_index = label_index,
.temp_arena = temp_arena,
.filter = opts.filter,
};
}
pub fn getURL(self: *const BrowserContext) ?[:0]const u8 {
const frame = self.session.currentFrame() orelse return null;
const url = frame.url;
return if (url.len == 0) null else url;
}
pub fn getTitle(self: *const BrowserContext) ?[]const u8 {
const frame = self.session.currentFrame() orelse return null;
return frame.getTitle() catch |err| {
log.err(.cdp, "page title", .{ .err = err });
return null;
};
}
pub fn networkEnable(self: *BrowserContext) !void {
try self.notification.register(.http_request_fail, self, onHttpRequestFail);
try self.notification.register(.http_request_start, self, onHttpRequestStart);
try self.notification.register(.http_request_done, self, onHttpRequestDone);
try self.notification.register(.http_response_data, self, onHttpResponseData);
try self.notification.register(.http_response_header_done, self, onHttpResponseHeadersDone);
try self.notification.register(.http_request_served_from_cache, self, onHttpRequestServedFromCache);
}
pub fn networkDisable(self: *BrowserContext) void {
self.notification.unregister(.http_request_fail, self);
self.notification.unregister(.http_request_start, self);
self.notification.unregister(.http_request_done, self);
self.notification.unregister(.http_response_data, self);
self.notification.unregister(.http_response_header_done, self);
self.notification.unregister(.http_request_served_from_cache, self);
}
pub fn fetchEnable(self: *BrowserContext, authRequests: bool) !void {
try self.notification.register(.http_request_intercept, self, onHttpRequestIntercept);
if (authRequests) {
try self.notification.register(.http_request_auth_required, self, onHttpRequestAuthRequired);
}
}
pub fn fetchDisable(self: *BrowserContext) void {
self.notification.unregister(.http_request_intercept, self);
self.notification.unregister(.http_request_auth_required, self);
}
pub fn lifecycleEventsEnable(self: *BrowserContext) !void {
self.page_life_cycle_events = true;
try self.notification.register(.frame_network_idle, self, onFrameNetworkIdle);
try self.notification.register(.frame_network_almost_idle, self, onFrameNetworkAlmostIdle);
}
pub fn lifecycleEventsDisable(self: *BrowserContext) void {
self.page_life_cycle_events = false;
self.notification.unregister(.frame_network_idle, self);
self.notification.unregister(.frame_network_almost_idle, self);
}
pub fn consoleEnable(self: *BrowserContext) !void {
try self.notification.register(.console_message, self, onConsoleMessage);
}
pub fn consoleDisable(self: *BrowserContext) void {
self.notification.unregister(.console_message, self);
}
pub fn runtimeEnable(self: *BrowserContext) !void {
try self.notification.register(.runtime_console_message, self, onRuntimeConsoleMessage);
}
pub fn runtimeDisable(self: *BrowserContext) void {
self.notification.unregister(.runtime_console_message, self);
}
pub fn webmcpEnable(self: *BrowserContext) !void {
try self.notification.register(.model_context_tool_added, self, onModelContextToolAdded);
try self.notification.register(.model_context_tool_removed, self, onModelContextToolRemoved);
}
pub fn webmcpDisable(self: *BrowserContext) void {
self.notification.unregister(.model_context_tool_added, self);
self.notification.unregister(.model_context_tool_removed, self);
}
pub fn onModelContextToolAdded(ctx: *anyopaque, event: *const Notification.ModelContextToolEvent) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/webmcp.zig").onToolAdded(self, event);
}
pub fn onModelContextToolRemoved(ctx: *anyopaque, event: *const Notification.ModelContextToolEvent) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/webmcp.zig").onToolRemoved(self, event);
}
pub fn onFrameRemove(ctx: *anyopaque, _: Notification.FrameRemove) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
@import("domains/page.zig").frameRemove(self);
}
pub fn onFrameCreated(ctx: *anyopaque, frame: *Frame) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").frameCreated(self, frame);
}
pub fn onFrameNavigate(ctx: *anyopaque, msg: *const Notification.FrameNavigate) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").frameNavigate(self, msg);
}
pub fn onFrameNavigated(ctx: *anyopaque, msg: *const Notification.FrameNavigated) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
defer self.resetNotificationArena();
return @import("domains/page.zig").frameNavigated(self.notification_arena, self, msg);
}
pub fn onFrameChildFrameCreated(ctx: *anyopaque, msg: *const Notification.FrameChildFrameCreated) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").frameChildFrameCreated(self, msg);
}
pub fn onFrameNetworkIdle(ctx: *anyopaque, msg: *const Notification.FrameNetworkIdle) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").frameNetworkIdle(self, msg);
}
pub fn onFrameNetworkAlmostIdle(ctx: *anyopaque, msg: *const Notification.FrameNetworkAlmostIdle) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").frameNetworkAlmostIdle(self, msg);
}
pub fn onHttpRequestStart(ctx: *anyopaque, msg: *const Notification.RequestStart) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
try @import("domains/network.zig").httpRequestStart(self, msg);
}
pub fn onHttpRequestIntercept(ctx: *anyopaque, msg: *const Notification.RequestIntercept) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
try @import("domains/fetch.zig").requestIntercept(self, msg);
}
pub fn onHttpRequestFail(ctx: *anyopaque, msg: *const Notification.RequestFail) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/network.zig").httpRequestFail(self, msg);
}
pub fn onFrameDOMContentLoaded(ctx: *anyopaque, msg: *const Notification.FrameDOMContentLoaded) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").frameDOMContentLoaded(self, msg);
}
pub fn onFrameLoaded(ctx: *anyopaque, msg: *const Notification.FrameLoaded) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").frameLoaded(self, msg);
}
pub fn onJavascriptDialogOpening(ctx: *anyopaque, msg: *const Notification.JavascriptDialogOpening) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").javascriptDialogOpening(self, msg);
}
fn keyFromTransfer(transfer: *const Transfer) CDP.BrowserContext.CapturedResponseKey {
return if (transfer.req.resource_type == .document)
.{ .kind = .loader, .id = transfer.req.loader_id }
else
.{ .kind = .request, .id = transfer.id };
}
pub fn onHttpResponseHeadersDone(ctx: *anyopaque, msg: *const Notification.ResponseHeaderDone) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
defer self.resetNotificationArena();
const arena = self.frame_arena;
// Prepare the captured response value.
const key = keyFromTransfer(msg.transfer);
const gop = try self.captured_responses.getOrPut(arena, key);
if (!gop.found_existing) {
gop.value_ptr.* = .{
.data = .empty,
// Encode the data in base64 by default, but don't encode
// for well known content-type.
.must_encode = blk: {
const response = msg.response;
if (response.contentType()) |ct| {
const mime = try Mime.parse(ct);
if (!mime.isText()) {
break :blk true;
}
if (std.mem.eql(u8, "UTF-8", mime.charsetString())) {
break :blk false;
}
}
break :blk true;
},
};
}
return @import("domains/network.zig").httpResponseHeaderDone(self.notification_arena, self, msg);
}
pub fn onHttpRequestDone(ctx: *anyopaque, msg: *const Notification.RequestDone) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/network.zig").httpRequestDone(self, msg);
}
pub fn onHttpResponseData(ctx: *anyopaque, msg: *const Notification.ResponseData) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
const arena = self.frame_arena;
const key = keyFromTransfer(msg.transfer);
const resp = self.captured_responses.getPtr(key) orelse lp.assert(false, "onHttpResponseData missing captured response", .{});
return resp.data.appendSlice(arena, msg.data);
}
pub fn onHttpRequestAuthRequired(ctx: *anyopaque, data: *const Notification.RequestAuthRequired) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
defer self.resetNotificationArena();
try @import("domains/fetch.zig").requestAuthRequired(self, data);
}
pub fn onHttpRequestServedFromCache(ctx: *anyopaque, msg: *const Notification.RequestServedFromCache) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
return @import("domains/network.zig").httpServedFromCache(self, msg);
}
pub fn onConsoleMessage(ctx: *anyopaque, msg: *const Notification.ConsoleMessage) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
defer self.resetNotificationArena();
return @import("domains/console.zig").consoleMessage(self.notification_arena, self, msg);
}
pub fn onRuntimeConsoleMessage(ctx: *anyopaque, msg: *const Notification.ConsoleMessage) !void {
const self: *BrowserContext = @ptrCast(@alignCast(ctx));
defer self.resetNotificationArena();
return @import("domains/runtime.zig").consoleMessage(self.notification_arena, self, msg);
}
fn resetNotificationArena(self: *BrowserContext) void {
defer _ = self.cdp.notification_arena.reset(.{ .retain_with_limit = 1024 * 64 });
}
pub fn callInspector(self: *const BrowserContext, msg: []const u8) void {
self.inspector_session.send(msg);
self.session.browser.env.runMicrotasks();
}
pub fn onInspectorResponse(ctx: *anyopaque, _: u32, msg: []const u8) void {
sendInspectorMessage(@ptrCast(@alignCast(ctx)), msg) catch |err| {
log.err(.cdp, "send inspector response", .{ .err = err });
};
}
pub fn onInspectorEvent(ctx: *anyopaque, msg: []const u8) void {
if (log.enabled(.cdp, .debug)) {
// msg should be {"method":<method>,...
lp.assert(std.mem.startsWith(u8, msg, "{\"method\":"), "onInspectorEvent prefix", .{});
const method_end = std.mem.indexOfScalar(u8, msg, ',') orelse {
log.err(.cdp, "invalid inspector event", .{ .msg = msg });
return;
};
const method = msg[10..method_end];
log.debug(.cdp, "inspector event", .{ .method = method });
}
sendInspectorMessage(@ptrCast(@alignCast(ctx)), msg) catch |err| {
log.err(.cdp, "send inspector event", .{ .err = err });
};
}
// This is hacky x 2. First, we create the JSON payload by gluing our
// session_id onto it. Second, we're much more client/websocket aware than
// we should be.
fn sendInspectorMessage(self: *BrowserContext, msg: []const u8) !void {
const session_id = self.session_id orelse {
// We no longer have an active session. What should we do
// in this case?
return;
};
const cdp = self.cdp;
const allocator = cdp.conn.send_arena.allocator();
const field = ",\"sessionId\":\"";
// + 1 for the closing quote after the session id
// + 10 for the max websocket header
const message_len = msg.len + session_id.len + 1 + field.len + 10;
var buf: std.ArrayList(u8) = .{};
buf.ensureTotalCapacity(allocator, message_len) catch |err| {
log.err(.cdp, "inspector buffer", .{ .err = err });
return;
};
// reserve 10 bytes for websocket header
buf.appendSliceAssumeCapacity(&.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
// -1 because we dont' want the closing brace '}'
buf.appendSliceAssumeCapacity(msg[0 .. msg.len - 1]);
buf.appendSliceAssumeCapacity(field);
buf.appendSliceAssumeCapacity(session_id);
buf.appendSliceAssumeCapacity("\"}");
if (comptime IS_DEBUG) {
std.debug.assert(buf.items.len == message_len);
}
try cdp.conn.sendJSONRaw(buf);
}
};
/// see: https://chromium.googlesource.com/chromium/src/+/master/third_party/blink/renderer/bindings/core/v8/V8BindingDesign.md#world
/// The current understanding. An isolated world lives in the same isolate, but a separated context.
/// Clients create this to be able to create variables and run code without interfering with the
/// normal namespace and values of the webframe. Similar to the main context we need to pretend to recreate it after
/// a executionContextsCleared event which happens when navigating to a new frame. A client can have a command be executed
const ScriptOnNewDocument = struct {
identifier: u32,
source: []const u8,
};
/// in the isolated world by using its Context ID or the worldName.
/// grantUniversalAccess Indicates whether the isolated world can reference objects like the DOM or other JS Objects.
/// An isolated world has it's own instance of globals like Window.
/// Generally the client needs to resolve a node into the isolated world to be able to work with it.
/// An object id is unique across all contexts, different object ids can refer to the same Node in different contexts.
const IsolatedWorld = struct {
arena: Allocator,
call_arena: Allocator,
browser: *Browser,
name: []const u8,
context: ?*js.Context = null,
grant_universal_access: bool,
// Identity tracking for this isolated world (separate from main world).
// This ensures CDP inspector contexts don't share v8::Globals with main world.
identity: js.Identity = .{},
pub fn deinit(self: *IsolatedWorld) void {
self.removeContext();
self.browser.arena_pool.release(self.call_arena);
self.browser.arena_pool.release(self.arena);
}
pub fn removeContext(self: *IsolatedWorld) void {
if (self.context) |ctx| {
self.browser.env.destroyContext(ctx);
self.context = null;
}
// I don't think it's possible to have any identity without a context,
// but there's no harm in being safe.
self.identity.deinit();
self.identity = .{};
}
// The isolate world must share at least some of the state with the related frame, specifically the DocumentHTML
// (assuming grantUniversalAccess will be set to True!).
// We just created the world and the frame. The frame's state lives in the session, but is update on navigation.
// This also means this pointer becomes invalid after removePage until a new frame is created.
// Currently we have only 1 frame and thus also only 1 state in the isolate world.
pub fn createContext(self: *IsolatedWorld, frame: *Frame) !*js.Context {
if (self.context == null) {
const ctx = try self.browser.env.createContext(frame, .{
.identity = &self.identity,
.identity_arena = self.arena,
.call_arena = self.call_arena,
.debug_name = "IsolatedContext",
});
self.context = ctx;
} else {
log.warn(.cdp, "not implemented", .{
.feature = "createContext: Not implemented second isolated context creation",
.info = "reuse existing context",
});
}
return self.context.?;
}
};
// This is a generic because when we send a result we have two different
// behaviors. Normally, we're sending the result to the client. But in some cases
// we want to capture the result. So we want the command.sendResult to be
// generic.
pub const Command = struct {
// A misc arena that can be used for any allocation for processing
// the message
arena: Allocator,
// reference to our CDP instance
cdp: *CDP,
// The browser context this command targets
browser_context: ?*BrowserContext,
// The command input (the id, optional session_id, params, ...)
input: Input,
// In most cases, Sender is going to be cdp itself. We'll call
// sender.sendJSON() and CDP will send it to the client. But some
// commands are dispatched internally, in which cases the Sender will
// be code to capture the data that we were "sending".
sender: Sender,
const Sender = union(enum) {
cdp: *CDP,
capture: *std.Io.Writer,
pub fn sendJSON(self: Sender, message: anytype) !void {
switch (self) {
.cdp => |cdp| return cdp.sendJSON(message),
.capture => |writer| {
return std.json.Stringify.value(message, .{
.emit_null_optional_fields = false,
}, writer);
},
}
}
};
pub fn params(self: *const Command, comptime T: type) !?T {
if (self.input.params) |p| {
return try json.parseFromSliceLeaky(
T,
self.arena,
p.raw,
.{ .ignore_unknown_fields = true },
);
}
return null;
}
pub fn createBrowserContext(self: *Command) !*BrowserContext {
_ = try self.cdp.createBrowserContext();
self.browser_context = &(self.cdp.browser_context.?);
return self.browser_context.?;
}
const SendResultOpts = struct {
include_session_id: bool = true,
};
pub fn sendResult(self: *Command, result: anytype, opts: SendResultOpts) !void {
return self.sender.sendJSON(.{
.id = self.input.id,
.result = if (comptime @typeInfo(@TypeOf(result)) == .null) struct {}{} else result,
.sessionId = if (opts.include_session_id) self.input.session_id else null,
});
}
pub fn sendEvent(self: *Command, method: []const u8, p: anytype, opts: SendEventOpts) !void {
// Events ALWAYS go to the client. self.sender should not be used
return self.cdp.sendEvent(method, p, opts);
}
const SendErrorOpts = struct {
include_session_id: bool = true,
};
pub fn sendError(self: *Command, code: i32, message: []const u8, opts: SendErrorOpts) !void {
return self.sender.sendJSON(.{
.id = self.input.id,
.@"error" = .{ .code = code, .message = message },
.sessionId = if (opts.include_session_id) self.input.session_id else null,
});
}
const Input = struct {
// When we reply to a message, we echo back the message id
id: ?i64,
// The "action" of the message.Given a method of "LOG.enable", the
// action is "enable"
action: []const u8,
// See notes in BrowserContext about session_id
session_id: ?[]const u8,
// Unparsed / untyped input.params.
params: ?InputParams,
// The full raw json input
json: []const u8,
};
};
// When we parse a JSON message from the client, this is the structure
// we always expect. Parsed on the Network thread inside
// Connection.handleMessage; the slices reference the raw JSON bytes
// (or arena allocations for fields that needed unescaping). Both
// outlive the InputMessage for the inbox message's lifetime.
pub const InputMessage = struct {
id: ?i64 = null,
method: []const u8,
params: ?InputParams = null,
sessionId: ?[]const u8 = null,
};
// The JSON "params" field changes based on the "method". Initially, we just
// capture the raw json object (including the opening and closing braces).
// Then, when we're processing the message, and we know what type it is, we
// can parse it (in Disaptch(T).params).
pub const InputParams = struct {
raw: []const u8,
pub fn jsonParse(
_: Allocator,
scanner: *json.Scanner,
_: json.ParseOptions,
) !InputParams {
const height = scanner.stackHeight();
const start = scanner.cursor;
if (try scanner.next() != .object_begin) {
return error.UnexpectedToken;
}
try scanner.skipUntilStackHeight(height);
const end = scanner.cursor;
return .{ .raw = scanner.input[start..end] };
}
};
fn asUint(comptime T: type, comptime string: []const u8) T {
return @bitCast(string[0..string.len].*);
}
const testing = @import("testing.zig");
test "cdp: invalid json" {
var ctx = try testing.context();
defer ctx.deinit();
try testing.expectError(error.InvalidJSON, ctx.processMessage("invalid"));
// method is required
try testing.expectError(error.InvalidJSON, ctx.processMessage(.{}));
try ctx.processMessage(.{
.method = "Target",
});
try ctx.expectSentError(-31998, "InvalidMethod", .{});
try ctx.processMessage(.{
.method = "Unknown.domain",
});
try ctx.expectSentError(-31998, "UnknownDomain", .{});
try ctx.processMessage(.{
.method = "Target.over9000",
});
try ctx.expectSentError(-31998, "UnknownMethod", .{});
}
test "cdp: invalid sessionId" {
var ctx = try testing.context();
defer ctx.deinit();
{
// we have no browser context
try ctx.processMessage(.{ .method = "Hi", .sessionId = "nope" });
try ctx.expectSentError(-32001, "Unknown sessionId", .{});
}
{
// we have a browser context but no session_id
_ = try ctx.loadBrowserContext(.{});
try ctx.processMessage(.{ .method = "Hi", .sessionId = "BC-Has-No-SessionId" });
try ctx.expectSentError(-32001, "Unknown sessionId", .{});
}
{
// we have a browser context with a different session_id
_ = try ctx.loadBrowserContext(.{ .session_id = "SESS-2" });
try ctx.processMessage(.{ .method = "Hi", .sessionId = "SESS-1" });
try ctx.expectSentError(-32001, "Unknown sessionId", .{});
}
}
test "cdp: STARTUP sessionId" {
var ctx = try testing.context();
defer ctx.deinit();
{
// we have no browser context
try ctx.processMessage(.{ .id = 2, .method = "Hi", .sessionId = "STARTUP" });
try ctx.expectSentResult(null, .{ .id = 2, .index = 0, .session_id = "STARTUP" });
}
{
// we have a browser context but no session_id
_ = try ctx.loadBrowserContext(.{});
try ctx.processMessage(.{ .id = 3, .method = "Hi", .sessionId = "STARTUP" });
try ctx.expectSentResult(null, .{ .id = 3, .index = 1, .session_id = "STARTUP" });
}
{
// we have a browser context with a different session_id
_ = try ctx.loadBrowserContext(.{ .session_id = "SESS-2" });
try ctx.processMessage(.{ .id = 4, .method = "Hi", .sessionId = "STARTUP" });
try ctx.expectSentResult(null, .{ .id = 4, .index = 2, .session_id = "STARTUP" });
}
}