Files
browser/src/Config.zig
Adrià Arrufat ac82551e66 agent: replace -i flag with /save and /load commands
Removes the `-i`/`--interactive` CLI flag and live file-based
recording. Instead, the REPL now supports a `/load <path>` command
to run scripts from disk, and `/save` to export the in-memory
session recording.

The `Recorder` is simplified to be purely in-memory, and the script
runtime is moved to `src/script/Runtime.zig`.

BREAKING CHANGE: The `-i`/`--interactive` flag has been removed. Use
the `/save` and `/load` commands within the REPL instead.
2026-06-03 16:21:10 +02:00

671 lines
22 KiB
Zig

// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const lp = @import("lightpanda");
const log = lp.log;
const builtin = @import("builtin");
const zenai = @import("zenai");
const cli = @import("cli.zig");
const dump = @import("browser/dump.zig");
const Storage = @import("storage/Storage.zig");
const WebBotAuthConfig = @import("network/WebBotAuth.zig").Config;
const Allocator = std.mem.Allocator;
// Upper bound for a CDP HTTP request.
// NOTE(review): presumably expressed in bytes — confirm against the CDP server code.
pub const CDP_MAX_HTTP_REQUEST_SIZE = 4096;
// Maximum CDP message size: a 512 KiB base
// +14 for max websocket payload overhead
// +140 for the max control packet that might be interleaved in a message
pub const CDP_MAX_MESSAGE_SIZE = 512 * 1024 + 14 + 140;
// TCP keepalive parameters applied to accepted CDP connections.
// Detection window ≈ IDLE + CNT * INTVL = 4 + 3*2 = 10s.
pub const CDP_KEEPALIVE_IDLE_S: c_int = 4;
pub const CDP_KEEPALIVE_INTVL_S: c_int = 2;
pub const CDP_KEEPALIVE_CNT: c_int = 3;
// This file is itself the `Config` struct type (fields are declared at file scope).
const Config = @This();
/// Validator for `--log-filter-scopes`.
///
/// Consumes the next argument, splits it on `,` and appends each recognised
/// `log.Scope` to `list`. Returns `error.InvalidOption` when the argument is
/// missing or when any element is not a `log.Scope` name (the bad element is
/// logged first). Scopes parsed before a bad element stay in `list`.
fn logFilterScopesValidator(allocator: Allocator, args: *std.process.ArgIterator, list: *std.ArrayList(log.Scope)) !void {
    const raw = args.next() orelse return error.InvalidOption;

    var names = std.mem.splitScalar(u8, raw, ',');
    while (names.next()) |name| {
        if (std.meta.stringToEnum(log.Scope, name)) |scope| {
            try list.append(allocator, scope);
        } else {
            log.fatal(.app, "invalid option choice", .{ .arg = "--log-filter-scopes", .value = name });
            return error.InvalidOption;
        }
    }
}
/// Validator for `--log-level`.
///
/// Consumes the next argument and maps it to a `log.Level`. The spelling
/// `error` is accepted as an alias for `.err`. Returns
/// `error.MissingArgument` when no value follows the flag and
/// `error.InvalidArgument` (after logging) when the value is not a level name.
fn logLevelValidator(_: Allocator, args: *std.process.ArgIterator) !?log.Level {
    const raw = args.next() orelse return error.MissingArgument;

    if (std.mem.eql(u8, raw, "error")) return .err;
    if (std.meta.stringToEnum(log.Level, raw)) |level| return level;

    log.fatal(.app, "invalid option choice", .{ .arg = "--log-level", .value = raw });
    return error.InvalidArgument;
}
/// Common CLI args.
///
/// Passed as `.shared_options` to the `serve`, `fetch`, `mcp` and `agent`
/// commands, so each of those option structs carries every field listed here.
/// Entries with a `.validator` are parsed by the named function in this file.
const CommonOptions = .{
.{ .name = "obey_robots", .type = bool },
.{ .name = "proxy_bearer_token", .type = ?[:0]const u8 },
.{ .name = "http_proxy", .type = ?[:0]const u8 },
// Optional HTTP tuning knobs; the accessors on `Config` supply a default when unset.
.{ .name = "http_max_concurrent", .type = ?u8 },
.{ .name = "http_max_host_open", .type = ?u8 },
.{ .name = "http_timeout", .type = ?u31 },
.{ .name = "http_connect_timeout", .type = ?u31 },
.{ .name = "http_max_response_size", .type = ?usize },
.{ .name = "ws_max_concurrent", .type = ?u8 },
.{ .name = "insecure_disable_tls_host_verification", .type = bool },
// Logging.
.{ .name = "log_level", .type = ?log.Level, .validator = logLevelValidator },
.{ .name = "log_format", .type = ?log.Format },
.{ .name = "log_filter_scopes", .type = log.Scope, .multiple = true, .validator = logFilterScopesValidator },
.{ .name = "user_agent_suffix", .type = ?[]const u8 },
.{ .name = "http_cache_dir", .type = ?[]const u8 },
// All three web_bot_auth_* values must be set for `Config.webBotAuth` to return a config.
.{ .name = "web_bot_auth_key_file", .type = ?[]const u8 },
.{ .name = "web_bot_auth_keyid", .type = ?[]const u8 },
.{ .name = "web_bot_auth_domain", .type = ?[]const u8 },
.{ .name = "user_agent", .type = ?[]const u8 },
.{ .name = "block_private_networks", .type = bool },
.{ .name = "block_cidrs", .type = ?[]const u8 },
.{ .name = "cookie", .type = ?[]const u8 },
.{ .name = "cookie_jar", .type = ?[]const u8 },
.{ .name = "storage_engine", .type = ?Storage.EngineType },
.{ .name = "storage_sqlite_path", .type = ?[:0]const u8 },
.{ .name = "disable_subframes", .type = bool },
.{ .name = "disable_workers", .type = bool },
.{ .name = "enable_external_stylesheets", .type = bool },
};
/// Validator for `--dump`, whose format value is optional.
///
/// Looks at the next argument without consuming it (by iterating a copy of
/// `args`). If it names a `DumpFormat`, the argument is consumed and that
/// format is returned. Otherwise — no further argument, or one that is not a
/// format name — `args` is left untouched and `.html` is returned.
fn dumpValidator(_: Allocator, args: *std.process.ArgIterator) !?DumpFormat {
    // Work on a copy so a non-matching argument stays available to the parser.
    var lookahead = args.*;

    const candidate = lookahead.next() orelse return .html;
    const format = std.meta.stringToEnum(DumpFormat, candidate) orelse return .html;

    // The peeked argument was a valid format: consume it from the real iterator.
    _ = args.next();
    return format;
}
/// Tag enum of the `zenai.provider.Client` union; the value type of the
/// agent command's `provider` option.
pub const AiProvider = std.meta.Tag(zenai.provider.Client);
/// Controls how chatty `agent` mode is on stderr.
///
/// Levels are declared from least to most verbose; `atLeast` relies on that
/// declaration order.
pub const AgentVerbosity = enum {
    /// REPL: spinner + per-turn summary. Non-REPL: final answer + errors only.
    low,
    /// + one `● [tool: …]` line per tool call.
    medium,
    /// + the matching `[result: …]` body for each call.
    high,

    /// Returns true when `self` is `min` or a more verbose level.
    pub fn atLeast(self: AgentVerbosity, min: AgentVerbosity) bool {
        const threshold = @intFromEnum(min);
        const current = @intFromEnum(self);
        return threshold <= current;
    }
};
/// Validator for `--wait-script-file`.
///
/// Consumes the next argument as a path relative to the current working
/// directory and returns the file contents as a 0-terminated slice allocated
/// with `allocator`. Files larger than 1 MiB are rejected by the read.
/// A missing path or any read failure is logged and reported as
/// `error.InvalidArgument`.
fn waitScriptFileValidator(allocator: Allocator, args: *std.process.ArgIterator) !?[:0]const u8 {
    const max_script_bytes = 1024 * 1024;

    if (args.next()) |script_path| {
        const contents = std.fs.cwd().readFileAllocOptions(allocator, script_path, max_script_bytes, null, .of(u8), 0) catch |read_err| {
            log.fatal(.app, "failed to read file", .{ .arg = "--wait-script-file", .path = script_path, .err = read_err });
            return error.InvalidArgument;
        };
        return contents;
    }

    log.fatal(.app, "missing argument value", .{ .arg = "--wait-script-file" });
    return error.InvalidArgument;
}
/// Validator for `--inject-script-file`.
///
/// Consumes the next argument as a path relative to the current working
/// directory, reads the whole file (no size cap) with `allocator` and appends
/// the bytes to `list`. On success the appended slice is owned by whoever
/// owns `list`.
///
/// Errors: a missing path or a read failure is logged and reported as
/// `error.InvalidArgument`; a failure to grow `list` is propagated, and the
/// freshly read buffer is released first so it does not leak.
fn injectScriptFileValidator(
    allocator: Allocator,
    args: *std.process.ArgIterator,
    list: *std.ArrayList([]const u8),
) !void {
    const path = args.next() orelse {
        log.fatal(.app, "missing argument value", .{ .arg = "--inject-script-file" });
        return error.InvalidArgument;
    };
    const bytes = std.fs.cwd().readFileAllocOptions(allocator, path, std.math.maxInt(usize), null, .of(u8), null) catch |err| {
        log.fatal(.app, "failed to read file", .{ .arg = "--inject-script-file", .path = path, .err = err });
        return error.InvalidArgument;
    };
    // Ownership only moves to `list` once the append succeeds.
    errdefer allocator.free(bytes);
    try list.append(allocator, bytes);
}
/// Definition of every command and its arguments. See `cli.zig` for how the
/// builder interprets this table.
///
/// `Commands.Enum`, `Commands.Union` and `Commands.parse` are used further
/// down in this file. The `switch` statements in this file also handle a
/// `.help` mode that has no entry below.
/// NOTE(review): `.help` is presumably added by `cli.Builder` — confirm in `cli.zig`.
const Commands = cli.Builder(.{
.{
.name = "serve",
.options = .{
.{ .name = "host", .type = []const u8, .default = "127.0.0.1" },
.{ .name = "port", .type = u16, .default = 9222 },
.{ .name = "advertise_host", .type = ?[]const u8 },
// Deprecated: `parseArgs` logs a warning when this is set.
.{ .name = "timeout", .type = ?u31 },
.{ .name = "cdp_max_connections", .type = u16, .default = 16 },
.{ .name = "cdp_max_pending_connections", .type = u16, .default = 128 },
},
.shared_options = CommonOptions,
},
.{
.name = "fetch",
// This argument can be given out of order.
.positional = .{ .name = "url", .type = ?[:0]const u8 },
.options = .{
// The format value is optional; `dumpValidator` falls back to `html`.
.{ .name = "dump", .type = ?DumpFormat, .validator = dumpValidator },
.{ .name = "with_base", .type = bool },
.{ .name = "with_frames", .type = bool },
.{ .name = "strip_mode", .type = dump.Opts.Strip, .default = dump.Opts.Strip{} },
.{ .name = "wait_ms", .type = u32, .default = 5_000 },
.{ .name = "wait_until", .type = ?WaitUntil },
.{
.name = "wait_script",
.type = ?[:0]const u8,
// The variant's validator reads the script from a file path instead.
.variants = .{
.{ .name = "wait_script_file", .validator = waitScriptFileValidator },
},
},
.{ .name = "wait_selector", .type = ?[:0]const u8 },
.{
.name = "inject_script",
.type = []const u8,
.multiple = true,
// The variant's validator appends the contents of a file to the same list.
.variants = .{
.{ .name = "inject_script_file", .validator = injectScriptFileValidator },
},
},
.{ .name = "terminate_ms", .type = ?u32 },
.{ .name = "json", .type = bool },
},
.shared_options = CommonOptions,
},
.{
.name = "mcp",
.options = .{
// `Config.port` returns 0 for mcp when this is unset.
.{ .name = "cdp_port", .type = ?u16 },
},
.shared_options = CommonOptions,
},
.{
.name = "agent",
.positional = .{ .name = "script_file", .type = ?[:0]const u8 },
.options = .{
.{ .name = "provider", .type = ?AiProvider },
.{ .name = "model", .type = ?[:0]const u8 },
.{ .name = "base_url", .type = ?[:0]const u8 },
.{ .name = "system_prompt", .type = ?[:0]const u8 },
.{ .name = "task", .type = ?[]const u8 },
.{ .name = "attach", .short = 'a', .type = []const u8, .multiple = true },
// When unset, `agentVerbosity` picks a default from `task` and whether stderr is a TTY.
.{ .name = "verbosity", .type = ?AgentVerbosity },
.{ .name = "list_models", .type = bool },
.{ .name = "no_llm", .type = bool },
},
.shared_options = CommonOptions,
},
.{ .name = "version", .options = .{} },
});
/// Enum naming each command, as generated by `Commands`.
pub const RunMode = Commands.Enum;
/// Tagged union holding the parsed options of the selected command.
pub const Mode = Commands.Union;
/// Options struct carried by the `agent` variant of `Mode`.
pub const Agent = @FieldType(Mode, "agent");
/// The selected command together with its parsed options.
mode: Mode,
/// Executable name; substituted into the usage text by `printUsageAndExit`.
exec_name: []const u8,
/// Pre-formatted HTTP headers. Left `undefined` by `init` for modes where
/// `modeNeedsHttp` returns false (`help`, `version`).
http_headers: HttpHeaders,
/// Reports whether `mode` needs HTTP headers: true for every mode except
/// `help` and `version`.
fn modeNeedsHttp(mode: Mode) bool {
    return switch (mode) {
        .help, .version => false,
        else => true,
    };
}
/// Builds a `Config` for `mode`.
///
/// For modes that need HTTP (see `modeNeedsHttp`) the HTTP headers are
/// formatted with `allocator` and must later be released with `deinit`.
/// For the other modes `http_headers` stays `undefined` and must not be read.
/// Allocation failures from `HttpHeaders.init` are propagated.
pub fn init(allocator: Allocator, exec_name: []const u8, mode: Mode) !Config {
    var self: Config = .{
        .mode = mode,
        .exec_name = exec_name,
        .http_headers = undefined,
    };
    if (!modeNeedsHttp(mode)) return self;

    // HttpHeaders.init reads the user-agent and proxy options through `self`.
    self.http_headers = try HttpHeaders.init(allocator, &self);
    return self;
}
/// Releases the HTTP headers created by `init`. Does nothing for modes where
/// `init` never created them. `allocator` must be the one given to `init`.
pub fn deinit(self: *const Config, allocator: Allocator) void {
    if (!modeNeedsHttp(self.mode)) return;
    self.http_headers.deinit(allocator);
}
/// Returns true unless `insecure_disable_tls_host_verification` was set.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn tlsVerifyHost(self: *const Config) bool {
    const verification_disabled: bool = switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| o.insecure_disable_tls_host_verification,
        else => unreachable,
    };
    return !verification_disabled;
}
/// Returns the `obey_robots` flag of the active command.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn obeyRobots(self: *const Config) bool {
    switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| return o.obey_robots,
        else => unreachable,
    }
}
/// Returns the `disable_subframes` flag of the active command.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn disableSubframes(self: *const Config) bool {
    switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| return o.disable_subframes,
        else => unreachable,
    }
}
/// Returns the `disable_workers` flag of the active command.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn disableWorkers(self: *const Config) bool {
    const disabled: bool = switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| o.disable_workers,
        else => unreachable,
    };
    return disabled;
}
/// Returns the `enable_external_stylesheets` flag of the active command.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn enableExternalStylesheets(self: *const Config) bool {
    const enabled: bool = switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| o.enable_external_stylesheets,
        else => unreachable,
    };
    return enabled;
}
/// Returns the configured `http_proxy`, or null when none was given.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn httpProxy(self: *const Config) ?[:0]const u8 {
    switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| return o.http_proxy,
        else => unreachable,
    }
}
/// Returns the configured `proxy_bearer_token`, or null when none was given.
/// `help` and `version` carry no such option and always yield null.
pub fn proxyBearerToken(self: *const Config) ?[:0]const u8 {
    switch (self.mode) {
        .help, .version => return null,
        inline .serve, .fetch, .mcp, .agent => |o| return o.proxy_bearer_token,
    }
}
/// Returns `http_max_concurrent`, defaulting to 10 when unset.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn httpMaxConcurrent(self: *const Config) u8 {
    const configured: ?u8 = switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| o.http_max_concurrent,
        else => unreachable,
    };
    return configured orelse 10;
}
/// Returns `http_max_host_open`, defaulting to 4 when unset.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn httpMaxHostOpen(self: *const Config) u8 {
    const configured: ?u8 = switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| o.http_max_host_open,
        else => unreachable,
    };
    return configured orelse 4;
}
/// Returns `http_connect_timeout`, defaulting to 0 when unset.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn httpConnectTimeout(self: *const Config) u31 {
    const configured: ?u31 = switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| o.http_connect_timeout,
        else => unreachable,
    };
    return configured orelse 0;
}
/// Returns `http_timeout`, defaulting to 5000 when unset.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn httpTimeout(self: *const Config) u31 {
    const configured: ?u31 = switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| o.http_timeout,
        else => unreachable,
    };
    return configured orelse 5000;
}
/// Returns the redirect limit. This is a fixed value (10), not a CLI option.
pub fn httpMaxRedirects(_: *const Config) u8 {
    const max_redirects: u8 = 10;
    return max_redirects;
}
/// Returns `http_max_response_size`, or null when no limit was configured.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn httpMaxResponseSize(self: *const Config) ?usize {
    switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| return o.http_max_response_size,
        else => unreachable,
    }
}
/// Returns `ws_max_concurrent`, defaulting to 8 when unset.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn wsMaxConcurrent(self: *const Config) u8 {
    const configured: ?u8 = switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| o.ws_max_concurrent,
        else => unreachable,
    };
    return configured orelse 8;
}
/// Returns the log level to apply, or null when none is selected.
///
/// serve/fetch/mcp return the `log_level` option as given. Agent mode quiets
/// page-driven `console.error` noise: without an explicit `log_level` it
/// yields `.err` unless the resolved verbosity is `.high`, in which case it
/// yields null. Other modes hit `unreachable`.
pub fn logLevel(self: *const Config) ?log.Level {
    switch (self.mode) {
        inline .serve, .fetch, .mcp => |o| return o.log_level,
        .agent => |agent| {
            if (agent.log_level) |explicit| return explicit;
            return if (agentVerbosity(agent) == .high) null else .err;
        },
        else => unreachable,
    }
}
/// Resolves the effective agent verbosity.
///
/// An explicit `--verbosity` always wins. Otherwise `--task` with stderr
/// captured (pipe/file) defaults to `.high`, so benchmark harnesses and other
/// programmatic consumers get the [tool/result] trace; the REPL and `--task`
/// on a TTY default to `.low`.
pub fn agentVerbosity(opts: Agent) AgentVerbosity {
    if (opts.verbosity) |explicit| return explicit;

    // No task means no one-shot run: the TTY state is not consulted at all.
    if (opts.task == null) return .low;

    return if (stderrIsTty()) .low else .high;
}
/// Cached answer to "is stderr a TTY?".
///
/// `isatty(STDERR)` is a syscall and `agentVerbosity` is on the log hot path
/// (every gate check resolves through it), so the answer is computed once —
/// the fd doesn't change after process start. Only read this through
/// `stderrIsTty`, which ensures it has been initialised.
var stderr_tty_cached: bool = undefined;
var stderr_tty_once = std.once(initStderrTty);

/// One-time initialiser run through `stderr_tty_once`.
fn initStderrTty() void {
    const stderr_fd = std.posix.STDERR_FILENO;
    stderr_tty_cached = std.posix.isatty(stderr_fd);
}

/// Returns whether stderr is a TTY; the first call performs the check.
fn stderrIsTty() bool {
    stderr_tty_once.call();
    const is_tty = stderr_tty_cached;
    return is_tty;
}
/// Returns the `log_format` option, or null when none was given.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn logFormat(self: *const Config) ?log.Format {
    switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| return o.log_format,
        else => unreachable,
    }
}
/// Returns the list of scopes collected from `--log-filter-scopes`.
/// The list struct is returned by value; this function allocates nothing.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn logFilterScopes(self: *const Config) std.ArrayList(log.Scope) {
    switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| return o.log_filter_scopes,
        else => unreachable,
    }
}
/// Returns the `user_agent_suffix` option, or null when none was given.
/// `help` and `version` always yield null.
pub fn userAgentSuffix(self: *const Config) ?[]const u8 {
    switch (self.mode) {
        .help, .version => return null,
        inline .serve, .fetch, .mcp, .agent => |o| return o.user_agent_suffix,
    }
}
/// Returns the `user_agent` option, or null when none was given.
/// `help` and `version` always yield null.
pub fn userAgent(self: *const Config) ?[]const u8 {
    switch (self.mode) {
        .help, .version => return null,
        inline .serve, .fetch, .mcp, .agent => |o| return o.user_agent,
    }
}
/// Returns the `http_cache_dir` option, or null when none was given.
/// Modes other than serve/fetch/mcp/agent always yield null.
pub fn httpCacheDir(self: *const Config) ?[]const u8 {
    switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| return o.http_cache_dir,
        else => return null,
    }
}
/// Returns the `cookie` option, or null when none was given.
/// Modes other than serve/fetch/mcp/agent always yield null.
pub fn cookieFile(self: *const Config) ?[]const u8 {
    switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| return o.cookie,
        else => return null,
    }
}
/// Returns the `cookie_jar` option for fetch/mcp/agent, or null when none
/// was given. Every other mode — including serve — yields null.
// NOTE(review): serve also receives `cookie_jar` through the shared options
// but is reported as null here — confirm this is intentional.
pub fn cookieJarFile(self: *const Config) ?[]const u8 {
    switch (self.mode) {
        inline .fetch, .mcp, .agent => |o| return o.cookie_jar,
        else => return null,
    }
}
/// Returns the port for the active command: serve's `port`, or mcp's
/// `cdp_port` (0 when unset). Other modes hit `unreachable`.
pub fn port(self: *const Config) u16 {
    switch (self.mode) {
        .serve => |serve| return serve.port,
        .mcp => |mcp| return mcp.cdp_port orelse 0,
        else => unreachable,
    }
}
/// Returns the host to advertise: serve's `advertise_host`, falling back to
/// its `host`; mcp always uses "127.0.0.1". Other modes hit `unreachable`.
pub fn advertiseHost(self: *const Config) []const u8 {
    switch (self.mode) {
        .serve => |serve| {
            if (serve.advertise_host) |advertised| return advertised;
            return serve.host;
        },
        .mcp => return "127.0.0.1",
        else => unreachable,
    }
}
/// Builds the web-bot-auth configuration from the three `web_bot_auth_*`
/// options. Returns null unless all three are set; `help` and `version`
/// always yield null.
pub fn webBotAuth(self: *const Config) ?WebBotAuthConfig {
    switch (self.mode) {
        .help, .version => return null,
        inline .serve, .fetch, .mcp, .agent => |o| {
            const key_file = o.web_bot_auth_key_file orelse return null;
            const keyid = o.web_bot_auth_keyid orelse return null;
            const domain = o.web_bot_auth_domain orelse return null;
            return WebBotAuthConfig{
                .key_file = key_file,
                .keyid = keyid,
                .domain = domain,
            };
        },
    }
}
/// Returns the `block_private_networks` flag of the active command.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn blockPrivateNetworks(self: *const Config) bool {
    switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| return o.block_private_networks,
        else => unreachable,
    }
}
/// Returns the raw `block_cidrs` option string, or null when none was given.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn blockCidrs(self: *const Config) ?[]const u8 {
    switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| return o.block_cidrs,
        else => unreachable,
    }
}
/// Returns the connection limit: serve's `cdp_max_connections`, 16 for mcp,
/// and 0 for fetch and agent. Other modes hit `unreachable`.
pub fn maxConnections(self: *const Config) u16 {
    switch (self.mode) {
        .serve => |serve| return serve.cdp_max_connections,
        .mcp => return 16,
        .fetch, .agent => return 0,
        else => unreachable,
    }
}
/// Returns the pending-connection limit: serve's
/// `cdp_max_pending_connections`, or 128 for mcp. Other modes (including
/// fetch and agent) hit `unreachable`.
pub fn maxPendingConnections(self: *const Config) u31 {
    switch (self.mode) {
        .serve => |serve| return serve.cdp_max_pending_connections,
        .mcp => return 128,
        else => unreachable,
    }
}
/// Returns the `storage_engine` option, or null when none was given.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn storageEngine(self: *const Config) ?Storage.EngineType {
    switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| return o.storage_engine,
        else => unreachable,
    }
}
/// Returns the `storage_sqlite_path` option, or null when none was given.
/// Only valid for serve/fetch/mcp/agent; other modes hit `unreachable`.
pub fn storageSqlitePath(self: *const Config) ?[:0]const u8 {
    switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |o| return o.storage_sqlite_path,
        else => unreachable,
    }
}
/// Output formats selectable with the fetch command's `dump` option.
/// The tag names are the strings matched on the command line by
/// `dumpValidator`, which falls back to `html` when no valid name follows.
pub const DumpFormat = enum {
html,
markdown,
wpt,
semantic_tree,
semantic_tree_text,
};
/// Values accepted by the fetch command's `wait_until` option.
// NOTE(review): these presumably name page lifecycle events to wait for —
// confirm against the code that consumes `wait_until`.
pub const WaitUntil = enum {
load,
domcontentloaded,
networkalmostidle,
networkidle,
done,
};
/// Pre-formatted HTTP headers for reuse across Http and Client.
/// Must be initialized with an allocator that outlives all HTTP connections.
pub const HttpHeaders = struct {
    /// User agent used when neither `user_agent` nor `user_agent_suffix` is set.
    const user_agent_base: [:0]const u8 = "Lightpanda/1.0";

    const Brand = struct {
        brand: [:0]const u8,
        version: [:0]const u8,
    };

    /// Source of truth for client-hints brand data. Both the Sec-Ch-Ua
    /// HTTP header and navigator.userAgentData.brands derive from this
    /// list, so the two sides cannot drift.
    pub const brands = [_]Brand{
        .{ .brand = "Lightpanda", .version = "1" },
    };

    /// Complete `Sec-Ch-Ua` header line, assembled at comptime from `brands`:
    /// each entry becomes `"<brand>";v="<version>"`, entries are joined by ", ".
    pub const sec_ch_ua: [:0]const u8 = blk: {
        var header: [:0]const u8 = "Sec-Ch-Ua:";
        for (brands, 0..) |entry, idx| {
            const separator = if (idx == 0) " " else ", ";
            header = header ++ separator ++ "\"" ++ entry.brand ++ "\";v=\"" ++ entry.version ++ "\"";
        }
        break :blk header;
    };

    // Some bot-protection frontends (e.g. Akamai on canada.ca) RST the HTTP/2
    // stream when a client sends Accept-Encoding without Accept-Language,
    // treating it as a bot signal. Ship a neutral default so we look like a
    // normal client.
    pub const accept_language: [:0]const u8 = "Accept-Language: en-US,en;q=0.9";

    // Document-navigation Accept value Chrome sends.
    pub const navigation_accept: [:0]const u8 = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";

    /// User agent value (e.g. "Lightpanda/1.0"). Heap-allocated unless it is
    /// `user_agent_base` itself.
    user_agent: [:0]const u8,
    /// Full `User-Agent: <value>` header line; always heap-allocated.
    user_agent_header: [:0]const u8,
    /// `Proxy-Authorization: Bearer <token>` header line, or null when no
    /// proxy bearer token is configured.
    proxy_bearer_header: ?[:0]const u8,

    /// Formats the headers from `config`.
    ///
    /// User agent precedence: an explicit `user_agent` is used verbatim;
    /// otherwise a `user_agent_suffix` is appended to the base value after a
    /// space; otherwise the base value is used without allocating.
    /// On allocation failure everything allocated so far is released.
    pub fn init(allocator: Allocator, config: *const Config) !HttpHeaders {
        // The base string is static; only a custom or suffixed value is owned.
        const owns_user_agent = config.userAgent() != null or config.userAgentSuffix() != null;

        const user_agent: [:0]const u8 = ua: {
            if (config.userAgent()) |custom| {
                break :ua try allocator.dupeZ(u8, custom);
            }
            if (config.userAgentSuffix()) |suffix| {
                break :ua try std.fmt.allocPrintSentinel(allocator, "{s} {s}", .{ user_agent_base, suffix }, 0);
            }
            break :ua user_agent_base;
        };
        errdefer if (owns_user_agent) allocator.free(user_agent);

        const user_agent_header = try std.fmt.allocPrintSentinel(allocator, "User-Agent: {s}", .{user_agent}, 0);
        errdefer allocator.free(user_agent_header);

        var headers: HttpHeaders = .{
            .user_agent = user_agent,
            .user_agent_header = user_agent_header,
            .proxy_bearer_header = null,
        };
        if (config.proxyBearerToken()) |token| {
            headers.proxy_bearer_header = try std.fmt.allocPrintSentinel(allocator, "Proxy-Authorization: Bearer {s}", .{token}, 0);
        }
        return headers;
    }

    /// Frees everything `init` allocated. `allocator` must be the one passed
    /// to `init`.
    pub fn deinit(self: *const HttpHeaders, allocator: Allocator) void {
        if (self.proxy_bearer_header) |hdr| allocator.free(hdr);
        allocator.free(self.user_agent_header);

        // Pointer identity tells the static base string from an owned copy.
        const is_static_base = self.user_agent.ptr == user_agent_base.ptr;
        if (!is_static_base) allocator.free(self.user_agent);
    }
};
/// Prints the usage text for `help_for` with `std.debug.print`, then ends the
/// run: `std.process.cleanExit()` when `success` is true, exit code 1
/// otherwise.
///
/// Templates come from `help.zon` and are assembled at comptime. The
/// executable name is substituted at runtime; command help additionally
/// receives the build-dependent defaults for log level ("info" in Debug
/// builds, "warn" otherwise) and log format ("pretty" in Debug builds,
/// "logfmt" otherwise).
pub fn printUsageAndExit(self: *const Config, help_for: RunMode, success: bool) void {
    const Help = @import("help.zon");
    const comptimePrint = std.fmt.comptimePrint;

    const debug_build = builtin.mode == .Debug;
    const default_level = if (comptime debug_build) "info" else "warn";
    const default_format = if (comptime debug_build) "pretty" else "logfmt";

    const name = self.exec_name;

    switch (help_for) {
        .version => {
            const usage = Help.version ++ "\n";
            std.debug.print(usage, .{name});
        },
        // Requested help for everything.
        .help => {
            const usage = comptimePrint(
                \\{s}
                \\
            , .{Help.general});
            std.debug.print(usage, .{name});
        },
        // Command-specific help followed by the shared option reference.
        inline .fetch, .serve, .mcp, .agent => |tag| {
            const usage = comptimePrint(
                \\{s}
                \\
                \\{s}
                \\
            , .{ @field(Help, @tagName(tag)), Help.common_options });
            std.debug.print(usage, .{ name, default_level, default_format });
        },
    }

    if (!success) std.process.exit(1);
    return std.process.cleanExit();
}
/// Parses the process arguments through `Commands.parse` and builds the
/// `Config`. Logs a deprecation warning when `serve` is given `--timeout`.
/// Errors from parsing and from `init` are propagated.
pub fn parseArgs(allocator: Allocator) !Config {
    const exec_name, const command = try Commands.parse(allocator);

    switch (command) {
        .serve => |serve| if (serve.timeout != null) {
            log.warn(.app, "--timeout is deprecated", .{});
        },
        else => {},
    }

    return Config.init(allocator, exec_name, command);
}
/// Checks a user-supplied user agent string.
///
/// Returns `error.NonPrintable` if any byte is not printable ASCII, and
/// `error.Reserved` if the string contains "mozilla" in any letter case.
/// The printable check runs first, so it wins when both apply. An empty
/// string is accepted.
pub fn validateUserAgent(ua: []const u8) !void {
    var i: usize = 0;
    while (i < ua.len) : (i += 1) {
        if (!std.ascii.isPrint(ua[i])) return error.NonPrintable;
    }

    if (std.ascii.indexOfIgnoreCase(ua, "mozilla")) |_| {
        return error.Reserved;
    }
}