Merge branch 'main' into agent

This commit is contained in:
Adrià Arrufat
2026-05-19 11:33:46 +02:00
7 changed files with 558 additions and 439 deletions

View File

@@ -235,7 +235,6 @@ const Commands = cli.Builder(.{
.shared_options = CommonOptions,
},
.{ .name = "version", .options = .{} },
.{ .name = "help", .positional = .{ .name = "subcommand", .type = ?[]const u8 }, .options = .{} },
});
pub const RunMode = Commands.Enum;
@@ -588,394 +587,53 @@ pub const HttpHeaders = struct {
}
};
pub fn printUsageAndExit(self: *const Config, success: bool) void {
// MAX_HELP_LEN|
const common_options =
\\
\\--insecure-disable-tls-host-verification
\\ Disables host verification on all HTTP requests. This is an
\\ advanced option which should only be set if you understand
\\ and accept the risk of disabling host verification.
\\
\\--obey-robots
\\ Fetches and obeys the robots.txt (if available) of the web pages
\\ we make requests towards.
\\ Defaults to false.
\\
\\--disable-subframes
\\ Skip loading <iframe> elements. The HTML parser registers them
\\ in the DOM but no child frame, document fetch, or
\\ Page.frameAttached / Runtime.executionContextCreated events are
\\ produced. Useful for pages that load many analytics / pixel
\\ iframes where each subframe navigation invalidates driver-side
\\ executionContextIds (lightpanda-io/browser#2400). On the CDP
\\ serve path, drivers can also toggle this per-session via the
\\ LP.configureLoading method.
\\ Defaults to false.
\\
\\--disable-workers
\\ Skip loading dedicated Web Workers. The Worker constructor
\\ still returns a Worker object so calling pages do not throw,
\\ but no script fetch is initiated and the worker scope's
\\ eval never runs (postMessage from the page to the worker is
\\ queued indefinitely). Sidesteps a v8 entered-context
\\ corruption that crashes the process when an in-page Worker
\\ completes its script fetch under specific HTTP-proxy timing
\\ conditions on Shopify storefront pages. Drivers can also
\\ toggle this per-session via the LP.configureLoading method.
\\ Defaults to false.
\\
\\--block-private-networks
\\ Blocks HTTP requests to private/internal IP addresses
\\ after DNS resolution. Useful for sandboxing, multi-tenant
\\ deployments, and preventing access to internal infrastructure
\\ regardless of what triggers the request (JavaScript, HTML
\\ resources, redirects, etc.).
\\ Defaults to false.
\\
\\--block-cidrs
\\ Additional CIDR ranges to block, comma-separated.
\\ Prefix with '-' to allow (exempt from blocking).
\\ e.g. --block-cidrs 169.254.169.254/32,fd00:ec2::254/128
\\ e.g. --block-cidrs 10.0.0.0/8,-10.0.0.42/32
\\ Can be used standalone or combined with --block-private-networks.
\\
\\--http-proxy The HTTP proxy to use for all HTTP requests.
\\ A username:password can be included for basic authentication.
\\ Defaults to none.
\\
\\--proxy-bearer-token
\\ The <token> to send for bearer authentication with the proxy
\\ Proxy-Authorization: Bearer <token>
\\
\\--http-max-concurrent
\\ The maximum number of concurrent HTTP requests.
\\ Defaults to 10.
\\
\\--http-max-host-open
\\ The maximum number of open connection to a given host:port.
\\ Defaults to 4.
\\
\\--http-connect-timeout
\\ The time, in milliseconds, for establishing an HTTP connection
\\ before timing out. 0 means it never times out.
\\ Defaults to 0.
\\
\\--http-timeout
\\ The maximum time, in milliseconds, the transfer is allowed
\\ to complete. 0 means it never times out.
\\ Defaults to 10000.
\\
\\--http-max-response-size
\\ Limits the acceptable response size for any request
\\ (e.g. XHR, fetch, script loading, ...).
\\ Defaults to no limit.
\\
\\--ws-max-concurrent
\\ The maximum number of concurrent WebSocket connections.
\\ Defaults to 8.
\\
\\--log-level The log level: debug, info, warn, error or fatal.
\\ Defaults to
++ (if (builtin.mode == .Debug) " info." else "warn.") ++
\\
\\
\\--log-format The log format: pretty or logfmt.
\\ Defaults to
++ (if (builtin.mode == .Debug) " pretty." else " logfmt.") ++
\\
\\
\\--log-filter-scopes
\\ Filter out too verbose logs per scope:
\\ http, unknown_prop, event, ...
\\
\\--user-agent Override the User-Agent header entirely
\\ User-Agent mustn't impersonate other browser.
\\ Any value containing "Mozilla" is forbidden.
\\ The browser will continue to send Sec-Ch-Ua header.
\\ Incompatible with --user-agent-suffix
\\
\\--user-agent-suffix
\\ Suffix to append to the Lightpanda/X.Y User-Agent
\\
\\--web-bot-auth-key-file
\\ Path to the Ed25519 private key PEM file.
\\
\\--web-bot-auth-keyid
\\ The JWK thumbprint of your public key.
\\
\\--web-bot-auth-domain
\\ Your domain e.g. yourdomain.com
\\
\\--http-cache-dir
\\ Path to a directory to use as a Filesystem Cache for network resources.
\\ Omitting this will result is no caching.
\\ Defaults to no caching.
\\
\\--storage-engine
\\ The storage engine to use. Choices are: none, sqlite.
\\ Default to none.
\\
\\--storage-sqlite-path
\\ Path to SQLite database file for persistent storage.
\\ Use ":memory:" for in-memory storage.
;
pub fn printUsageAndExit(self: *const Config, help_for: RunMode, success: bool) void {
const exec_name = self.exec_name;
const Help = @import("help.zon");
const is_debug = builtin.mode == .Debug;
const info_or_warn = if (comptime is_debug) "info" else "warn";
const pretty_or_logfmt = if (comptime is_debug) "pretty" else "logfmt";
const comptimePrint = std.fmt.comptimePrint;
// MAX_HELP_LEN|
const fetch_options =
\\fetch command
\\Fetches the specified URL
\\Example: {0s} fetch --dump html https://lightpanda.io/
\\
\\Options:
\\--dump Dumps document to stdout.
\\ Argument must be 'html', 'markdown', 'semantic_tree', or 'semantic_tree_text'.
\\ Defaults to no dump.
\\
\\--strip-mode Comma separated list of tag groups to remove from dump
\\ the dump. e.g. --strip-mode js,css
\\ - "js" script and link[as=script, rel=preload]
\\ - "ui" includes img, picture, video, css and svg
\\ - "css" includes style and link[rel=stylesheet]
\\ - "full" includes js, ui and css
\\
\\--with-base Add a <base> tag in dump. Defaults to false.
\\
\\--with-frames Includes the contents of iframes. Defaults to false.
\\
\\--wait-ms Wait time in milliseconds. Supersedes all other --wait
\\ parameters.
\\ Defaults to 5000.
\\
\\--wait-until Wait until the specified event. Checked before the other
\\ --wait- options. Supported events: load, domcontentloaded,
\\ networkidle, done.
\\ Defaults to 'done'. If --wait-selector, --wait-script or
\\ --wait-script-file are specified, defaults to none.
\\
\\--wait-selector Wait for an element matching the CSS selector to appear.
\\ Checked after --wait-until condition is met.
\\
\\--wait-script Wait for a JavaScript expression to return truthy.
\\ Checked after --wait-until condition is met.
\\
\\--wait-script-file
\\ Like --wait-script, but reads the script from a file.
\\
\\--inject-script JavaScript to execute as the document's <head> is
\\ parsed, before any other scripts in the page run.
\\ Can be passed multiple times; scripts run in order.
\\
\\--inject-script-file
\\ Like --inject-script, but reads the script from a file.
\\ Can be passed multiple times; can be mixed with
\\ --inject-script and runs in CLI order.
\\
\\--terminate-ms Hard deadline in milliseconds. After this time elapses,
\\ JavaScript execution is forcibly terminated (e.g. for
\\ pages with endless scripts). Unlike --wait-ms, which
\\ only stops waiting, --terminate-ms aborts the page.
\\ Defaults to no terminate.
\\
\\--cookie Path to a JSON file to load cookies from (read-only).
\\ Defaults to no cookie loading.
\\
\\--cookie-jar Path to a JSON file to save cookies to on exit (write-only).
\\ Defaults to no cookie saving.
\\
++ common_options;
// MAX_HELP_LEN|
const serve_options =
\\serve command
\\Starts a websocket CDP server
\\Example: {0s} serve --host 127.0.0.1 --port 9222
\\
\\Options:
\\--host Host of the CDP server
\\ Defaults to "127.0.0.1"
\\
\\--port Port of the CDP server
\\ Defaults to 9222
\\
\\--advertise-host
\\ The host to advertise, e.g. in the /json/version response.
\\ Useful, for example, when --host is 0.0.0.0.
\\ Defaults to --host value
\\
\\--cdp-max-connections
\\ Maximum number of simultaneous CDP connections.
\\ Defaults to 16.
\\
\\--cdp-max-pending-connections
\\ Maximum pending connections in the accept queue.
\\ Defaults to 128.
\\
\\--cookie Path to a JSON file to load cookies from (read-only).
\\ Defaults to no cookie loading.
\\
++ common_options;
// MAX_HELP_LEN|
const mcp_options =
\\mcp command
\\Starts an MCP (Model Context Protocol) server over stdio
\\Example: {0s} mcp
\\
\\Options:
\\--cookie Path to a JSON file to load cookies from (read-only).
\\ Defaults to no cookie loading.
\\
\\--cookie-jar Path to a JSON file to save cookies to on exit (write-only).
\\ Defaults to no cookie saving.
\\
++ common_options;
// MAX_HELP_LEN|
const agent_options =
\\agent command
\\Starts an interactive AI agent that can browse the web
\\Example: {0s} agent (auto-detects API key from env)
\\Example: {0s} agent --provider anthropic --model claude-sonnet-4-6
\\Example: {0s} agent --provider ollama --model gemma4
\\Example: {0s} agent --no-llm (basic PandaScript-only REPL)
\\Example: {0s} agent script.lp (replay a recorded script)
\\Example: {0s} agent -i script.lp (replay then drop into REPL,
\\ appending new commands to the file)
\\
\\Arguments:
\\[script_file] Optional path to a .lp script.
\\ Without -i: replays the script (no LLM calls).
\\ With -i: replays if present, then enters the REPL and
\\ appends any new commands to the file (creating it if
\\ it does not yet exist).
\\ Caution: .lp files can contain EVAL blocks that run
\\ arbitrary JavaScript in the page. Only replay scripts
\\ you trust, the same way you would a shell script.
\\
\\Options:
\\--provider The AI provider: anthropic, openai, gemini, or ollama.
\\ Optional. When omitted, lightpanda auto-detects an API
\\ key from your environment (ANTHROPIC_API_KEY,
\\ OPENAI_API_KEY, GOOGLE_API_KEY/GEMINI_API_KEY). With
\\ exactly one key set: that provider is used. With
\\ multiple keys on a TTY: you'll be prompted to pick;
\\ in non-interactive contexts, pass --provider
\\ explicitly. With no keys set: falls back to the basic
\\ REPL (PandaScript only, no natural-language input,
\\ no LOGIN / ACCEPT_COOKIES keywords, no --self-heal).
\\
\\--no-llm Force the basic REPL even when an API key is present
\\ or --provider is set. Useful for testing PandaScript
\\ without burning tokens, or for disabling the LLM in
\\ a saved command without editing the existing flags.
\\ Wins over --provider.
\\
\\--model The model name to use.
\\ Defaults to a sensible default per provider.
\\ Wins over --pick-model.
\\
\\--pick-model Fetch the provider's model list and prompt you to
\\ pick one at startup, instead of using the baked-in
\\ default. Requires a TTY. Ignored when --model is
\\ also passed.
\\
\\--base-url Override the API base URL for the provider.
\\ Defaults to the provider's standard endpoint.
\\ Ollama default: http://localhost:11434/v1
\\
\\--system-prompt Override the default system prompt.
\\
\\--self-heal On tool errors, ask the model to recover by retrying
\\ with fresh page state instead of aborting.
\\
\\-i, --interactive
\\ After replaying the positional script (if any), drop
\\ into the REPL with the browser state preserved. When
\\ a positional script is present, any new commands
\\ entered in the REPL are appended to that file.
\\ Conflicts with --task.
\\
\\--task One-shot mode: run a single user turn, print the
\\ final answer to stdout, and exit. Conflicts with the
\\ positional script and with --interactive.
\\
\\-a, --attach <path>
\\ Feed a local file to the model alongside --task.
\\ Repeatable, one file per flag. Text files inlined
\\ (max 512 KiB each); images/audio/pdf base64-encoded
\\ (max 20 MiB each). Requires --task.
\\
\\--list-models Print the model IDs usable with `agent` for
\\ --provider, one per line, sorted, and exit.
\\ Auto-detects the provider from env when --provider
\\ is omitted.
\\
\\--verbosity Stderr chatter level: low, medium, high.
\\ low: silent in --task mode (final answer to
\\ stdout only); spinner + summary in REPL.
\\ medium: + one `● [tool: ...]` line per call.
\\ high: + the matching `[result: ...]` body
\\ (required by the benchmarks harness).
\\ Default: high when --task captures stderr to
\\ a pipe or file; low otherwise. low/medium also
\\ raise --log-level to err (mutes page-side
\\ console.error spam) unless --log-level is set
\\ explicitly.
\\
\\API keys are read from the environment:
\\ANTHROPIC_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY/GEMINI_API_KEY.
\\Ollama does not require an API key.
\\
++ common_options;
// MAX_HELP_LEN|
const usage =
\\usage: {0s} command [options] [URL]
\\
\\Command can be either 'fetch', 'serve', 'mcp', 'agent' or 'help'
\\
++ fetch_options ++
\\
\\
++ serve_options ++
\\
\\
++ mcp_options ++
\\
\\
++ agent_options ++
\\
\\version command
\\Displays the version of {0s}
\\
\\help command
\\Displays this message
\\
;
// When called with a subcommand argument,
// print only the relevant subcommand section instead of the full help.
switch (self.mode) {
.help => |h| if (h.subcommand) |sub| {
if (std.mem.eql(u8, sub, "fetch")) {
std.debug.print(fetch_options ++ "\n", .{self.exec_name});
} else if (std.mem.eql(u8, sub, "serve")) {
std.debug.print(serve_options ++ "\n", .{self.exec_name});
} else if (std.mem.eql(u8, sub, "mcp")) {
std.debug.print(mcp_options ++ "\n", .{self.exec_name});
} else if (std.mem.eql(u8, sub, "agent")) {
std.debug.print(agent_options ++ "\n", .{self.exec_name});
} else {
std.debug.print(usage, .{self.exec_name});
}
if (success) return std.process.cleanExit();
std.process.exit(1);
switch (help_for) {
// Requested help for everything.
.help => {
const template = comptimePrint(
\\
\\Command can be either "fetch", "serve", "mcp", "agent" or "help".
\\
\\{s}
\\
\\{s}
\\
\\{s}
\\
\\{s}
\\
\\{s}
\\
\\{s}
\\
\\{s}
\\
, .{ Help.fetch, Help.serve, Help.mcp, Help.agent, Help.common, Help.version, Help.help });
std.debug.print(template, .{ exec_name, info_or_warn, pretty_or_logfmt });
},
inline .fetch, .serve, .mcp, .agent => |tag| {
const template = comptimePrint(
\\{s}
\\
\\{s}
\\
, .{ @field(Help, @tagName(tag)), Help.common });
std.debug.print(template, .{ exec_name, info_or_warn, pretty_or_logfmt });
},
.version => {
const template = Help.version ++ "\n";
std.debug.print(template, .{exec_name});
},
else => {},
}
std.debug.print(usage, .{self.exec_name});
if (success) {
return std.process.cleanExit();
}

View File

@@ -667,7 +667,7 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo
.headers = headers,
.body = opts.body,
.cookie_jar = &session.cookie_jar,
.cookie_origin = self.url,
.cookie_origin = opts.initiator_url orelse self.url,
.resource_type = .document,
.notification = self._session.notification,
.header_callback = frameHeaderDoneCallback,
@@ -768,8 +768,18 @@ fn scheduleNavigationWithArena(originator: *Frame, arena: Allocator, request_url
// runs (processRootQueuedNavigation rebuilds the Page in-place), so dup
// into the QueuedNavigation arena which outlives that tear-down.
var nav_opts = opts;
if (nav_opts.referer == null and std.mem.startsWith(u8, originator.url, "http")) {
nav_opts.referer = try arena.dupe(u8, originator.url);
if (std.mem.startsWith(u8, originator.url, "http")) {
// The same dup feeds two purposes: Referer header (subject to
// Referrer-Policy in the future) and SameSite computation (which
// must use the real initiator regardless of policy). We share the
// same allocation for both.
const dup = try arena.dupeZ(u8, originator.url);
if (nav_opts.referer == null) {
nav_opts.referer = dup;
}
if (nav_opts.initiator_url == null) {
nav_opts.initiator_url = dup;
}
}
const qn = try arena.create(QueuedNavigation);
@@ -1353,11 +1363,14 @@ pub fn iframeAddedCallback(self: *Frame, iframe: *IFrame) !void {
);
};
// Iframe's initial src request carries the parent's URL as Referer and
// as the SameSite initiator. Parent frame outlives this navigate()
// call, so the slice is safe.
const parent_url: ?[:0]const u8 = if (std.mem.startsWith(u8, self.url, "http")) self.url else null;
new_frame.navigate(url, .{
.reason = .initialFrameNavigation,
// Iframe's initial src request carries the parent's URL as Referer.
// Parent frame outlives this navigate() call, so the slice is safe.
.referer = if (std.mem.startsWith(u8, self.url, "http")) self.url else null,
.referer = parent_url,
.initiator_url = parent_url,
}) catch |err| {
log.warn(.frame, "iframe navigate failure", .{ .url = url, .err = err });
self._pending_loads -= 1;
@@ -3647,6 +3660,11 @@ pub const NavigateOpts = struct {
// anchor click / form submit / location.href navigations carry a Referer.
// null on CDP Page.navigate (address-bar) and Page.reload — matches Chrome.
referer: ?[]const u8 = null,
// The URL of the document that initiated this navigation, used as the
// "site for cookies" when computing SameSite. Distinct from `referer`
// because a Referrer-Policy can suppress the Referer header without
// affecting SameSite (which always considers the real initiator).
initiator_url: ?[:0]const u8 = null,
force: bool = false,
kind: NavigationKind = .{ .push = null },
};

View File

@@ -530,7 +530,9 @@ pub fn syncRequest(self: *Client, allocator: Allocator, req: Request) !SyncRespo
switch (status) {
.cdp_socket => {
const cdp = self.cdp_client.?;
_ = cdp.blocking_read(cdp.ctx);
if (cdp.blocking_read(cdp.ctx) == false) {
return error.ClientDisconnected;
}
},
.normal => continue,
}

View File

@@ -240,12 +240,10 @@ fn validateCookieString(str: []const u8) ValidateCookieError!void {
// Invalid if (c < 32 AND c != 9) OR c > 126. Tab is the one
// sub-space byte we allow through (per browser/WPT behavior).
const below = @intFromBool(chunk < space) & @intFromBool(chunk != tab);
const above = @intFromBool(chunk > tilde);
const reduced: std.meta.Int(.unsigned, size) = @bitCast(below | above);
const is_invalid = ((chunk < space) & (chunk != tab)) | (chunk > tilde);
// Got match.
if (reduced != 0) {
if (@reduce(.Or, is_invalid)) {
return error.InvalidByteSequence;
}
}
@@ -940,6 +938,40 @@ test "Jar: forRequest" {
// the 'global2' cookie
}
test "Jar: forRequest SameSite=Strict on cross-site navigation" {
    var jar = Jar.init(testing.allocator);
    defer jar.deinit();

    // Seed the jar with a single SameSite=Strict cookie set by the victim site.
    const victim_url: [:0]const u8 = "http://victim.example/";
    const strict_cookie = try Cookie.parse(testing.allocator, victim_url, "sid=STRICT_COOKIE; Path=/; SameSite=Strict");
    try jar.add(strict_cookie, std.time.timestamp(), true);

    // Every case requests the same victim URL; only the lookup options differ.
    const target_url: [:0]const u8 = "http://victim.example/transfer";
    const cases = [_]struct { want: []const u8, opts: Jar.LookupOpts }{
        // Same-site navigation: the cookie is sent.
        .{
            .want = "sid=STRICT_COOKIE",
            .opts = .{ .origin_url = victim_url, .is_http = true },
        },
        // Cross-site navigation started from attacker.test: the cookie is withheld.
        .{
            .want = "",
            .opts = .{ .origin_url = "http://attacker.test/strict-form", .is_http = true },
        },
        // Browser-initiated navigation (origin_url left unset) counts as same-site.
        .{
            .want = "sid=STRICT_COOKIE",
            .opts = .{ .is_http = true },
        },
    };

    for (cases) |case| {
        // Serialize what the jar would attach to the request, then compare
        // it against the expected cookie string for this case.
        var serialized: std.ArrayList(u8) = .empty;
        defer serialized.deinit(testing.allocator);
        try jar.forRequest(target_url, serialized.writer(testing.allocator), case.opts);
        try testing.expectEqual(case.want, serialized.items);
    }
}
test "Cookie: parse key=value" {
try expectError(error.Empty, null, "");
try expectError(error.InvalidByteSequence, null, &.{ 'a', 30, '=', 'b' });

View File

@@ -23,7 +23,24 @@ const log = lp.log;
/// Comptime CLI builder that generates a tagged union parser from a
/// declarative command recipe. Each command becomes a union variant whose
/// payload is a struct with one field per option.
/// payload is a struct with one field per option. A `help` variant is added
/// automatically; do not include it in the recipe.
///
/// ## Parsing behavior
///
/// `parse` reads `std.process.args`, picks a command by the first non-exec
/// argument, then walks the rest as `--flag value` pairs. Quirks:
///
/// - When no command is given, the parser defaults to `serve`.
/// - `help`, `help <command>`, `<command> help`, and `<command> --help` all
/// yield the `help` union variant. When a command is named (in either
/// position), the variant carries that command's enum tag so callers can
/// print command-specific help; bare `help` and `help help` carry the
/// `.help` tag. An unknown name after `help` returns
/// `error.UnknownCommand`.
/// - Legacy fallback: if the first argument starts with `--` and matches a
/// known fetch/serve flag, the parser sniffs the command from it and
/// re-parses argv. Only exists for backwards compatibility.
///
/// ## Command descriptor fields
///
@@ -34,8 +51,9 @@ const log = lp.log;
/// command. Useful for common flags shared across commands.
/// - `positional: struct` (optional) — a single positional argument with
/// `.name` and `.type`. Type must be an optional pointer-to-u8 slice
/// (e.g. `?[:0]const u8`). Positionals can appear anywhere in argv and
/// must be provided; a missing positional returns `error.MissingArgument`.
/// (e.g. `?[:0]const u8`); it defaults to `null` and may appear anywhere
/// in argv. Passing it more than once returns
/// `error.TooManyPositionalArguments`.
///
/// ## Option descriptor fields
///
@@ -49,6 +67,8 @@ const log = lp.log;
/// `bool` or packed-struct options.
/// - `validator: fn` (optional) — custom parse function that replaces the
/// built-in type switch. See the validator section below.
/// - `variants: tuple` (optional) — alternate flag names that write into
/// the same field. See the variants section below.
///
/// ## Supported types and their defaults
///
@@ -60,10 +80,10 @@ const log = lp.log;
/// - `[]const u8`, `[:0]const u8` (and mutable variants) — string slices
/// duped from argv. Sentinel is preserved. Requires `default` unless `?`.
/// - Enums — parsed via `std.meta.stringToEnum`. Returns
/// `error.UnknownArgument` on a bad value. Requires `default` unless `?`.
/// `error.InvalidArgument` on a bad value. Requires `default` unless `?`.
/// - Packed structs of `bool` fields — parsed from a comma-separated list
/// (e.g. `--strip-mode js,css`). The literal `"full"` sets every field.
/// Unknown names return `error.UnknownArgument`. Requires `default`.
/// Unknown names return `error.InvalidArgument`. Requires `default`.
/// `multiple` is not supported.
/// - Optional types default to `null` when `default` is omitted.
///
@@ -80,6 +100,15 @@ const log = lp.log;
/// When a validator is present, the built-in type switch is skipped entirely.
/// The validator owns advancing the iterator and is free to peek ahead.
///
/// ## Variants
///
/// A `variants` tuple lets multiple flag names write into the same field
/// using different parse logic. Each variant has its own `.name` and an
/// optional `.validator` (with the same signatures as above); the option's
/// `type` and `multiple` are inherited. Useful for "value or file" pairs:
/// e.g. `--wait-script "code"` vs `--wait-script-file path/to/script.js`,
/// both populating the same `wait_script` field.
///
/// ## Example
///
/// ```zig
@@ -113,19 +142,25 @@ const log = lp.log;
/// .{ .name = "strip_mode", .type = StripMode, .default = .{} },
/// .{ .name = "wait_until", .type = ?WaitUntil },
/// .{ .name = "extra_header", .type = []const u8, .multiple = true },
/// .{
/// .name = "wait_script",
/// .type = ?[:0]const u8,
/// .variants = .{
/// .{ .name = "wait_script_file", .validator = readScriptFile },
/// },
/// },
/// },
/// .shared_options = CommonOptions,
/// },
/// .{ .name = "version", .options = .{} },
/// .{ .name = "help", .options = .{} },
/// });
///
/// const _, const cmd = try Cli.parse(arena);
/// switch (cmd) {
/// .serve => |opts| listen(opts.host, opts.port),
/// .fetch => |opts| fetch(opts.url.?, opts.dump),
/// .fetch => |opts| fetch(opts.url orelse return error.UrlRequired, opts.dump),
/// .version => printVersion(),
/// .help => printHelp(),
/// .help => |tag| printHelp(tag),
/// }
/// ```
pub fn Builder(comptime commands: anytype) type {
@@ -134,17 +169,24 @@ pub fn Builder(comptime commands: anytype) type {
/// Enum type for provided commands.
pub const Enum = blk: {
var enum_fields: [commands.len]std.builtin.Type.EnumField = undefined;
for (commands, 0..) |command, i| {
const len = commands.len + 1;
var enum_fields: [len]std.builtin.Type.EnumField = undefined;
var i: usize = 0;
while (i < commands.len) : (i += 1) {
const command = commands[i];
enum_fields[i] = .{ .name = command.name, .value = i };
}
// Entry for help.
enum_fields[i] = .{ .name = "help", .value = i };
break :blk @Type(.{
.@"enum" = .{
.decls = &.{},
.fields = &enum_fields,
.is_exhaustive = true,
.tag_type = std.math.IntFittingRange(0, commands.len),
.tag_type = std.math.IntFittingRange(0, len),
},
});
};
@@ -212,8 +254,12 @@ pub fn Builder(comptime commands: anytype) type {
/// Union type for provided commands.
pub const Union = blk: {
var union_fields: [commands.len]std.builtin.Type.UnionField = undefined;
for (commands, 0..) |command, i| {
const len = commands.len + 1;
var union_fields: [len]std.builtin.Type.UnionField = undefined;
var i: usize = 0;
while (i < commands.len) : (i += 1) {
const command = commands[i];
const Command = @TypeOf(command);
const options = command.options;
@@ -247,6 +293,10 @@ pub fn Builder(comptime commands: anytype) type {
union_fields[i] = .{ .name = command.name, .type = T, .alignment = @alignOf(T) };
}
// Entry for help; just takes `Enum` itself.
const Help = Enum;
union_fields[i] = .{ .name = "help", .type = Help, .alignment = @alignOf(Help) };
break :blk @Type(.{
.@"union" = .{
.decls = &.{},
@@ -268,27 +318,43 @@ pub fn Builder(comptime commands: anytype) type {
inline for (commands) |command| {
// Match a command.
if (std.mem.eql(u8, cmd_str, command.name)) {
const cmd_parsed = parseCommand(allocator, command, &args) catch |err| {
if (err == error.HelpRequested) {
// <subcommand> help requested, return help <subcommand>
var h = @FieldType(Union, "help"){};
if (@hasField(@FieldType(Union, "help"), "subcommand")) {
h.subcommand = command.name;
}
return .{ exec_name, @unionInit(Union, "help", h) };
} else return err;
};
const cmd_parsed = try parseCommand(allocator, command, &args);
return .{ exec_name, cmd_parsed };
}
}
// Help is not in `commands`; so, we have to special case it.
if (std.mem.eql(u8, cmd_str, "help")) {
// Check if we're followed by a command name.
const command_name: []const u8 = args.next() orelse {
// "lightpanda help"; short-circuit.
return .{ exec_name, @unionInit(Union, "help", .help) };
};
inline for (commands) |command| {
if (std.mem.eql(u8, command_name, command.name)) {
return .{
exec_name,
@unionInit(Union, "help", std.meta.stringToEnum(Enum, command.name).?),
};
}
}
// Treat `help help` as the full help.
if (std.mem.eql(u8, command_name, "help")) {
return .{ exec_name, @unionInit(Union, "help", .help) };
}
log.fatal(.app, "unknown command", .{ .arg = command_name });
return error.UnknownCommand;
}
// Last resort, try sniffing.
const command_enum = try sniffCommand(cmd_str);
// `help` takes no arguments; short-circuit so the sniffed flag
// isn't re-parsed as an unknown option.
// Legacy `--help` situation.
if (command_enum == .help) {
return .{ exec_name, .{ .help = .{} } };
return .{ exec_name, @unionInit(Union, "help", .help) };
}
// "cmd_str" wasn't a command but an option. We can't reset args, but
@@ -301,16 +367,7 @@ pub fn Builder(comptime commands: anytype) type {
inline for (commands) |command| {
if (std.mem.eql(u8, @tagName(command_enum), command.name)) {
const cmd_parsed = parseCommand(allocator, command, &args) catch |err| {
if (err == error.HelpRequested) {
// <subcommand> help requested, return help <subcommand>
var h = @FieldType(Union, "help"){};
if (@hasField(@FieldType(Union, "help"), "subcommand")) {
h.subcommand = command.name;
}
return .{ exec_name, @unionInit(Union, "help", h) };
} else return err;
};
const cmd_parsed = try parseCommand(allocator, command, &args);
return .{ exec_name, cmd_parsed };
}
}
@@ -609,9 +666,9 @@ pub fn Builder(comptime commands: anytype) type {
}
}
// Subcommand help: `lightpanda fetch help` or `lightpanda fetch --help`
// Subcommand help: `lightpanda fetch help` or `lightpanda fetch --help`.
if (std.mem.eql(u8, option_name, "help") or std.mem.eql(u8, option_name, "--help")) {
return error.HelpRequested;
return @unionInit(Union, "help", std.meta.stringToEnum(Enum, command.name).?);
}
// Encountered an option we don't know of.

355
src/help.zon Normal file
View File

@@ -0,0 +1,355 @@
.{
// MAX_HELP_LEN|
.serve =
\\serve command
\\Starts a WebSocket CDP server.
\\
\\Usage:
\\ {0s} serve [OPTIONS] [COMMON_OPTIONS]
\\
\\Options:
\\--host <HOST> Host of the CDP server.
\\ Defaults to "127.0.0.1".
\\
\\--port <INT> Port of the CDP server.
\\ Defaults to 9222.
\\
\\--advertise-host <HOST>
\\ The host to advertise, e.g. in the /json/version
\\ response. Useful, for example, when --host is 0.0.0.0.
\\ Defaults to --host value.
\\
\\--cdp-max-connections <INT>
\\ Maximum number of simultaneous CDP connections.
\\ Defaults to 16.
\\
\\--cdp-max-pending-connections <INT>
\\ Maximum pending connections in the accept queue.
\\ Defaults to 128.
\\
\\--cookie <PATH> Path to a JSON file to load cookies from (read-only).
\\ Defaults to no cookie loading.
,
.fetch =
\\fetch command
\\Fetches the specified URL.
\\
\\Usage:
\\ {0s} fetch <url> [OPTIONS] [COMMON_OPTIONS]
\\
\\Options:
\\--dump <DUMP> Dumps the document to stdout.
\\ Defaults to no dump.
\\
\\ Allowed values:
\\ html Serialized HTML of the DOM.
\\ markdown Converts content to Markdown.
\\ semantic_tree JSON-serialized semantic tree.
\\ semantic_tree_text Pruned plain-text semantic tree.
\\
\\--strip-mode <STRIP> Comma-separated list of tag groups to remove from dump.
\\ Defaults to no-strip.
\\
\\ Allowed values:
\\ js script and link[as=script, rel=preload].
\\ ui Includes img, picture, video, CSS and SVG.
\\ css Includes style and link[rel=stylesheet].
\\ full Strip everything.
\\
\\--with-base Add a <base> tag in dump.
\\ Defaults to false.
\\
\\--with-frames Includes the contents of iframes.
\\ Defaults to false.
\\
\\--wait-ms <INT> Wait time in milliseconds. Supersedes all other --wait
\\ parameters.
\\ Defaults to 5000.
\\
\\--wait-until <UNTIL> Wait until the specified event. Checked before other
\\ --wait-* options.
\\ Defaults to 'done'. If --wait-selector, --wait-script
\\ or --wait-script-file is specified, defaults to none.
\\
\\ Allowed values:
\\ "load", "domcontentloaded", "networkidle", "done".
\\
\\--wait-selector <QUERY> Wait for an element matching the CSS selector to
\\ appear. Checked after --wait-until condition is met.
\\
\\--wait-script <EXPR> Wait for a JavaScript expression to return truthy.
\\ Checked after --wait-until condition is met.
\\
\\--wait-script-file <PATH> Like --wait-script, but reads the script from a file.
\\
\\--inject-script <EXPR> JavaScript to execute as the document's <head> is
\\ parsed, before any other scripts in the page run.
\\ Can be passed multiple times; scripts run in order.
\\
\\--inject-script-file <PATH>
\\ Like --inject-script, but reads the script from a file.
\\ Can be passed multiple times; can be mixed with
\\ --inject-script and runs in CLI order.
\\
\\--terminate-ms <INT> Hard deadline in milliseconds. After this time elapses,
\\ JavaScript execution is forcibly terminated (e.g. for
\\ pages with endless scripts). Unlike --wait-ms, which
\\ only stops waiting, --terminate-ms aborts the page.
\\ Defaults to no terminate.
\\
\\--cookie <PATH> Path to a JSON file to load cookies from (read-only).
\\ Defaults to no cookie loading.
\\
\\--cookie-jar <PATH> Path to a JSON file to save cookies to on exit
\\ (write-only).
\\ Defaults to no cookie saving.
,
.mcp =
\\mcp command
\\Starts an MCP (Model Context Protocol) server over stdio.
\\
\\Usage:
\\ {0s} mcp [OPTIONS] [COMMON_OPTIONS]
\\
\\Options:
\\--cookie <PATH> Path to a JSON file to load cookies from (read-only).
\\ Defaults to no cookie loading.
\\
\\--cookie-jar <PATH> Path to a JSON file to save cookies to on exit
\\ (write-only).
\\ Defaults to no cookie saving.
,
.agent =
\\agent command
\\Starts an interactive AI agent that can browse the web.
\\
\\Usage:
\\ {0s} agent [SCRIPT] [OPTIONS] [COMMON_OPTIONS]
\\
\\Examples:
\\ {0s} agent (auto-detects API key from env)
\\ {0s} agent --provider anthropic --model claude-sonnet-4-6
\\ {0s} agent --provider ollama --model gemma4
\\ {0s} agent --no-llm (basic PandaScript-only REPL)
\\ {0s} agent script.lp (replay a recorded script)
\\ {0s} agent -i script.lp (replay then drop into REPL,
\\ appending new commands to the file)
\\
\\Arguments:
\\[SCRIPT] Optional path to a .lp script.
\\ Without -i: replays the script (no LLM calls).
\\ With -i: replays if present, then enters the REPL
\\ and appends new commands to the file (creating
\\ it if it does not yet exist).
\\ Caution: .lp files can contain EVAL blocks that
\\ run arbitrary JavaScript in the page. Only replay
\\ scripts you trust, the same way you would a shell
\\ script.
\\
\\Options:
\\--provider <PROVIDER> The AI provider.
\\ When omitted, lightpanda auto-detects an API key
\\ from your environment (ANTHROPIC_API_KEY,
\\ OPENAI_API_KEY, GOOGLE_API_KEY/GEMINI_API_KEY).
\\ With exactly one key set: that provider is used.
\\ With multiple keys on a TTY: you'll be prompted
\\ to pick; in non-interactive contexts, pass
\\ --provider explicitly. With no keys set: falls
\\ back to the basic REPL (PandaScript only, no
\\ natural-language input, no LOGIN /
\\ ACCEPT_COOKIES keywords, no --self-heal).
\\
\\ Allowed values:
\\ "anthropic", "openai", "gemini", "ollama".
\\
\\--no-llm Force the basic REPL even when an API key is
\\ present or --provider is set. Useful for testing
\\ PandaScript without burning tokens, or for
\\ disabling the LLM in a saved command without
\\ editing the existing flags. Wins over --provider.
\\
\\--model <MODEL> The model name to use.
\\ Defaults to a sensible default per provider.
\\ Wins over --pick-model.
\\
\\--pick-model Fetch the provider's model list and prompt you
\\ to pick one at startup, instead of using the
\\ baked-in default. Requires a TTY. Ignored when
\\ --model is also passed.
\\
\\--base-url <URL> Override the API base URL for the provider.
\\ Defaults to the provider's standard endpoint.
\\ Ollama default: http://localhost:11434/v1.
\\
\\--system-prompt <STRING> Override the default system prompt.
\\
\\--self-heal On tool errors, ask the model to recover by
\\ retrying with fresh page state instead of
\\ aborting.
\\
\\-i, --interactive After replaying the positional script (if any),
\\ drop into the REPL with the browser state
\\ preserved. When a positional script is present,
\\ any new commands entered in the REPL are appended
\\ to that file.
\\ Conflicts with --task.
\\
\\--task <STRING> One-shot mode: run a single user turn, print the
\\ final answer to stdout, and exit. Conflicts with
\\ the positional script and with --interactive.
\\
\\-a, --attach <PATH> Feed a local file to the model alongside --task.
\\ Repeatable, one file per flag. Text files are
\\ inlined (max 512 KiB each); images/audio/pdf are
\\ base64-encoded (max 20 MiB each). Requires --task.
\\
\\--list-models Print the model IDs usable with `agent` for
\\ --provider, one per line, sorted, and exit.
\\ Auto-detects the provider from env when
\\ --provider is omitted.
\\
\\--verbosity <LEVEL> Stderr chatter level.
\\ Default: high when --task captures stderr to a
\\ pipe or file; low otherwise. low/medium also
\\ raise --log-level to err (mutes page-side
\\ console.error spam) unless --log-level is set
\\ explicitly.
\\
\\ Allowed values:
\\ low Silent in --task mode (final answer to
\\ stdout only); spinner + summary in REPL.
\\ medium + one `● [tool: ...]` line per call.
\\ high + the matching `[result: ...]` body
\\ (required by the benchmarks harness).
\\
\\API keys are read from the environment: ANTHROPIC_API_KEY, OPENAI_API_KEY,
\\or GOOGLE_API_KEY/GEMINI_API_KEY. Ollama does not require an API key.
,
.version =
\\version command
\\Displays the version of {0s}.
\\
\\Usage:
\\ {0s} version
,
.help =
\\help command
\\Displays this message, or the help of the given command.
\\
\\Usage:
\\ {0s} help [COMMAND]
,
.common =
\\Common Options:
\\--insecure-disable-tls-host-verification
\\ Disables host verification on all HTTP requests.
\\ Only set this if you understand and accept the risk.
\\
\\--obey-robots Fetches and obeys robots.txt of the target page.
\\ Defaults to false.
\\
\\--disable-subframes Skip loading <iframe> elements. The parser still
\\ registers them in the DOM, but no child frame or
\\ Page.frameAttached events are produced.
\\ Defaults to false.
\\
\\--disable-workers Skip loading dedicated Web Workers. The Worker
\\ constructor still returns a Worker object, but no
\\ script fetch is initiated and its scope never runs.
\\ Defaults to false.
\\
\\--block-private-networks Block HTTP requests to private/internal IP
\\ addresses after DNS resolution.
\\ Defaults to false.
\\
\\--block-cidrs <LIST> Additional CIDR ranges to block, comma-separated.
\\ Prefix with '-' to allow (exempt from blocking).
\\ e.g. --block-cidrs 10.0.0.0/8,-10.0.0.42/32
\\ Can be combined with --block-private-networks.
\\
\\--http-proxy <URL> HTTP proxy for all HTTP requests.
\\ username:password may be included for basic auth.
\\ Defaults to none.
\\
\\--proxy-bearer-token <TOKEN>
\\ Token sent for bearer authentication with the
\\ proxy: Proxy-Authorization: Bearer <token>.
\\
\\--http-max-concurrent <INT>
\\ Maximum number of concurrent HTTP requests.
\\ Defaults to 10.
\\
\\--http-max-host-open <INT> Maximum open connections to a given host:port.
\\ Defaults to 4.
\\
\\--http-connect-timeout <INT>
\\ Time in ms to establish an HTTP connection before
\\ timing out. 0 means never.
\\ Defaults to 0.
\\
\\--http-timeout <INT> Maximum time in ms the transfer is allowed to
\\ complete. 0 means never.
\\ Defaults to 10000.
\\
\\--http-max-response-size <INT>
\\ Limits the acceptable response size for any
\\ request (e.g. XHR, fetch, script loading).
\\ Defaults to no limit.
\\
\\--ws-max-concurrent <INT> Maximum number of concurrent WebSocket connections.
\\ Defaults to 8.
\\
\\--log-level <LEVEL> The log level.
\\ Defaults to {1s}.
\\
\\ Allowed values:
\\ "debug", "info", "warn", "error", "fatal".
\\
\\--log-format <FORMAT> The log format.
\\ Defaults to {2s}.
\\
\\ Allowed values: "pretty", "logfmt".
\\
\\--log-filter-scopes <SCOPES>
\\ Filter out too-verbose logs per scope,
\\ comma-separated. e.g. http, unknown_prop, event.
\\
\\--user-agent <STRING> Override the User-Agent header entirely.
\\ Must not impersonate other browsers; any value
\\ containing "Mozilla" is forbidden. The browser
\\ still sends Sec-Ch-Ua. Incompatible with
\\ --user-agent-suffix.
\\
\\--user-agent-suffix <STRING>
\\ Suffix appended to the Lightpanda/X.Y User-Agent.
\\
\\--web-bot-auth-key-file <PATH>
\\ Path to the Ed25519 private key PEM file.
\\
\\--web-bot-auth-keyid <STRING>
\\ The JWK thumbprint of your public key.
\\
\\--web-bot-auth-domain <DOMAIN>
\\ Your domain, e.g. yourdomain.com.
\\
\\--http-cache-dir <PATH> Directory used as a filesystem cache for network
\\ resources. Omitting this disables caching.
\\ Defaults to no caching.
\\
\\--cookie <PATH> Path to a JSON file to load cookies from (read-only).
\\ Defaults to no cookie loading.
\\
\\--cookie-jar <PATH> Path to a JSON file to save cookies to on exit
\\ (write-only).
\\ Defaults to no cookie saving.
\\
\\--storage-engine <ENGINE> The storage engine to use.
\\ Defaults to none.
\\
\\ Allowed values: "none", "sqlite".
\\
\\--storage-sqlite-path <PATH>
\\ Path to the SQLite database file for persistent
\\ storage. Use ":memory:" for in-memory storage.
,
}

View File

@@ -57,10 +57,7 @@ fn run(allocator: Allocator, main_arena: Allocator) !void {
defer args.deinit(main_arena);
switch (args.mode) {
.help => {
args.printUsageAndExit(true);
return std.process.cleanExit();
},
.help => |tag| return args.printUsageAndExit(tag, true),
.version => {
var stdout = std.fs.File.stdout().writer(&.{});
try stdout.interface.print("{s}\n", .{lp.build_config.version});
@@ -101,7 +98,7 @@ fn run(allocator: Allocator, main_arena: Allocator) !void {
log.debug(.app, "startup", .{ .mode = "serve", .snapshot = app.snapshot.fromEmbedded() });
const address = std.net.Address.parseIp(opts.host, opts.port) catch |err| {
log.fatal(.app, "invalid server address", .{ .err = err, .host = opts.host, .port = opts.port });
return args.printUsageAndExit(false);
return args.printUsageAndExit(.serve, false);
};
var server = lp.Server.init(app, address) catch |err| {