From 13ebb9e802e988d6efddee8fee24d8687942dc8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Thu, 21 May 2026 22:39:58 +0200 Subject: [PATCH] agent: simplify command, schema, and repl logic - Remove `input_schema_raw` from `SchemaInfo` and use `parameters` directly - Simplify JSON field extraction in `Verifier` to avoid `parseArgs` - Refactor REPL command splitting and environment substitution - Clean up and condense comments across the codebase --- src/agent/Agent.zig | 63 ++++++++---------------- src/agent/CommandRunner.zig | 30 ++++++------ src/agent/SlashCommand.zig | 8 +-- src/mcp/tools.zig | 12 ++--- src/script/Recorder.zig | 2 + src/script/Verifier.zig | 19 ++++--- src/script/command.zig | 98 +++++++++---------------------------- src/script/schema.zig | 65 +++++++----------------- 8 files changed, 100 insertions(+), 197 deletions(-) diff --git a/src/agent/Agent.zig b/src/agent/Agent.zig index 0a0ab330..57afb2d9 100644 --- a/src/agent/Agent.zig +++ b/src/agent/Agent.zig @@ -143,9 +143,8 @@ notification: *lp.Notification, browser: lp.Browser, session: *lp.Session, node_registry: CDPNode.Registry, -/// Provider-facing tool list, built from `SlashCommand.globalSchemas()`. Slice -/// is owned by `allocator`; the `parameters` JSON `Value` each entry points at -/// lives in the schema module's process-lifetime arena. +/// Slice is owned by `allocator`; each entry's `parameters` JSON value points +/// into the schema module's process-lifetime arena, so no per-entry free. tools: []const zenai.provider.Tool, terminal: Terminal, cmd_runner: CommandRunner, @@ -445,8 +444,8 @@ fn runRepl(self: *Agent) void { if (line.len == 0) continue; - if (line[0] == '/') { - const split = SlashCommand.splitNameRest(line[1..]) orelse continue :repl; + const slash_split: ?SlashCommand.Split = if (line[0] == '/') SlashCommand.splitNameRest(line[1..]) else null; + if (slash_split) |split| { if (SlashCommand.findMeta(split.name) != null) { if (self.handleMeta(split.name, split.rest)) break :repl; continue :repl; @@ -480,12 +479,11 @@ fn runRepl(self: *Agent) void { continue :repl; } const prompt = if (cmd == .login) login_prompt else accept_cookies_prompt; - const label: []const u8 = if (cmd == .login) "LOGIN" else "ACCEPT_COOKIES"; + const label: []const u8 = if (cmd == .login) "/login" else "/acceptCookies"; _ = self.runTurn(.{ .prompt = prompt, .record_comment = line, .label = label }); }, .tool_call => |tc| { - const split = SlashCommand.splitNameRest(line[1..]) orelse unreachable; - self.terminal.beginTool(tc.name, split.rest); + self.terminal.beginTool(tc.name, slash_split.?.rest); const result = self.cmd_runner.executeWithResult(aa, cmd); self.terminal.endTool(); self.cmd_runner.printResult(cmd, result); @@ -561,14 +559,9 @@ fn printSlashHelp(self: *Agent, target: []const u8) void { var arena: std.heap.ArenaAllocator = .init(self.allocator); defer arena.deinit(); - const aa = arena.allocator(); - const pretty: []const u8 = blk: { - const v = std.json.parseFromSliceLeaky(std.json.Value, aa, schema.input_schema_raw, .{}) catch break :blk schema.input_schema_raw; - var aw: std.Io.Writer.Allocating = .init(aa); - std.json.Stringify.value(v, .{ .whitespace = .indent_2 }, &aw.writer) catch break :blk schema.input_schema_raw; - break :blk aw.written(); - }; - self.terminal.printInfoFmt("schema:\n{s}", .{pretty}); + var aw: std.Io.Writer.Allocating = .init(arena.allocator()); + std.json.Stringify.value(schema.parameters, .{ .whitespace = .indent_2 }, &aw.writer) catch return; + self.terminal.printInfoFmt("schema:\n{s}", .{aw.written()}); } fn printSlashParseError(self: *Agent, err: SlashCommand.ParseError, name: []const u8) void { @@ -620,9 +613,9 @@ fn runScript(self: *Agent, path: []const u8) bool { }) orelse break; switch (entry.command) { .comment => { - // Recorded scripts prefix LLM-generated commands with the - // natural-language prompt that produced them; keep the - // last one around so self-heal can use it as context. + // `#` prefix lines preceding a recorded action are the + // natural-language prompt that produced it — kept for + // self-heal context. if (entry.opener_line.len > 2 and entry.opener_line[0] == '#') { last_comment = std.mem.trim(u8, entry.opener_line[1..], &std.ascii.whitespace); } @@ -787,8 +780,6 @@ fn ensureSystemPrompt(self: *Agent) !void { } } -// Drop older turns once `prune_high` is hit; survivors are deep-copied so -// the old arena (which still pins dropped strings) can be released. const prune_high = 30; const prune_keep = 20; @@ -798,10 +789,8 @@ fn pruneMessages(self: *Agent) void { const tail_start = zenai.provider.safeTruncationStart(msgs, msgs.len - prune_keep) orelse return; - // Dupe the kept tail into a scratch slice in the new arena first. Only - // mutate self.messages once every dupe has succeeded — otherwise a - // partial failure would leave self.messages.items[1..] pointing into - // the freed `new_arena`. + // Dupe into the new arena before mutating self.messages — a partial + // failure would otherwise leave items pointing into a freed arena. var new_arena: std.heap.ArenaAllocator = .init(self.allocator); const duped = zenai.provider.dupeMessages(new_arena.allocator(), msgs[tail_start..]) catch { new_arena.deinit(); @@ -852,9 +841,8 @@ fn runHealTurn(self: *Agent, arena: std.mem.Allocator, prompt: []const u8) ![]Co var cmds: std.ArrayList(Command) = .empty; for (result.tool_calls_made) |tc| { if (tc.is_error) continue; - // Deep-copy into the caller's arena: `result.deinit()` (deferred above) - // frees `tc.arguments`'s backing arena before the returned cmds are - // formatted by `attemptSelfHeal`. + // `result.deinit()` (deferred above) frees the args arena before the + // caller formats `cmds`; deep-copy into `arena` to outlive it. const cmd = try Command.fromToolCallOwned(arena, tc.name, tc.arguments); if (!cmd.canHeal()) { self.terminal.printInfoFmt( @@ -1036,21 +1024,17 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 { } r.record(cmd); } - // Recorder self-disables on write failure (disk full, fd closed). Tell - // the user the recording stopped instead of silently dropping appends. if (!r.isActive()) { self.terminal.printError("recording disabled (write failed); see logs"); } }; - // RunToolsResult arenas are deinited at the end of this function — - // dupe into `message_arena` so the returned slice outlives them. + // Dupe into `message_arena` — RunToolsResult arenas are deinited below. const final_text: ?[]const u8 = blk: { if (result.text) |text| break :blk try ma.dupe(u8, text); - // Tool loop ended without a final text — force one more turn that - // forbids tools and pretraining fallback. Without this, models - // confabulate answers when the page was blocked or empty. + // Without a synthesis turn forbidding tools+pretraining, models + // confabulate when the page was blocked or empty. log.info(.app, "synthesizing final answer", .{}); const synth_baseline = self.messages.items.len; try self.messages.append(self.allocator, .{ @@ -1065,17 +1049,12 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 { ma, .{ .context = @ptrCast(self), .callFn = handleToolCall }, .{ - // tool_choice = .none forbids tools; serializing the full - // catalog anyway just pads the request body. .tools = &.{}, .max_turns = 1, .max_tokens = 4096, .tool_choice = .none, - // Cap thinking on the finalize turn. Fully disabling it (0) - // leaves reasoning-heavy tasks with no answer at all; letting - // it run unbounded lets models fill the turn with thoughts - // and emit nothing as the final text. 512 tokens is enough - // for the model to pick its answer but not to freewheel. + // .low (≈512 tokens) so reasoning models still pick an answer + // but can't burn the whole turn on thinking and emit nothing. .thinking_level = .low, .cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel }, }, diff --git a/src/agent/CommandRunner.zig b/src/agent/CommandRunner.zig index 6d3db5cc..264398d4 100644 --- a/src/agent/CommandRunner.zig +++ b/src/agent/CommandRunner.zig @@ -62,18 +62,22 @@ fn substituteStringArgs(arena: std.mem.Allocator, tool_name: []const u8, args: ? const is_fill = std.mem.eql(u8, tool_name, @tagName(browser_tools.Action.fill)); - var needs_sub = false; + const needsSub = struct { + fn f(is_fill_: bool, key: []const u8, val: std.json.Value) bool { + if (is_fill_ and std.mem.eql(u8, key, "value")) return false; + return val == .string and std.mem.indexOf(u8, val.string, "$LP_") != null; + } + }.f; + + var needs_any = false; var it = v.object.iterator(); while (it.next()) |entry| { - const key = entry.key_ptr.*; - const val = entry.value_ptr.*; - const exclude = is_fill and std.mem.eql(u8, key, "value"); - if (!exclude and val == .string and std.mem.indexOf(u8, val.string, "$LP_") != null) { - needs_sub = true; + if (needsSub(is_fill, entry.key_ptr.*, entry.value_ptr.*)) { + needs_any = true; break; } } - if (!needs_sub) return v; + if (!needs_any) return v; var new_obj: std.json.ObjectMap = .init(arena); try new_obj.ensureTotalCapacity(v.object.count()); @@ -81,13 +85,11 @@ fn substituteStringArgs(arena: std.mem.Allocator, tool_name: []const u8, args: ? while (it.next()) |entry| { const key = entry.key_ptr.*; const val = entry.value_ptr.*; - const exclude = is_fill and std.mem.eql(u8, key, "value"); - if (!exclude and val == .string and std.mem.indexOf(u8, val.string, "$LP_") != null) { - const resolved = try browser_tools.substituteEnvVars(arena, val.string); - try new_obj.put(key, .{ .string = resolved }); - continue; - } - try new_obj.put(key, val); + const new_val: std.json.Value = if (needsSub(is_fill, key, val)) + .{ .string = try browser_tools.substituteEnvVars(arena, val.string) } + else + val; + try new_obj.put(key, new_val); } return .{ .object = new_obj }; } diff --git a/src/agent/SlashCommand.zig b/src/agent/SlashCommand.zig index 6a460686..0ecacb05 100644 --- a/src/agent/SlashCommand.zig +++ b/src/agent/SlashCommand.zig @@ -17,15 +17,12 @@ // along with this program. If not, see . //! REPL-only meta slash commands and re-exports of the PandaScript schema -//! primitives. The actual slash-command grammar lives in `script/schema.zig`; -//! this module keeps the agent-only meta commands (`/help`, `/quit`, -//! `/verbosity`) that aren't part of the script. +//! primitives. The actual slash-command grammar lives in `script/schema.zig`. const std = @import("std"); const lp = @import("lightpanda"); const schema = lp.script.schema; -// Re-export so existing call sites (Agent, Terminal) keep their import path. pub const SchemaInfo = schema.SchemaInfo; pub const ParseError = schema.ParseError; pub const Split = schema.Split; @@ -37,8 +34,7 @@ pub const findSchema = schema.findSchema; pub const findSchemaCanonical = schema.findSchemaCanonical; pub const splitNameRest = schema.splitNameRest; -/// Meta slash commands handled directly by the agent (not by ToolExecutor). -/// Kept in sync with `handleMeta` in Agent.zig. +/// Meta slash commands handled directly by Agent.handleMeta. pub const MetaCommand = struct { name: [:0]const u8, /// Ghost-text fragment shown after the name + space. Empty when the diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig index f1e048d8..3309c388 100644 --- a/src/mcp/tools.zig +++ b/src/mcp/tools.zig @@ -182,6 +182,11 @@ fn surfacesErrorInBand(action: browser_tools.Action) bool { return action == .eval or action == .extract; } +fn surfacesErrorInBandByName(name: []const u8) bool { + const action = std.meta.stringToEnum(browser_tools.Action, name) orelse return false; + return surfacesErrorInBand(action); +} + fn recordIfActive(server: *Server, name: []const u8, arguments: ?std.json.Value) void { if (server.recorder == null) return; const cmd = Command.fromToolCall(name, arguments); @@ -266,13 +271,8 @@ fn handleScriptStep(server: *Server, arena: std.mem.Allocator, id: std.json.Valu } const tc = cmd.tool_call; - const action = std.meta.stringToEnum(browser_tools.Action, tc.name) orelse { - return sendErrorContent(server, id, "internal: unknown action"); - }; - const result = browser_tools.call(arena, server.session, &server.node_registry, tc.name, tc.args) catch |err| { - // eval/extract get a terse error — they're line-scoped already. - if (surfacesErrorInBand(action)) { + if (surfacesErrorInBandByName(tc.name)) { return sendErrorContent(server, id, @errorName(err)); } const url = browser_tools.currentUrlOrPlaceholder(server.session); diff --git a/src/script/Recorder.zig b/src/script/Recorder.zig index 034a680e..e422cc55 100644 --- a/src/script/Recorder.zig +++ b/src/script/Recorder.zig @@ -129,6 +129,8 @@ fn disable(self: *Recorder, err: anyerror) void { } } +// --- Tests --- + fn parseLine(arena: std.mem.Allocator, line: []const u8) Command { return Command.parse(arena, line) catch unreachable; } diff --git a/src/script/Verifier.zig b/src/script/Verifier.zig index 46a4a2ff..56c9f505 100644 --- a/src/script/Verifier.zig +++ b/src/script/Verifier.zig @@ -60,19 +60,26 @@ pub fn verify(self: *Verifier, arena: std.mem.Allocator, cmd: Command) VerifyRes else => return .inconclusive, }; const action = std.meta.stringToEnum(browser_tools.Action, tc.name) orelse return .inconclusive; + const args = tc.args orelse return .inconclusive; + if (args != .object) return .inconclusive; + const selector = (args.object.get("selector") orelse return .inconclusive); + if (selector != .string) return .inconclusive; switch (action) { .fill => { - const a = browser_tools.parseArgs(struct { selector: []const u8, value: []const u8 }, arena, tc.args) catch return .inconclusive; - return self.verifyFill(arena, a.selector, a.value); + const value = args.object.get("value") orelse return .inconclusive; + if (value != .string) return .inconclusive; + return self.verifyFill(arena, selector.string, value.string); }, .setChecked => { - const a = browser_tools.parseArgs(struct { selector: []const u8, checked: bool }, arena, tc.args) catch return .inconclusive; - return self.verifyCheck(arena, a.selector, a.checked); + const checked = args.object.get("checked") orelse return .inconclusive; + if (checked != .bool) return .inconclusive; + return self.verifyCheck(arena, selector.string, checked.bool); }, .selectOption => { - const a = browser_tools.parseArgs(struct { selector: []const u8, value: []const u8 }, arena, tc.args) catch return .inconclusive; - return self.verifySelect(arena, a.selector, a.value); + const value = args.object.get("value") orelse return .inconclusive; + if (value != .string) return .inconclusive; + return self.verifySelect(arena, selector.string, value.string); }, else => return .inconclusive, } diff --git a/src/script/command.zig b/src/script/command.zig index 5f448378..553816dc 100644 --- a/src/script/command.zig +++ b/src/script/command.zig @@ -16,14 +16,9 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -//! PandaScript Command — one line of a `.lp` script: a slash command, a -//! `#`-comment, or one of two LLM triggers (`/login`, `/acceptCookies`). -//! Anything else is a parse error; bare prose → LLM only happens in the -//! live REPL and is dispatched there, not here. -//! -//! Multi-line `/eval '''…'''` / `/extract '''…'''` blocks live in -//! `ScriptIterator`, which assembles the body before calling `parse` on the -//! synthesized line. +//! PandaScript Command: a slash command, `#`-comment, or `/login` / +//! `/acceptCookies` LLM trigger. Bare prose is the REPL's job, not the parser's. +//! Multi-line `'''…'''` blocks are assembled by `ScriptIterator` before parse. const std = @import("std"); const lp = @import("lightpanda"); @@ -40,9 +35,7 @@ pub const Command = union(enum) { comment: void, pub const ToolCall = struct { - /// Slice into the schema table — lives forever, no dupe required. name: []const u8, - /// Arena-owned. `null` for tools with no args (e.g. `/getCookies`). args: ?std.json.Value, }; @@ -53,9 +46,8 @@ pub const Command = union(enum) { .tool_call => |tc| blk: { const s = schema.findSchemaCanonical(schema.globalSchemas(), tc.name) orelse break :blk false; if (!s.recorded) break :blk false; - // backendNodeId-based calls aren't replayable (the id is - // invalidated by any DOM mutation), so keep them out of the - // recording even when the tool itself is recordable. + // backendNodeId is invalidated by any DOM mutation, so calls + // using it aren't replayable. const args = tc.args orelse break :blk true; if (args == .object and args.object.contains("backendNodeId")) break :blk false; break :blk true; @@ -77,9 +69,6 @@ pub const Command = union(enum) { }; } - /// Self-heal must only patch the current page; navigation is excluded - /// even though `/goto` is recorded. The decision lives on the per-tool - /// `can_heal` flag in `tool_defs`; here it's just a lookup. pub fn canHeal(self: Command) bool { return switch (self) { .tool_call => |tc| if (schema.findSchemaCanonical(schema.globalSchemas(), tc.name)) |s| s.can_heal else false, @@ -87,17 +76,10 @@ pub const Command = union(enum) { }; } - /// Parse one trimmed line. Branch order: blank/`#` → `.comment`; - /// `/login` and `/acceptCookies` short-circuit to their meta variants; - /// any other `/` resolves the schema and parses args; anything - /// else returns `error.NotASlashCommand`. Bare-prose-to-LLM is the REPL's - /// job, not the parser's. pub fn parse(arena: std.mem.Allocator, line: []const u8) ParseError!Command { return parseWithSchemas(arena, line, schema.globalSchemas()); } - /// Same as `parse` but lets callers inject a different schema set — - /// the agent uses its own arena-backed cache to avoid double-parsing. pub fn parseWithSchemas(arena: std.mem.Allocator, line: []const u8, schemas: []const schema.SchemaInfo) ParseError!Command { const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace); if (trimmed.len == 0) return .{ .comment = {} }; @@ -120,10 +102,7 @@ pub const Command = union(enum) { return .{ .tool_call = .{ .name = s.tool_name, .args = args } }; } - /// Round-trips with `parse` for the canonical recorder output. Single- - /// required-field tools emit positional + quoted (`/click '#login'`); - /// everything else emits `/name key=value ...`. Multi-line string values - /// use `'''…'''` blocks. Default-true booleans are omitted when matching. + /// Canonical recorder format. Round-trips with `parse`. pub fn format(self: Command, writer: *std.Io.Writer) std.Io.Writer.Error!void { switch (self) { .login => try writer.writeAll("/login"), @@ -133,26 +112,21 @@ pub const Command = union(enum) { } } - /// `name` and `arguments` must outlive the returned Command. Use - /// `fromToolCallOwned` when that guarantee doesn't hold. + /// `name` and `arguments` must outlive the returned Command — use + /// `fromToolCallOwned` to deep-copy when they don't. pub fn fromToolCall(tool_name: []const u8, arguments: ?std.json.Value) Command { return .{ .tool_call = .{ .name = tool_name, .args = arguments } }; } - /// Deep-copies `arguments` into `arena` so the Command can outlive the - /// caller's args buffer (e.g. the self-heal path returns Commands across - /// an arena deinit). pub fn fromToolCallOwned(arena: std.mem.Allocator, tool_name: []const u8, arguments: ?std.json.Value) std.mem.Allocator.Error!Command { const owned_name = if (schema.findSchemaCanonical(schema.globalSchemas(), tool_name)) |s| s.tool_name else try arena.dupe(u8, tool_name); const owned_args = if (arguments) |v| try dupeJsonValue(arena, v) else null; return .{ .tool_call = .{ .name = owned_name, .args = owned_args } }; } - /// Walks `.lp` content line-by-line, gluing multi-line `'''…'''` blocks - /// (today: `/eval`, `/extract`; any single-required-string-field tool - /// qualifies) into a single entry. Comments and blank lines surface as - /// `.comment` entries so the script replay can attach prefacing comments - /// to the next executable line. + /// Iterates `.lp` content, gluing multi-line `'''…'''` blocks into a + /// single entry. Comments surface as `.comment` so the replay can attach + /// the preceding comment to the next executable line. pub const ScriptIterator = struct { allocator: std.mem.Allocator, lines: std.mem.SplitIterator(u8, .scalar), @@ -168,14 +142,12 @@ pub const Command = union(enum) { pub const Entry = struct { line_num: u32, - /// Trimmed opener line — the only line for single-line entries, - /// the `/eval '''` / `/extract '''` opener for blocks. Display-only - /// (errors, REPL echo, heal-comment headers); use `raw_span` for - /// splices that need the full block body. + /// Trimmed opener line; use `raw_span` for splices that need the + /// full block body. opener_line: []const u8, - /// The full slice of the original content buffer covering this entry, - /// including trailing newline(s). For multi-line blocks this spans - /// from the opener through the closing triple-quote line. + /// Slice of the original content buffer covering this entry, + /// trailing newline included. Multi-line blocks span opener + /// through closing triple-quote. raw_span: []const u8, command: Command, }; @@ -225,8 +197,6 @@ pub const Command = union(enum) { quote_type: QuoteType, }; - /// `/eval '''` or `/extract '''` (and any other single-required-string-field - /// tool followed by a bare triple-quote token). fn tryBlockOpener(_: *ScriptIterator, line: []const u8, schemas: []const schema.SchemaInfo) ParseError!?BlockOpener { if (line.len < 2 or line[0] != '/') return null; const split = schema.splitNameRest(line[1..]) orelse return null; @@ -239,7 +209,6 @@ pub const Command = union(enum) { fn collectMultiLineBlock(self: *ScriptIterator, quote_type: QuoteType) std.mem.Allocator.Error!?[]const u8 { const closer = quote_type.toLiteral(); var parts: std.ArrayList(u8) = .empty; - // toOwnedSlice empties `parts`, so this defer is a no-op on success. defer parts.deinit(self.allocator); while (self.lines.next()) |line| { self.line_num += 1; @@ -250,7 +219,7 @@ pub const Command = union(enum) { if (parts.items.len > 0) { try parts.append(self.allocator, '\n'); } - // Strip trailing CR only — full trim would clobber indentation. + // Trim CR only; full trim would clobber indentation. try parts.appendSlice(self.allocator, std.mem.trimRight(u8, line, "\r")); } return null; @@ -258,8 +227,6 @@ pub const Command = union(enum) { }; }; -/// Deep-copy a `std.json.Value`, duplicating all owned strings and containers -/// into `a`. Used by `fromToolCallOwned` for the heal path. fn dupeJsonValue(a: std.mem.Allocator, value: std.json.Value) std.mem.Allocator.Error!std.json.Value { return switch (value) { .null, .bool, .integer, .float => value, @@ -296,16 +263,11 @@ fn formatToolCall(tc: Command.ToolCall, writer: *std.Io.Writer) std.Io.Writer.Er const args = args_val.object; if (args.count() == 0) return; - // Emit positional form only when the args reduce to the single required - // field: `/goto ''`, `/click ''`, `/extract ''`. As soon - // as there are extra fields (`/selectOption selector=... value=...`, - // `/setChecked selector=... checked=false`), fall back to kv so the - // recording stays unambiguous. + // Positional form `/goto ''` only when args reduce to the single + // required field; extra fields force kv so recordings stay unambiguous. var positional_emitted: ?[]const u8 = null; if (s_opt) |s| { const has_one_required = s.required.len == 1; - // Count visible fields, ignoring default-true booleans that we'd skip - // in the kv pass below — they don't make the args "non-trivial". var visible: usize = 0; var it_v = args.iterator(); while (it_v.next()) |entry| { @@ -322,8 +284,6 @@ fn formatToolCall(tc: Command.ToolCall, writer: *std.Io.Writer) std.Io.Writer.Er } } - // Emit kv for every key not already used as the positional, *and* skip - // default-true booleans so `/setChecked selector='#a'` round-trips. var it = args.iterator(); while (it.next()) |entry| { const key = entry.key_ptr.*; @@ -340,8 +300,6 @@ fn isDefaultTrueBool(s: *const schema.SchemaInfo, key: []const u8, v: std.json.V return v == .bool and v.bool and s.isFieldDefaultTrue(key); } -/// Strings are always quoted (or triple-quoted when they contain newlines) -/// so a recorded line is unambiguous regardless of the value's content. fn formatString(writer: *std.Io.Writer, s: []const u8) std.Io.Writer.Error!void { if (std.mem.indexOfScalar(u8, s, '\n') != null) { const q = QuoteType.pickFor(s).toLiteral(); @@ -355,7 +313,6 @@ fn formatString(writer: *std.Io.Writer, s: []const u8) std.Io.Writer.Error!void try writeQuoted(writer, s); } -/// Kv-value emission: strings via `formatString`; numbers/bools bare. fn formatKvValue(writer: *std.Io.Writer, v: std.json.Value) std.Io.Writer.Error!void { switch (v) { .string => |s| try formatString(writer, s), @@ -363,10 +320,7 @@ fn formatKvValue(writer: *std.Io.Writer, v: std.json.Value) std.Io.Writer.Error! .float => |n| try writer.print("{d}", .{n}), .bool => |b| try writer.writeAll(if (b) "true" else "false"), .null => try writer.writeAll("null"), - else => { - // Arrays/objects emit as compact JSON. - std.json.Stringify.value(v, .{}, writer) catch return error.WriteFailed; - }, + else => std.json.Stringify.value(v, .{}, writer) catch return error.WriteFailed, } } @@ -410,9 +364,7 @@ pub const QuoteType = enum { }; } - /// Pick the triple-quote delimiter that does not collide with `body`. - /// Defaults to `triple_single`; swaps to `triple_double` only when the - /// body already contains `'''`. + /// Default `'''`; swaps to `"""` only when the body already contains `'''`. pub fn pickFor(body: []const u8) QuoteType { if (std.mem.indexOf(u8, body, "'''") != null) return .triple_double; return .triple_single; @@ -454,12 +406,10 @@ test "parse: /goto positional" { try testing.expectEqualStrings("https://example.com", cmd.tool_call.args.?.object.get("url").?.string); } -test "parse: /click positional" { +test "parse: /click rejects positional (zero required fields)" { var arena: std.heap.ArenaAllocator = .init(testing.allocator); defer arena.deinit(); - // click has zero required fields — `/click 'Login'` would be PositionalNotAllowed. try testing.expectError(error.PositionalNotAllowed, Command.parse(arena.allocator(), "/click 'Login'")); - // The valid form is kv. const cmd = try Command.parse(arena.allocator(), "/click selector='Login'"); try testing.expectEqualStrings("Login", cmd.tool_call.args.?.object.get("selector").?.string); } @@ -492,18 +442,16 @@ test "format: /goto round-trip" { var aw: std.Io.Writer.Allocating = .init(testing.allocator); defer aw.deinit(); try cmd.format(&aw.writer); - // Recorder always quotes string values for unambiguous round-trips. try testing.expectEqualStrings("/goto 'https://example.com'", aw.written()); } -test "format: /click emits positional for single-required tools? no — click has zero required" { +test "format: /click stays kv (zero required fields)" { var arena: std.heap.ArenaAllocator = .init(testing.allocator); defer arena.deinit(); const cmd = try Command.parse(arena.allocator(), "/click selector='Login'"); var aw: std.Io.Writer.Allocating = .init(testing.allocator); defer aw.deinit(); try cmd.format(&aw.writer); - // Click has zero required fields, so kv form is canonical. try testing.expectEqualStrings("/click selector='Login'", aw.written()); } diff --git a/src/script/schema.zig b/src/script/schema.zig index bfa3b15b..5fb957ba 100644 --- a/src/script/schema.zig +++ b/src/script/schema.zig @@ -16,14 +16,8 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -//! Slash-command schema: the parsed view of `browser_tools.tool_defs` that -//! both PandaScript (`Command.parse`/`format`) and the REPL Terminal consume. -//! -//! Each tool's JSON schema is reduced to a flat `SchemaInfo` (required names, -//! field types, hint slots, recording flags) so callers don't re-parse the -//! input_schema string. `globalSchemas()` is the lazy process-wide cache used -//! by `Command.parse`/`format` when no agent-scoped cache is plumbed (script -//! replay, recorder format, tests). +//! Flat view of `browser_tools.tool_defs` shared by PandaScript and the REPL. +//! `globalSchemas()` is the lazy process-wide cache. const std = @import("std"); const lp = @import("lightpanda"); @@ -34,22 +28,18 @@ pub const FieldType = enum { string, integer, number, boolean, other }; pub const FieldEntry = struct { name: []const u8, field_type: FieldType, - /// Default for booleans declared with `"default": true` in the JSON schema. /// Used by `Command.format` to omit `checked=true` when emitting `/setChecked`. default_true: bool = false, }; -/// One slot of the REPL's argument-syntax hint, in display order: required -/// fields first, then optionals. `fragment` is pre-rendered as `` for -/// required and `[name=…]` for optional so the renderer can hand it directly -/// to the shared writer. +/// REPL argument-syntax hint slot. `fragment` is pre-rendered as `` for +/// required and `[name=…]` for optional. pub const HintSlot = struct { name: []const u8, required: bool, fragment: []const u8, }; -/// Upper bound on per-schema hint slots; lets the renderer use a stack array. /// Asserted at schema build time so adding a tool with more fields fails loud. pub const max_hint_slots: usize = 16; @@ -57,20 +47,14 @@ pub const max_hint_slots: usize = 16; pub const SchemaInfo = struct { tool_name: []const u8, description: []const u8, - input_schema_raw: []const u8, required: []const []const u8, fields: []const FieldEntry, hints: []const HintSlot, - /// Mirrors `ToolDef.recorded` — kept on SchemaInfo so the script layer - /// doesn't have to re-resolve via `tool_defs` for every command. recorded: bool, can_heal: bool, produces_data: bool, parameters: std.json.Value, - /// True when this tool's args fit a multi-line `/ '''…'''` opener: - /// exactly one required field, and that field is a string. Used by - /// `Command.ScriptIterator` to detect block openers. pub fn isMultiLineCapable(self: *const SchemaInfo) bool { return self.required.len == 1 and self.fieldType(self.required[0]) == .string; } @@ -107,7 +91,6 @@ fn buildOne(arena: std.mem.Allocator, td: browser_tools.ToolDef, parsed: std.jso var info: SchemaInfo = .{ .tool_name = td.name, .description = td.description, - .input_schema_raw = td.input_schema, .required = &.{}, .fields = &.{}, .hints = &.{}, @@ -225,19 +208,14 @@ pub fn splitNameRest(input: []const u8) ?Split { }; } -/// Parse `rest` (the args portion of a slash command) into a `std.json.Value` -/// shaped for the tool. Returns null when the schema takes no args and `rest` -/// is empty; that lets the caller pass `null` straight to `tool_executor.call` -/// without allocating an empty object. +/// Parse `rest` (args portion of a slash command) into a `std.json.Value`. +/// Returns null when the schema takes no args and `rest` is empty. /// /// Argument-binding rules: -/// - Bare `{json}` payload — returned as-is after JSON parse. Pass-through -/// avoids re-stringifying the blob the LLM emitted. -/// - A single leading positional token binds to the schema's sole required -/// field when `schema.required.len == 1`. Multiple positionals (or one -/// positional with `required.len != 1`) error. -/// - Everything else is `key=value`. Coercion: integer/number/boolean -/// fields parse their respective types; anything else stays a string. +/// - Bare `{json}` payload returned as-is. +/// - Single leading positional binds to `schema.required[0]` when +/// `schema.required.len == 1`. Otherwise positionals error. +/// - Everything else is `key=value` with type coercion via `coerce`. pub fn parseValue(arena: std.mem.Allocator, schema: *const SchemaInfo, rest: []const u8) ParseError!?std.json.Value { if (rest.len == 0) { if (schema.required.len > 0) return error.MissingRequired; @@ -264,8 +242,8 @@ pub fn parseValue(arena: std.mem.Allocator, schema: *const SchemaInfo, rest: []c list.appendAssumeCapacity(.{ .key = tok[0..eq], .value = stripQuotes(tok[eq + 1 ..]) }); } - // Default-true required booleans (e.g. setChecked.checked) are filled in - // when omitted, so `/setChecked selector='#a'` works without `checked=true`. + // Default-true booleans (e.g. setChecked.checked) so `/setChecked + // selector='#a'` works without `checked=true`. for (schema.required) |req| { var found = false; for (list.items) |p| { @@ -291,9 +269,8 @@ const KvPair = struct { value: []const u8, }; -/// Split `input` into tokens, treating `"…"` and `'…'` as a single token (the -/// surrounding quotes are stripped at value-extraction time, not here). -/// Tokens may contain `=`. +/// Tokenize on whitespace. `"…"` and `'…'` (single or triple) are kept whole; +/// quote stripping happens later. Tokens may contain `=`. fn tokenize(arena: std.mem.Allocator, input: []const u8) ParseError![][]const u8 { var out: std.ArrayList([]const u8) = .empty; @@ -369,17 +346,14 @@ fn coerce(arena: std.mem.Allocator, schema: *const SchemaInfo, key: []const u8, return .{ .string = try arena.dupe(u8, value) }; } -// --- Global lazy schema cache --- -// -// Single-threaded REPL only — if multi-threaded usage emerges, swap the guard -// for `std.Once` semantics. +// --- Global lazy schema cache (process-lifetime) --- var global_schemas_storage: [browser_tools.tool_defs.len]SchemaInfo = undefined; var global_arena: std.heap.ArenaAllocator = undefined; var global_once = std.once(initGlobal); -/// Process-lifetime schema cache. Panics on init failure — `tool_defs` is -/// compile-time constant, so a parse/build error is a build-time bug. +/// Panics on init failure — `tool_defs` is compile-time constant, so any +/// parse/build error is a build-time bug. pub fn globalSchemas() []const SchemaInfo { global_once.call(); return global_schemas_storage[0..browser_tools.tool_defs.len]; @@ -405,19 +379,15 @@ const testing = std.testing; test "globalSchemas: comptime tool defs reduce cleanly" { const schemas = globalSchemas(); try testing.expect(schemas.len == browser_tools.tool_defs.len); - // /goto has one required string field — multi-line capable. const goto = findSchema(schemas, "goto").?; try testing.expect(goto.isMultiLineCapable()); try testing.expect(goto.recorded); - // /scroll has zero required fields — not multi-line capable. const scroll = findSchema(schemas, "scroll").?; try testing.expect(!scroll.isMultiLineCapable()); try testing.expect(scroll.recorded); - // /tree is read-only; should not be recorded. const tree = findSchema(schemas, "tree").?; try testing.expect(!tree.recorded); try testing.expect(tree.produces_data); - // /setChecked's `checked` field carries default=true. const set_checked = findSchema(schemas, "setChecked").?; var checked_default_true = false; for (set_checked.fields) |f| { @@ -425,7 +395,6 @@ test "globalSchemas: comptime tool defs reduce cleanly" { } try testing.expect(checked_default_true); - // canonical lookup matches search lookup try testing.expect(findSchemaCanonical(schemas, "goto") == goto); try testing.expect(findSchemaCanonical(schemas, "unknown_tool") == null); }