script: support triple-quoted strings and optimize lookups

- Add support for triple-quoted strings (''' and """) in the tokenizer
  and quote stripper.
- Avoid allocations in `stripQuotes` by returning slices.
- Optimize schema lookups using O(1) enum-based canonical resolution.
- Skip env var substitution if "$LP_" is not present in the string.
This commit is contained in:
Adrià Arrufat
2026-05-21 21:33:05 +02:00
parent 7f60d73338
commit d16f8a032e
7 changed files with 118 additions and 69 deletions

View File

@@ -671,8 +671,6 @@ fn runActionEntry(self: *Agent, sa: std.mem.Allocator, entry: Command.ScriptIter
if (!result.is_error and verification != .failed) return .ok;
if (self.self_heal and self.ai_client != null) {
// Verification-only failures often resolve with a brief wait
// (animations, lazy-load); skip the LLM round-trip when they do.
if (!result.is_error and isRetryable(entry.command) and self.retryCommand(ca, entry.command)) {
return .ok;
}
@@ -720,6 +718,18 @@ fn retryCommand(self: *Agent, ca: std.mem.Allocator, cmd: Command) bool {
return false;
}
/// Reports whether `cmd` may be retried as-is: it must be a tool call whose
/// name maps to the `fill`, `setChecked` or `selectOption` action. Any other
/// command kind, or a name that is not a `browser_tools.Action` tag, yields
/// false.
fn isRetryable(cmd: Command) bool {
    if (cmd != .tool_call) return false;
    const action = std.meta.stringToEnum(browser_tools.Action, cmd.tool_call.name) orelse return false;
    return action == .fill or action == .setChecked or action == .selectOption;
}
fn flushReplacements(self: *Agent, path: []const u8, content: []const u8, replacements: []const Replacement) void {
if (replacements.len == 0) return;
script.writeAtomic(self.allocator, std.fs.cwd(), path, content, replacements) catch |err| {
@@ -735,18 +745,6 @@ fn flushReplacements(self: *Agent, path: []const u8, content: []const u8, replac
);
}
/// Reports whether `cmd` is a tool call whose name maps to the `fill`,
/// `setChecked` or `selectOption` action. Any other command kind, or a name
/// that is not a `browser_tools.Action` tag, yields false.
fn isRetryable(cmd: Command) bool {
// Only tool calls carry a tool name to inspect.
const tc = switch (cmd) {
.tool_call => |t| t,
else => return false,
};
// Unknown tool names are treated as not retryable.
const action = std.meta.stringToEnum(browser_tools.Action, tc.name) orelse return false;
return switch (action) {
.fill, .setChecked, .selectOption => true,
else => false,
};
}
const self_heal_max_attempts = 3;
fn ensureSystemPrompt(self: *Agent) !void {

View File

@@ -58,12 +58,14 @@ fn substituteStringArgs(arena: std.mem.Allocator, tool_name: []const u8, args: ?
const v = args orelse return null;
if (v != .object) return v;
const is_fill = if (std.meta.stringToEnum(browser_tools.Action, tool_name)) |a| a == .fill else false;
var needs_sub = false;
var it = v.object.iterator();
while (it.next()) |entry| {
const key = entry.key_ptr.*;
const val = entry.value_ptr.*;
const exclude = std.mem.eql(u8, tool_name, "fill") and std.mem.eql(u8, key, "value");
const exclude = is_fill and std.mem.eql(u8, key, "value");
if (!exclude and val == .string and std.mem.indexOf(u8, val.string, "$LP_") != null) {
needs_sub = true;
break;
@@ -77,8 +79,8 @@ fn substituteStringArgs(arena: std.mem.Allocator, tool_name: []const u8, args: ?
while (it.next()) |entry| {
const key = entry.key_ptr.*;
const val = entry.value_ptr.*;
const exclude = std.mem.eql(u8, tool_name, "fill") and std.mem.eql(u8, key, "value");
if (!exclude and val == .string) {
const exclude = is_fill and std.mem.eql(u8, key, "value");
if (!exclude and val == .string and std.mem.indexOf(u8, val.string, "$LP_") != null) {
const resolved = try browser_tools.substituteEnvVars(arena, val.string);
try new_obj.put(key, .{ .string = resolved });
continue;

View File

@@ -17,9 +17,9 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! REPL-only meta slash commands and re-exports of the PandaScript schema
//! primitives. The actual slash-command grammar (which now IS PandaScript)
//! lives in `script/schema.zig`; this module keeps the agent-only meta
//! commands (`/help`, `/quit`, `/verbosity`) that aren't part of the script.
//! primitives. The actual slash-command grammar lives in `script/schema.zig`;
//! this module keeps the agent-only meta commands (`/help`, `/quit`,
//! `/verbosity`) that aren't part of the script.
const std = @import("std");
const lp = @import("lightpanda");
@@ -39,6 +39,7 @@ pub const max_hint_slots = schema.max_hint_slots;
pub const buildSchemas = schema.buildSchemas;
pub const globalSchemas = schema.globalSchemas;
pub const findSchema = schema.findSchema;
pub const findSchemaCanonical = schema.findSchemaCanonical;
pub const splitNameRest = schema.splitNameRest;
/// Meta slash commands handled directly by the agent (not by ToolExecutor).

View File

@@ -519,7 +519,7 @@ fn slashHasPrefix(name: []const u8) bool {
}
/// Reports whether the slash command `name` takes parameters: true when a
/// matching global schema has a non-empty `hints` list, or a matching meta
/// command has a non-empty `hint`; false when nothing matches.
fn slashHasParams(name: []const u8) bool {
if (SlashCommand.findSchema(SlashCommand.globalSchemas(), name)) |s| return s.hints.len > 0;
if (SlashCommand.findSchemaCanonical(SlashCommand.globalSchemas(), name)) |s| return s.hints.len > 0;
// No schema matched: fall back to the meta commands looked up by `findMeta`.
if (SlashCommand.findMeta(name)) |m| return m.hint.len > 0;
return false;
}

View File

@@ -337,3 +337,35 @@ test "init creates the file if missing" {
const n = file.readAll(&buf) catch unreachable;
try std.testing.expectEqualStrings("/goto 'https://example.com'\n", buf[0..n]);
}
test "record and parse: triple-quote round-trip" {
    // Records a command whose `schema` argument spans several lines, re-parses
    // the recorded file, and checks that the argument comes back unchanged.
    var arena: std.heap.ArenaAllocator = .init(std.testing.allocator);
    defer arena.deinit();
    const aa = arena.allocator();

    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();

    var recorder = try Recorder.init(std.testing.allocator, tmp.dir, "triple.lp");
    defer recorder.deinit();

    const cmd_str = "/extract '{\n \"title\": \"span.title\",\n \"desc\": \"p.description\"\n}'";
    const original_cmd = parseLine(aa, cmd_str);
    recorder.record(original_cmd);

    // File I/O can genuinely fail; report it as a test error rather than
    // asserting it is impossible with `catch unreachable`.
    const file = try tmp.dir.openFile("triple.lp", .{});
    defer file.close();
    var buf: [512]u8 = undefined;
    const n = try file.readAll(&buf);
    const content = buf[0..n];

    var iter: Command.ScriptIterator = .init(aa, content);
    // An empty recording is a test failure, not a null-unwrap panic.
    const entry = (try iter.next()) orelse return error.TestUnexpectedResult;
    const parsed_cmd = entry.command;

    try std.testing.expectEqualStrings("extract", parsed_cmd.tool_call.name);
    const original_val = original_cmd.tool_call.args.?.object.get("schema").?.string;
    const parsed_val = parsed_cmd.tool_call.args.?.object.get("schema").?.string;
    try std.testing.expectEqualStrings(original_val, parsed_val);
}

View File

@@ -51,8 +51,8 @@ pub const Command = union(enum) {
.comment => false,
.login, .accept_cookies => true,
.tool_call => |tc| blk: {
const td = toolDef(tc.name) orelse break :blk false;
if (!td.recorded) break :blk false;
const s = schema.findSchemaCanonical(schema.globalSchemas(), tc.name) orelse break :blk false;
if (!s.recorded) break :blk false;
// backendNodeId-based calls aren't replayable (the id is
// invalidated by any DOM mutation), so keep them out of the
// recording even when the tool itself is recordable.
@@ -65,7 +65,7 @@ pub const Command = union(enum) {
/// Returns the `produces_data` flag of the entry matching a tool call's name
/// (false when the name is unknown); always false for every other command
/// kind.
pub fn producesData(self: Command) bool {
return switch (self) {
.tool_call => |tc| if (toolDef(tc.name)) |td| td.produces_data else false,
.tool_call => |tc| if (schema.findSchemaCanonical(schema.globalSchemas(), tc.name)) |s| s.produces_data else false,
else => false,
};
}
@@ -82,7 +82,7 @@ pub const Command = union(enum) {
/// `can_heal` flag in `tool_defs`; here it's just a lookup.
pub fn canHeal(self: Command) bool {
return switch (self) {
.tool_call => |tc| if (toolDef(tc.name)) |td| td.can_heal else false,
.tool_call => |tc| if (schema.findSchemaCanonical(schema.globalSchemas(), tc.name)) |s| s.can_heal else false,
else => false,
};
}
@@ -149,7 +149,7 @@ pub const Command = union(enum) {
/// Use when the Command must outlive the original args buffer (e.g. the
/// self-heal path returns Commands across an arena deinit).
pub fn fromToolCallOwned(arena: std.mem.Allocator, tool_name: []const u8, arguments: ?std.json.Value) std.mem.Allocator.Error!Command {
const owned_name = if (toolDef(tool_name)) |td| td.name else try arena.dupe(u8, tool_name);
const owned_name = if (schema.findSchemaCanonical(schema.globalSchemas(), tool_name)) |s| s.tool_name else try arena.dupe(u8, tool_name);
const owned_args = if (arguments) |v| try dupeJsonValue(arena, v) else null;
return .{ .tool_call = .{ .name = owned_name, .args = owned_args } };
}
@@ -264,13 +264,6 @@ pub const Command = union(enum) {
};
};
/// Linear search of `lp.tools.tool_defs` for the entry whose `name` equals
/// `name` byte-for-byte. Returns a pointer to that entry, or null when no
/// entry matches.
fn toolDef(name: []const u8) ?*const lp.tools.ToolDef {
for (&lp.tools.tool_defs) |*td| {
if (std.mem.eql(u8, td.name, name)) return td;
}
return null;
}
/// Deep-copy a `std.json.Value`, duplicating all owned strings and containers
/// into `a`. Used by `fromToolCallOwned` for the heal path.
fn dupeJsonValue(a: std.mem.Allocator, value: std.json.Value) std.mem.Allocator.Error!std.json.Value {
@@ -326,15 +319,13 @@ fn formatToolCall(tc: Command.ToolCall, writer: *std.Io.Writer) std.Io.Writer.Er
if (isDefaultTrueBool(s, entry.key_ptr.*, entry.value_ptr.*)) continue;
visible += 1;
}
if (has_one_required and visible == 1) {
if (has_one_required and visible == 1) blk: {
const req_name = s.required[0];
if (args.get(req_name)) |v| {
if (v == .string) {
try writer.writeByte(' ');
try formatString(writer, v.string);
positional_emitted = req_name;
}
}
const v = args.get(req_name) orelse break :blk;
if (v != .string) break :blk;
try writer.writeByte(' ');
try formatString(writer, v.string);
positional_emitted = req_name;
}
}
@@ -353,11 +344,7 @@ fn formatToolCall(tc: Command.ToolCall, writer: *std.Io.Writer) std.Io.Writer.Er
}
/// True only when `v` is the boolean `true` and the schema field named `key`
/// is marked `default_true`; false for any other value or an unknown key.
fn isDefaultTrueBool(s: *const schema.SchemaInfo, key: []const u8, v: std.json.Value) bool {
if (v != .bool or !v.bool) return false;
for (s.fields) |f| {
if (std.mem.eql(u8, f.name, key)) return f.default_true;
}
return false;
return v == .bool and v.bool and s.isFieldDefaultTrue(key);
}
/// Strings are always quoted (or triple-quoted when they contain newlines)

View File

@@ -82,6 +82,13 @@ pub const SchemaInfo = struct {
}
return .other;
}
/// Returns the `default_true` flag of the first field in `self.fields` whose
/// name equals `key`; returns false when no field has that name.
pub fn isFieldDefaultTrue(self: *const SchemaInfo, key: []const u8) bool {
    for (self.fields) |field| {
        if (!std.mem.eql(u8, field.name, key)) continue;
        return field.default_true;
    }
    return false;
}
};
pub const ParseError = error{
@@ -206,6 +213,12 @@ pub fn findSchema(schemas: []const SchemaInfo, name: []const u8) ?*const SchemaI
return null;
}
/// Resolves `name` as a `browser_tools.Action` tag and returns a pointer to
/// the schema stored at that tag's integer value, or null when `name` is not
/// an `Action` tag.
///
/// NOTE(review): this assumes `schemas` is ordered exactly like the `Action`
/// enum; the assert below only checks the length against `tool_defs` —
/// confirm the ordering invariant where the schemas are built.
pub fn findSchemaCanonical(schemas: []const SchemaInfo, name: []const u8) ?*const SchemaInfo {
    std.debug.assert(schemas.len == browser_tools.tool_defs.len);
    if (std.meta.stringToEnum(browser_tools.Action, name)) |action| {
        const index: usize = @intFromEnum(action);
        return &schemas[index];
    }
    return null;
}
pub const Split = struct {
name: []const u8,
rest: []const u8,
@@ -252,12 +265,12 @@ pub fn parseValue(arena: std.mem.Allocator, schema: *const SchemaInfo, rest: []c
var pairs = try arena.alloc(KvPair, tokens.len);
const kv_start: usize = if (leading_positional) 1 else 0;
if (leading_positional) {
pairs[0] = .{ .key = schema.required[0], .value = try stripQuotes(arena, tokens[0]) };
pairs[0] = .{ .key = schema.required[0], .value = stripQuotes(tokens[0]) };
}
for (tokens[kv_start..], kv_start..) |tok, i| {
const eq = std.mem.indexOfScalar(u8, tok, '=') orelse return error.MalformedKv;
if (eq == 0 or eq == tok.len - 1) return error.MalformedKv;
pairs[i] = .{ .key = tok[0..eq], .value = try stripQuotes(arena, tok[eq + 1 ..]) };
pairs[i] = .{ .key = tok[0..eq], .value = stripQuotes(tok[eq + 1 ..]) };
}
// Default-true required booleans (e.g. setChecked.checked) are filled in
@@ -306,8 +319,15 @@ fn tokenize(arena: std.mem.Allocator, input: []const u8) ParseError![][]const u8
while (i < input.len and !std.ascii.isWhitespace(input[i])) : (i += 1) {
const ch = input[i];
if (ch == '"' or ch == '\'') {
const close = std.mem.indexOfScalarPos(u8, input, i + 1, ch) orelse return error.UnterminatedQuote;
i = close;
const is_triple = i + 2 < input.len and input[i + 1] == ch and input[i + 2] == ch;
if (is_triple) {
const triple_delim = input[i .. i + 3];
const close = std.mem.indexOfPos(u8, input, i + 3, triple_delim) orelse return error.UnterminatedQuote;
i = close + 2;
} else {
const close = std.mem.indexOfScalarPos(u8, input, i + 1, ch) orelse return error.UnterminatedQuote;
i = close;
}
}
}
try out.append(arena, input[tok_start..i]);
@@ -316,27 +336,23 @@ fn tokenize(arena: std.mem.Allocator, input: []const u8) ParseError![][]const u8
return try out.toOwnedSlice(arena);
}
/// Allocating form: returns `raw` untouched when it contains no quote
/// character; otherwise builds a copy in `arena` in which every quoted run
/// (text between a quote character and the next occurrence of the same
/// character) is kept but its delimiters are dropped.
fn stripQuotes(arena: std.mem.Allocator, raw: []const u8) ParseError![]const u8 {
const has_quote = std.mem.indexOfAny(u8, raw, "\"'") != null;
if (!has_quote) return raw;
var buf: std.ArrayList(u8) = .empty;
try buf.ensureTotalCapacity(arena, raw.len);
var i: usize = 0;
while (i < raw.len) {
const ch = raw[i];
if (ch == '"' or ch == '\'') {
i += 1;
const start = i;
while (i < raw.len and raw[i] != ch) i += 1;
try buf.appendSlice(arena, raw[start..i]);
i += 1;
continue;
/// Slice-returning form: removes one pair of matching outer delimiters from
/// `raw` — triple quotes (`'''` or `"""`) first, then single `'` or `"` — and
/// returns the inner sub-slice of `raw`. Returns `raw` unchanged when it is
/// not wrapped in a matching pair. Never allocates.
///
/// NOTE(review): quotes that do not wrap the whole token (e.g. `a"b c"`) are
/// left in place by this form, whereas the loop-based form removed the
/// delimiters of every quoted run — confirm no caller relies on that.
fn stripQuotes(raw: []const u8) []const u8 {
// A triple-quoted token needs at least 3 opening + 3 closing bytes.
if (raw.len >= 6) {
if (std.mem.startsWith(u8, raw, "'''") and std.mem.endsWith(u8, raw, "'''")) {
return raw[3 .. raw.len - 3];
}
if (std.mem.startsWith(u8, raw, "\"\"\"") and std.mem.endsWith(u8, raw, "\"\"\"")) {
return raw[3 .. raw.len - 3];
}
try buf.append(arena, ch);
i += 1;
}
return try buf.toOwnedSlice(arena);
// Single matching pair: first and last byte must be the same quote character.
if (raw.len >= 2) {
const first = raw[0];
const last = raw[raw.len - 1];
if ((first == '\'' and last == '\'') or (first == '"' and last == '"')) {
return raw[1 .. raw.len - 1];
}
}
return raw;
}
fn buildValue(arena: std.mem.Allocator, schema: *const SchemaInfo, pairs: []const KvPair) error{OutOfMemory}!std.json.Value {
@@ -429,6 +445,10 @@ test "globalSchemas: comptime tool defs reduce cleanly" {
if (std.mem.eql(u8, f.name, "checked")) checked_default_true = f.default_true;
}
try testing.expect(checked_default_true);
// canonical lookup matches search lookup
try testing.expect(findSchemaCanonical(schemas, "goto") == goto);
try testing.expect(findSchemaCanonical(schemas, "unknown_tool") == null);
}
test "parseValue: single-required positional binds" {
@@ -519,3 +539,12 @@ test "splitNameRest: trims and handles empty" {
try testing.expectEqualStrings("goto", r.name);
try testing.expectEqualStrings("https://x", r.rest);
}
test "tokenize: inline triple quotes with spaces" {
    // Whitespace inside a triple-quoted value must not split the token.
    var arena_state: std.heap.ArenaAllocator = .init(testing.allocator);
    defer arena_state.deinit();

    const input = "selector='''hello world''' value=\"\"\"foo bar\"\"\"";
    const expected = [_][]const u8{
        "selector='''hello world'''",
        "value=\"\"\"foo bar\"\"\"",
    };

    const tokens = try tokenize(arena_state.allocator(), input);
    try testing.expectEqual(@as(usize, expected.len), tokens.len);
    for (expected, tokens) |want, got| {
        try testing.expectEqualStrings(want, got);
    }
}