mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 09:35:59 -04:00
script: support triple-quoted strings and optimize lookups
- Add support for triple-quoted strings (''' and """) in the tokenizer
and quote stripper.
- Avoid allocations in `stripQuotes` by returning slices.
- Optimize schema lookups using O(1) enum-based canonical resolution.
- Skip env var substitution if "$LP_" is not present in the string.
This commit is contained in:
@@ -671,8 +671,6 @@ fn runActionEntry(self: *Agent, sa: std.mem.Allocator, entry: Command.ScriptIter
|
||||
if (!result.is_error and verification != .failed) return .ok;
|
||||
|
||||
if (self.self_heal and self.ai_client != null) {
|
||||
// Verification-only failures often resolve with a brief wait
|
||||
// (animations, lazy-load); skip the LLM round-trip when they do.
|
||||
if (!result.is_error and isRetryable(entry.command) and self.retryCommand(ca, entry.command)) {
|
||||
return .ok;
|
||||
}
|
||||
@@ -720,6 +718,18 @@ fn retryCommand(self: *Agent, ca: std.mem.Allocator, cmd: Command) bool {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Returns true only when `cmd` is a tool call whose name parses to one of
/// the `browser_tools.Action` tags `.fill`, `.setChecked` or `.selectOption`.
/// Any other command kind, or a tool name that is not an `Action` tag,
/// yields false.
fn isRetryable(cmd: Command) bool {
    if (cmd != .tool_call) return false;
    const tool_name = cmd.tool_call.name;
    if (std.meta.stringToEnum(browser_tools.Action, tool_name)) |action| {
        return action == .fill or action == .setChecked or action == .selectOption;
    }
    return false;
}
|
||||
|
||||
fn flushReplacements(self: *Agent, path: []const u8, content: []const u8, replacements: []const Replacement) void {
|
||||
if (replacements.len == 0) return;
|
||||
script.writeAtomic(self.allocator, std.fs.cwd(), path, content, replacements) catch |err| {
|
||||
@@ -735,18 +745,6 @@ fn flushReplacements(self: *Agent, path: []const u8, content: []const u8, replac
|
||||
);
|
||||
}
|
||||
|
||||
/// Returns true only when `cmd` is a tool call whose name parses to one of
/// the `browser_tools.Action` tags `.fill`, `.setChecked` or `.selectOption`.
/// Any other command kind, or a tool name that is not an `Action` tag,
/// yields false.
fn isRetryable(cmd: Command) bool {
    if (cmd != .tool_call) return false;
    const tool_name = cmd.tool_call.name;
    if (std.meta.stringToEnum(browser_tools.Action, tool_name)) |action| {
        return action == .fill or action == .setChecked or action == .selectOption;
    }
    return false;
}
|
||||
|
||||
const self_heal_max_attempts = 3;
|
||||
|
||||
fn ensureSystemPrompt(self: *Agent) !void {
|
||||
|
||||
@@ -58,12 +58,14 @@ fn substituteStringArgs(arena: std.mem.Allocator, tool_name: []const u8, args: ?
|
||||
const v = args orelse return null;
|
||||
if (v != .object) return v;
|
||||
|
||||
const is_fill = if (std.meta.stringToEnum(browser_tools.Action, tool_name)) |a| a == .fill else false;
|
||||
|
||||
var needs_sub = false;
|
||||
var it = v.object.iterator();
|
||||
while (it.next()) |entry| {
|
||||
const key = entry.key_ptr.*;
|
||||
const val = entry.value_ptr.*;
|
||||
const exclude = std.mem.eql(u8, tool_name, "fill") and std.mem.eql(u8, key, "value");
|
||||
const exclude = is_fill and std.mem.eql(u8, key, "value");
|
||||
if (!exclude and val == .string and std.mem.indexOf(u8, val.string, "$LP_") != null) {
|
||||
needs_sub = true;
|
||||
break;
|
||||
@@ -77,8 +79,8 @@ fn substituteStringArgs(arena: std.mem.Allocator, tool_name: []const u8, args: ?
|
||||
while (it.next()) |entry| {
|
||||
const key = entry.key_ptr.*;
|
||||
const val = entry.value_ptr.*;
|
||||
const exclude = std.mem.eql(u8, tool_name, "fill") and std.mem.eql(u8, key, "value");
|
||||
if (!exclude and val == .string) {
|
||||
const exclude = is_fill and std.mem.eql(u8, key, "value");
|
||||
if (!exclude and val == .string and std.mem.indexOf(u8, val.string, "$LP_") != null) {
|
||||
const resolved = try browser_tools.substituteEnvVars(arena, val.string);
|
||||
try new_obj.put(key, .{ .string = resolved });
|
||||
continue;
|
||||
|
||||
@@ -17,9 +17,9 @@
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! REPL-only meta slash commands and re-exports of the PandaScript schema
|
||||
//! primitives. The actual slash-command grammar (which now IS PandaScript)
|
||||
//! lives in `script/schema.zig`; this module keeps the agent-only meta
|
||||
//! commands (`/help`, `/quit`, `/verbosity`) that aren't part of the script.
|
||||
//! primitives. The actual slash-command grammar lives in `script/schema.zig`;
|
||||
//! this module keeps the agent-only meta commands (`/help`, `/quit`,
|
||||
//! `/verbosity`) that aren't part of the script.
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
@@ -39,6 +39,7 @@ pub const max_hint_slots = schema.max_hint_slots;
|
||||
pub const buildSchemas = schema.buildSchemas;
|
||||
pub const globalSchemas = schema.globalSchemas;
|
||||
pub const findSchema = schema.findSchema;
|
||||
pub const findSchemaCanonical = schema.findSchemaCanonical;
|
||||
pub const splitNameRest = schema.splitNameRest;
|
||||
|
||||
/// Meta slash commands handled directly by the agent (not by ToolExecutor).
|
||||
|
||||
@@ -519,7 +519,7 @@ fn slashHasPrefix(name: []const u8) bool {
|
||||
}
|
||||
|
||||
fn slashHasParams(name: []const u8) bool {
|
||||
if (SlashCommand.findSchema(SlashCommand.globalSchemas(), name)) |s| return s.hints.len > 0;
|
||||
if (SlashCommand.findSchemaCanonical(SlashCommand.globalSchemas(), name)) |s| return s.hints.len > 0;
|
||||
if (SlashCommand.findMeta(name)) |m| return m.hint.len > 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -337,3 +337,35 @@ test "init creates the file if missing" {
|
||||
const n = file.readAll(&buf) catch unreachable;
|
||||
try std.testing.expectEqualStrings("/goto 'https://example.com'\n", buf[0..n]);
|
||||
}
|
||||
|
||||
// Records a command whose `schema` argument spans several lines, reads the
// recorded script back from disk, re-parses it with `Command.ScriptIterator`,
// and checks that the tool name and the `schema` string survive unchanged.
test "record and parse: triple-quote round-trip" {
    var arena: std.heap.ArenaAllocator = .init(std.testing.allocator);
    defer arena.deinit();
    const aa = arena.allocator();

    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();

    var recorder = try Recorder.init(std.testing.allocator, tmp.dir, "triple.lp");
    defer recorder.deinit();

    // The quoted argument contains embedded newlines, which is the case the
    // round-trip has to preserve.
    const cmd_str = "/extract '{\n \"title\": \"span.title\",\n \"desc\": \"p.description\"\n}'";
    const original_cmd = parseLine(aa, cmd_str);
    recorder.record(original_cmd);

    // Propagate I/O failures as test errors instead of asserting they
    // cannot happen: opening or reading a temp file can legitimately fail.
    const file = try tmp.dir.openFile("triple.lp", .{});
    defer file.close();
    var buf: [512]u8 = undefined;
    const n = try file.readAll(&buf);
    const content = buf[0..n];

    var iter: Command.ScriptIterator = .init(aa, content);
    const entry = (try iter.next()).?;
    const parsed_cmd = entry.command;

    try std.testing.expectEqualStrings("extract", parsed_cmd.tool_call.name);

    const original_val = original_cmd.tool_call.args.?.object.get("schema").?.string;
    const parsed_val = parsed_cmd.tool_call.args.?.object.get("schema").?.string;
    try std.testing.expectEqualStrings(original_val, parsed_val);
}
|
||||
|
||||
@@ -51,8 +51,8 @@ pub const Command = union(enum) {
|
||||
.comment => false,
|
||||
.login, .accept_cookies => true,
|
||||
.tool_call => |tc| blk: {
|
||||
const td = toolDef(tc.name) orelse break :blk false;
|
||||
if (!td.recorded) break :blk false;
|
||||
const s = schema.findSchemaCanonical(schema.globalSchemas(), tc.name) orelse break :blk false;
|
||||
if (!s.recorded) break :blk false;
|
||||
// backendNodeId-based calls aren't replayable (the id is
|
||||
// invalidated by any DOM mutation), so keep them out of the
|
||||
// recording even when the tool itself is recordable.
|
||||
@@ -65,7 +65,7 @@ pub const Command = union(enum) {
|
||||
|
||||
pub fn producesData(self: Command) bool {
|
||||
return switch (self) {
|
||||
.tool_call => |tc| if (toolDef(tc.name)) |td| td.produces_data else false,
|
||||
.tool_call => |tc| if (schema.findSchemaCanonical(schema.globalSchemas(), tc.name)) |s| s.produces_data else false,
|
||||
else => false,
|
||||
};
|
||||
}
|
||||
@@ -82,7 +82,7 @@ pub const Command = union(enum) {
|
||||
/// `can_heal` flag in `tool_defs`; here it's just a lookup.
|
||||
pub fn canHeal(self: Command) bool {
|
||||
return switch (self) {
|
||||
.tool_call => |tc| if (toolDef(tc.name)) |td| td.can_heal else false,
|
||||
.tool_call => |tc| if (schema.findSchemaCanonical(schema.globalSchemas(), tc.name)) |s| s.can_heal else false,
|
||||
else => false,
|
||||
};
|
||||
}
|
||||
@@ -149,7 +149,7 @@ pub const Command = union(enum) {
|
||||
/// Use when the Command must outlive the original args buffer (e.g. the
|
||||
/// self-heal path returns Commands across an arena deinit).
|
||||
pub fn fromToolCallOwned(arena: std.mem.Allocator, tool_name: []const u8, arguments: ?std.json.Value) std.mem.Allocator.Error!Command {
|
||||
const owned_name = if (toolDef(tool_name)) |td| td.name else try arena.dupe(u8, tool_name);
|
||||
const owned_name = if (schema.findSchemaCanonical(schema.globalSchemas(), tool_name)) |s| s.tool_name else try arena.dupe(u8, tool_name);
|
||||
const owned_args = if (arguments) |v| try dupeJsonValue(arena, v) else null;
|
||||
return .{ .tool_call = .{ .name = owned_name, .args = owned_args } };
|
||||
}
|
||||
@@ -264,13 +264,6 @@ pub const Command = union(enum) {
|
||||
};
|
||||
};
|
||||
|
||||
/// Scans `lp.tools.tool_defs` in order and returns a pointer to the first
/// entry whose `name` equals `name` byte-for-byte, or null when none matches.
fn toolDef(name: []const u8) ?*const lp.tools.ToolDef {
    var index: usize = 0;
    while (index < lp.tools.tool_defs.len) : (index += 1) {
        const candidate = &lp.tools.tool_defs[index];
        if (std.mem.eql(u8, candidate.name, name)) return candidate;
    }
    return null;
}
|
||||
|
||||
/// Deep-copy a `std.json.Value`, duplicating all owned strings and containers
|
||||
/// into `a`. Used by `fromToolCallOwned` for the heal path.
|
||||
fn dupeJsonValue(a: std.mem.Allocator, value: std.json.Value) std.mem.Allocator.Error!std.json.Value {
|
||||
@@ -326,15 +319,13 @@ fn formatToolCall(tc: Command.ToolCall, writer: *std.Io.Writer) std.Io.Writer.Er
|
||||
if (isDefaultTrueBool(s, entry.key_ptr.*, entry.value_ptr.*)) continue;
|
||||
visible += 1;
|
||||
}
|
||||
if (has_one_required and visible == 1) {
|
||||
if (has_one_required and visible == 1) blk: {
|
||||
const req_name = s.required[0];
|
||||
if (args.get(req_name)) |v| {
|
||||
if (v == .string) {
|
||||
try writer.writeByte(' ');
|
||||
try formatString(writer, v.string);
|
||||
positional_emitted = req_name;
|
||||
}
|
||||
}
|
||||
const v = args.get(req_name) orelse break :blk;
|
||||
if (v != .string) break :blk;
|
||||
try writer.writeByte(' ');
|
||||
try formatString(writer, v.string);
|
||||
positional_emitted = req_name;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -353,11 +344,7 @@ fn formatToolCall(tc: Command.ToolCall, writer: *std.Io.Writer) std.Io.Writer.Er
|
||||
}
|
||||
|
||||
fn isDefaultTrueBool(s: *const schema.SchemaInfo, key: []const u8, v: std.json.Value) bool {
|
||||
if (v != .bool or !v.bool) return false;
|
||||
for (s.fields) |f| {
|
||||
if (std.mem.eql(u8, f.name, key)) return f.default_true;
|
||||
}
|
||||
return false;
|
||||
return v == .bool and v.bool and s.isFieldDefaultTrue(key);
|
||||
}
|
||||
|
||||
/// Strings are always quoted (or triple-quoted when they contain newlines)
|
||||
|
||||
@@ -82,6 +82,13 @@ pub const SchemaInfo = struct {
|
||||
}
|
||||
return .other;
|
||||
}
|
||||
|
||||
/// Looks up the field named `key` in `self.fields` and returns its
/// `default_true` flag. Returns false when no field has that name.
pub fn isFieldDefaultTrue(self: *const SchemaInfo, key: []const u8) bool {
    for (self.fields) |field| {
        if (!std.mem.eql(u8, field.name, key)) continue;
        return field.default_true;
    }
    return false;
}
|
||||
};
|
||||
|
||||
pub const ParseError = error{
|
||||
@@ -206,6 +213,12 @@ pub fn findSchema(schemas: []const SchemaInfo, name: []const u8) ?*const SchemaI
|
||||
return null;
|
||||
}
|
||||
|
||||
/// Resolves `name` to a schema without scanning: the name is parsed as a
/// `browser_tools.Action` tag and the tag's integer value is used as the
/// index into `schemas`. Returns null when `name` is not an `Action` tag.
///
/// NOTE(review): only the length of `schemas` is asserted against
/// `browser_tools.tool_defs`; the lookup is correct only if `schemas` is
/// ordered like the `Action` tags — confirm where the slice is built.
pub fn findSchemaCanonical(schemas: []const SchemaInfo, name: []const u8) ?*const SchemaInfo {
    std.debug.assert(schemas.len == browser_tools.tool_defs.len);
    if (std.meta.stringToEnum(browser_tools.Action, name)) |action| {
        const index = @intFromEnum(action);
        return &schemas[index];
    }
    return null;
}
|
||||
|
||||
pub const Split = struct {
|
||||
name: []const u8,
|
||||
rest: []const u8,
|
||||
@@ -252,12 +265,12 @@ pub fn parseValue(arena: std.mem.Allocator, schema: *const SchemaInfo, rest: []c
|
||||
var pairs = try arena.alloc(KvPair, tokens.len);
|
||||
const kv_start: usize = if (leading_positional) 1 else 0;
|
||||
if (leading_positional) {
|
||||
pairs[0] = .{ .key = schema.required[0], .value = try stripQuotes(arena, tokens[0]) };
|
||||
pairs[0] = .{ .key = schema.required[0], .value = stripQuotes(tokens[0]) };
|
||||
}
|
||||
for (tokens[kv_start..], kv_start..) |tok, i| {
|
||||
const eq = std.mem.indexOfScalar(u8, tok, '=') orelse return error.MalformedKv;
|
||||
if (eq == 0 or eq == tok.len - 1) return error.MalformedKv;
|
||||
pairs[i] = .{ .key = tok[0..eq], .value = try stripQuotes(arena, tok[eq + 1 ..]) };
|
||||
pairs[i] = .{ .key = tok[0..eq], .value = stripQuotes(tok[eq + 1 ..]) };
|
||||
}
|
||||
|
||||
// Default-true required booleans (e.g. setChecked.checked) are filled in
|
||||
@@ -306,8 +319,15 @@ fn tokenize(arena: std.mem.Allocator, input: []const u8) ParseError![][]const u8
|
||||
while (i < input.len and !std.ascii.isWhitespace(input[i])) : (i += 1) {
|
||||
const ch = input[i];
|
||||
if (ch == '"' or ch == '\'') {
|
||||
const close = std.mem.indexOfScalarPos(u8, input, i + 1, ch) orelse return error.UnterminatedQuote;
|
||||
i = close;
|
||||
const is_triple = i + 2 < input.len and input[i + 1] == ch and input[i + 2] == ch;
|
||||
if (is_triple) {
|
||||
const triple_delim = input[i .. i + 3];
|
||||
const close = std.mem.indexOfPos(u8, input, i + 3, triple_delim) orelse return error.UnterminatedQuote;
|
||||
i = close + 2;
|
||||
} else {
|
||||
const close = std.mem.indexOfScalarPos(u8, input, i + 1, ch) orelse return error.UnterminatedQuote;
|
||||
i = close;
|
||||
}
|
||||
}
|
||||
}
|
||||
try out.append(arena, input[tok_start..i]);
|
||||
@@ -316,27 +336,23 @@ fn tokenize(arena: std.mem.Allocator, input: []const u8) ParseError![][]const u8
|
||||
return try out.toOwnedSlice(arena);
|
||||
}
|
||||
|
||||
fn stripQuotes(arena: std.mem.Allocator, raw: []const u8) ParseError![]const u8 {
|
||||
const has_quote = std.mem.indexOfAny(u8, raw, "\"'") != null;
|
||||
if (!has_quote) return raw;
|
||||
|
||||
var buf: std.ArrayList(u8) = .empty;
|
||||
try buf.ensureTotalCapacity(arena, raw.len);
|
||||
var i: usize = 0;
|
||||
while (i < raw.len) {
|
||||
const ch = raw[i];
|
||||
if (ch == '"' or ch == '\'') {
|
||||
i += 1;
|
||||
const start = i;
|
||||
while (i < raw.len and raw[i] != ch) i += 1;
|
||||
try buf.appendSlice(arena, raw[start..i]);
|
||||
i += 1;
|
||||
continue;
|
||||
fn stripQuotes(raw: []const u8) []const u8 {
|
||||
if (raw.len >= 6) {
|
||||
if (std.mem.startsWith(u8, raw, "'''") and std.mem.endsWith(u8, raw, "'''")) {
|
||||
return raw[3 .. raw.len - 3];
|
||||
}
|
||||
if (std.mem.startsWith(u8, raw, "\"\"\"") and std.mem.endsWith(u8, raw, "\"\"\"")) {
|
||||
return raw[3 .. raw.len - 3];
|
||||
}
|
||||
try buf.append(arena, ch);
|
||||
i += 1;
|
||||
}
|
||||
return try buf.toOwnedSlice(arena);
|
||||
if (raw.len >= 2) {
|
||||
const first = raw[0];
|
||||
const last = raw[raw.len - 1];
|
||||
if ((first == '\'' and last == '\'') or (first == '"' and last == '"')) {
|
||||
return raw[1 .. raw.len - 1];
|
||||
}
|
||||
}
|
||||
return raw;
|
||||
}
|
||||
|
||||
fn buildValue(arena: std.mem.Allocator, schema: *const SchemaInfo, pairs: []const KvPair) error{OutOfMemory}!std.json.Value {
|
||||
@@ -429,6 +445,10 @@ test "globalSchemas: comptime tool defs reduce cleanly" {
|
||||
if (std.mem.eql(u8, f.name, "checked")) checked_default_true = f.default_true;
|
||||
}
|
||||
try testing.expect(checked_default_true);
|
||||
|
||||
// canonical lookup matches search lookup
|
||||
try testing.expect(findSchemaCanonical(schemas, "goto") == goto);
|
||||
try testing.expect(findSchemaCanonical(schemas, "unknown_tool") == null);
|
||||
}
|
||||
|
||||
test "parseValue: single-required positional binds" {
|
||||
@@ -519,3 +539,12 @@ test "splitNameRest: trims and handles empty" {
|
||||
try testing.expectEqualStrings("goto", r.name);
|
||||
try testing.expectEqualStrings("https://x", r.rest);
|
||||
}
|
||||
|
||||
// A triple-quoted value may contain spaces without being split: each
// `key=<triple-quoted text>` pair must come back as exactly one token, with
// its delimiters still attached.
test "tokenize: inline triple quotes with spaces" {
    var arena_state: std.heap.ArenaAllocator = .init(testing.allocator);
    defer arena_state.deinit();

    const input = "selector='''hello world''' value=\"\"\"foo bar\"\"\"";
    const expected = [_][]const u8{
        "selector='''hello world'''",
        "value=\"\"\"foo bar\"\"\"",
    };

    const tokens = try tokenize(arena_state.allocator(), input);
    try testing.expectEqual(@as(usize, expected.len), tokens.len);
    for (expected, tokens) |want, got| {
        try testing.expectEqualStrings(want, got);
    }
}
|
||||
|
||||
Reference in New Issue
Block a user