mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 01:25:53 -04:00
agent: verify synthesized scripts during /save
Introduces a multi-step synthesis process for `/save` that derives a logical JSON output schema and uses a dry-run runtime to verify candidate scripts. The LLM can now run and self-correct its scripts using a new `run_script` tool before finalizing the save.
This commit is contained in:
@@ -110,6 +110,14 @@ node_registry: CDPNode.Registry,
|
||||
terminal: Terminal,
|
||||
save_buffer: Recorder,
|
||||
save_path: ?[]u8,
|
||||
/// Backs `last_extract_json`; reset alongside `save_buffer`.
|
||||
last_extract_arena: std.heap.ArenaAllocator,
|
||||
/// The JSON the most recent successful `extract` returned this session — the
|
||||
/// real data `/save` grounds and verifies its synthesized script against.
|
||||
last_extract_json: ?[]const u8 = null,
|
||||
/// Set for the duration of an LLM `/save` so the `run_script` tool can reach
|
||||
/// the dry-run runtime it executes candidates on.
|
||||
active_verify: ?*Verify = null,
|
||||
script_runtime_mutex: std.Thread.Mutex = .{},
|
||||
active_script_runtime: ?*ScriptRuntime = null,
|
||||
conversation: Conversation,
|
||||
@@ -254,6 +262,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
|
||||
.terminal = .init(allocator, history_paths, verbosity, will_repl),
|
||||
.save_buffer = .init(allocator),
|
||||
.save_path = null,
|
||||
.last_extract_arena = .init(allocator),
|
||||
.conversation = .init(allocator, opts.system_prompt orelse default_system_prompt),
|
||||
.model = model,
|
||||
.effort = effort,
|
||||
@@ -294,6 +303,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
|
||||
pub fn deinit(self: *Agent) void {
|
||||
self.terminal.uninstallLogSink();
|
||||
self.save_buffer.deinit();
|
||||
self.last_extract_arena.deinit();
|
||||
if (self.save_path) |p| self.allocator.free(p);
|
||||
self.terminal.deinit();
|
||||
self.conversation.deinit();
|
||||
@@ -630,11 +640,19 @@ fn handleUsage(self: *Agent) void {
|
||||
/// node IDs. Shared by `/clear` and `/reset`.
|
||||
fn clearConversation(self: *Agent) void {
|
||||
self.conversation.rollback(0);
|
||||
self.save_buffer.reset();
|
||||
self.resetSaveBuffers();
|
||||
self.total_usage = .{};
|
||||
self.node_registry.reset();
|
||||
}
|
||||
|
||||
/// Clear the state a `/save` consumes: the recorded action buffer and the most
/// recent extract result, together with the arena holding that result's bytes.
fn resetSaveBuffers(self: *Agent) void {
    // Forget the slice before releasing the arena it points into.
    self.last_extract_json = null;
    _ = self.last_extract_arena.reset(.retain_capacity);
    self.save_buffer.reset();
}
|
||||
|
||||
/// Forget the conversation while leaving the browser session live — loaded page
|
||||
/// stays put, cookies/logins preserved.
|
||||
fn handleClear(self: *Agent) void {
|
||||
@@ -862,7 +880,7 @@ fn handleSave(self: *Agent, arena: std.mem.Allocator, rest: []const u8) void {
|
||||
new_save_path = null;
|
||||
}
|
||||
const saved_lines = self.save_buffer.lines;
|
||||
self.save_buffer.reset();
|
||||
self.resetSaveBuffers();
|
||||
self.terminal.printInfo("Saved {d} line(s) to {s}", .{ saved_lines, self.save_path.? });
|
||||
}
|
||||
|
||||
@@ -958,10 +976,130 @@ fn abortSave(self: *Agent, baseline: usize, reason: []const u8) void {
|
||||
self.failSave(reason);
|
||||
}
|
||||
|
||||
/// LLM-synthesized `/save`: hand the model the builtin catalog, the full
|
||||
/// conversation, and the deterministic record of what ran, then write the
|
||||
/// idiomatic script it returns.
|
||||
/// State of a `/save` verification that is currently running. The `run_script`
/// tool handler reaches it through `Agent.active_verify`.
const Verify = struct {
    /// Dry-run runtime that candidate scripts are executed on.
    runtime: *ScriptRuntime,
    /// Source of the most recent candidate that ran without throwing; used as
    /// the script to save when the model ends the loop without re-emitting it
    /// as text. Null until a candidate has run cleanly.
    last_source: ?[]const u8 = null,
};
|
||||
|
||||
/// Agent-only addendum (kept out of the shared `save_synthesis_prompt`) telling
|
||||
/// the model to derive every value at runtime and check the result with run_script.
|
||||
const save_verify_addendum =
|
||||
\\Read data with the recorded extract(...), not evaluate() — extract can read a
|
||||
\\card's whole text via an empty selector (""). Reshape its result in plain JS so the
|
||||
\\completion value matches the schema exactly (same keys, parsed numbers); don't
|
||||
\\return the raw extract or hard-code values.
|
||||
\\Before finalizing, test with run_script: it runs your FULL script for real from a
|
||||
\\blank page, so it must goto(...) first (missing goto → "no page loaded", a wrong
|
||||
\\selector → null). Confirm every field is populated, then reply with ONLY the final
|
||||
\\JavaScript source.
|
||||
;
|
||||
|
||||
/// Cap on the captured extract sample shown in the synthesis prompt (the full
|
||||
/// data still feeds the dry run); keeps a large result from dominating context.
|
||||
const save_sample_cap = 8 * 1024;
|
||||
|
||||
/// Entry point for the LLM-backed `/save`. The output shape is fixed before any
/// script is written: the session's intent is derived first, then a typed
/// output schema from that intent, and only then is the script synthesized
/// against the schema. A missing schema is not fatal — synthesis simply runs
/// without one. A cancel observed after the schema steps rolls the
/// conversation back to where it stood when `/save` started.
fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8, prompt: ?[]const u8) void {
    self.conversation.ensureSystemPrompt() catch {
        self.failSave("out of memory");
        return;
    };
    const baseline = self.conversation.messages.items.len;

    // An explicit `/save` prompt takes precedence over the one-shot task as
    // the anchor for intent derivation.
    const output_schema = self.deriveOutputSchema(arena, baseline, prompt orelse self.one_shot_task);

    const cancelled = self.cancel_requested.load(.acquire);
    if (cancelled) {
        self.resetAfterCancel(baseline);
        return;
    }

    self.synthesizeScript(arena, filename, prompt, output_schema);
}
|
||||
|
||||
/// First two `/save` steps chained together: derive the session intent, then a
/// typed output schema from it. Returns null when the intent turn yields
/// nothing, when a cancel is observed between the two turns, or when the
/// schema turn yields nothing; the caller then synthesizes without a schema.
fn deriveOutputSchema(self: *Agent, arena: std.mem.Allocator, baseline: usize, anchor: ?[]const u8) ?[]const u8 {
    if (self.deriveIntent(arena, baseline, anchor)) |intent| {
        // Skip the second model turn once the user has asked to cancel.
        const cancelled = self.cancel_requested.load(.acquire);
        return if (cancelled) null else self.deriveSchema(arena, intent);
    }
    return null;
}
|
||||
|
||||
/// Ask the model for the session's intent. The request is appended to the live
/// conversation so the model sees the session turns, and the conversation is
/// rolled back to `baseline` on the way out so the exchange does not stay in
/// history. When `anchor` is given it is appended to the request as the
/// user's authoritative description of the goal. Returns the reply copied into
/// `arena`, or null on any failure.
fn deriveIntent(self: *Agent, arena: std.mem.Allocator, baseline: usize, anchor: ?[]const u8) ?[]const u8 {
    const conv_alloc = self.conversation.arena.allocator();

    // Build the request text in the conversation arena.
    var request: std.Io.Writer.Allocating = .init(conv_alloc);
    const w = &request.writer;
    w.writeAll(browser_tools.save_intent_prompt) catch return null;
    if (anchor) |goal| {
        w.print("\nThe user described the goal as: {s}\nTreat that as authoritative and reconcile it with the session.", .{goal}) catch return null;
    }

    self.conversation.messages.append(self.allocator, .{
        .role = .user,
        .content = request.written(),
    }) catch return null;
    // From here on the conversation holds the request; undo that on every exit.
    defer self.conversation.rollback(baseline);

    return self.runTextTurn(
        &self.conversation.messages,
        arena,
        self.allocator,
        conv_alloc,
        512,
        "understanding the task",
    );
}
|
||||
|
||||
/// Ask the model for a typed output schema given only the `intent` text. The
/// turn runs over a scratch message list allocated in `arena` rather than the
/// conversation, so the model sees nothing of the page or how the data was
/// fetched. Returns the reply with any surrounding code fence stripped, or
/// null if the request could not be built or the turn produced nothing.
fn deriveSchema(self: *Agent, arena: std.mem.Allocator, intent: []const u8) ?[]const u8 {
    const request = std.fmt.allocPrint(arena, "{s} {s}", .{ browser_tools.save_schema_prompt, intent }) catch return null;

    var scratch: std.ArrayList(zenai.provider.Message) = .empty;
    scratch.append(arena, .{ .role = .user, .content = request }) catch return null;

    if (self.runTextTurn(&scratch, arena, arena, arena, 1024, "designing the output schema")) |reply| {
        return string.stripCodeFence(reply);
    }
    return null;
}
|
||||
|
||||
/// Run one tool-less text turn over `messages` and return the model's reply,
/// trimmed of surrounding whitespace and copied into `dest` so it survives any
/// later rollback of `messages`. Returns null when the turn errors, is
/// cancelled, produces no (or only whitespace) text, or the copy fails.
///
/// `list_alloc` and `data_alloc` are forwarded unchanged to `runTools`.
/// `status` labels the spinner and is attached to the error log, so a failure
/// is attributed to the step that issued the turn. Usage is accumulated into
/// `total_usage` whenever the provider call itself returns. Shared by the
/// intent and schema steps.
fn runTextTurn(
    self: *Agent,
    messages: *std.ArrayList(zenai.provider.Message),
    dest: std.mem.Allocator,
    list_alloc: std.mem.Allocator,
    data_alloc: std.mem.Allocator,
    max_tokens: i32,
    status: []const u8,
) ?[]const u8 {
    self.terminal.spinner.start();
    self.terminal.spinner.setStatus(status);
    var result = self.ai_client.?.runTools(
        self.model,
        messages,
        list_alloc,
        data_alloc,
        .{ .context = @ptrCast(self), .callFn = handleToolCall },
        .{
            .tools = &.{},
            .max_turns = 1,
            .max_tokens = max_tokens,
            .tool_choice = .none,
            .effort = .low,
            .cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
        },
    ) catch |err| {
        self.terminal.spinner.cancel();
        // A user cancel is not worth logging. Anything else is logged with the
        // phase, because this helper serves more than the schema step.
        if (!self.cancel_requested.load(.acquire)) {
            log.err(.app, "AI save text turn error", .{ .err = err, .phase = status });
        }
        return null;
    };
    self.terminal.spinner.stop();
    defer result.deinit();
    self.total_usage.add(result.usage);
    if (result.cancelled) return null;
    const text = std.mem.trim(u8, result.text orelse return null, &std.ascii.whitespace);
    if (text.len == 0) return null;
    // The copy happens before the deferred `result.deinit()` runs.
    return dest.dupe(u8, text) catch null;
}
|
||||
|
||||
/// Step 3 of `/save`: hand the model the builtin catalog, the full conversation,
|
||||
/// the deterministic record of what ran, and the required output schema, then
|
||||
/// write the idiomatic script it returns.
|
||||
fn synthesizeScript(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8, prompt: ?[]const u8, schema: ?[]const u8) void {
|
||||
const provider_client = self.ai_client.?;
|
||||
|
||||
const resolved = self.resolveSavePathAndMode(arena, filename) orelse return;
|
||||
@@ -972,10 +1110,41 @@ fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8,
|
||||
const ma = self.conversation.arena.allocator();
|
||||
const baseline = self.conversation.messages.items.len;
|
||||
|
||||
const user_msg = self.buildSaveSynthesisMessage(ma, prompt) catch return self.failSave("out of memory");
|
||||
// When the session captured extract data, let the model test candidates on
|
||||
// it via `run_script`; otherwise fall back to a single no-tools synthesis.
|
||||
var verify: Verify = .{ .runtime = undefined };
|
||||
var run_tools: [1]ProviderTool = undefined;
|
||||
const verifying = blk: {
|
||||
// Gate on a captured extract: it means the session loaded the page and
|
||||
// left it in a state worth verifying against (and gives a prompt sample).
|
||||
if (self.last_extract_json == null) break :blk false;
|
||||
run_tools[0] = browser_tools.runScriptToolDef(ma) catch break :blk false;
|
||||
const runtime = ScriptRuntime.init(self.allocator, self.browser.app, self.session, &self.node_registry) catch break :blk false;
|
||||
verify.runtime = runtime;
|
||||
self.active_verify = &verify;
|
||||
self.script_runtime_mutex.lock();
|
||||
self.active_script_runtime = runtime;
|
||||
self.script_runtime_mutex.unlock();
|
||||
break :blk true;
|
||||
};
|
||||
defer if (verifying) {
|
||||
self.script_runtime_mutex.lock();
|
||||
self.active_script_runtime = null;
|
||||
self.script_runtime_mutex.unlock();
|
||||
self.active_verify = null;
|
||||
verify.runtime.cancelTerminate();
|
||||
verify.runtime.deinit();
|
||||
};
|
||||
|
||||
const sample: ?[]const u8 = if (verifying) blk: {
|
||||
const d = self.last_extract_json.?;
|
||||
break :blk d[0..@min(d.len, save_sample_cap)];
|
||||
} else null;
|
||||
const user_msg = self.buildSaveSynthesisMessage(ma, prompt, schema, sample) catch return self.failSave("out of memory");
|
||||
self.conversation.messages.append(self.allocator, .{ .role = .user, .content = user_msg }) catch return self.failSave("out of memory");
|
||||
|
||||
self.terminal.spinner.start();
|
||||
self.terminal.spinner.setStatus(if (verifying) "writing and testing the script" else "writing the script");
|
||||
var result = provider_client.runTools(
|
||||
self.model,
|
||||
&self.conversation.messages,
|
||||
@@ -983,10 +1152,10 @@ fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8,
|
||||
ma,
|
||||
.{ .context = @ptrCast(self), .callFn = handleToolCall },
|
||||
.{
|
||||
.tools = &.{},
|
||||
.max_turns = 1,
|
||||
.tools = if (verifying) run_tools[0..1] else &.{},
|
||||
.max_turns = if (verifying) 6 else 1,
|
||||
.max_tokens = 8192,
|
||||
.tool_choice = .none,
|
||||
.tool_choice = if (verifying) .auto else .none,
|
||||
.effort = .medium,
|
||||
.cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
|
||||
},
|
||||
@@ -1008,12 +1177,23 @@ fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8,
|
||||
return;
|
||||
}
|
||||
|
||||
const raw = result.text orelse return self.abortSave(baseline, "the model returned no script");
|
||||
// Prefer the last candidate that ran cleanly — it's verified, pure JS, with
|
||||
// none of the commentary the model sometimes wraps its final message in. Fall
|
||||
// back to the final text only when nothing ran (no extract data, or it never
|
||||
// called run_script).
|
||||
const raw: []const u8 = blk: {
|
||||
if (verifying) {
|
||||
if (verify.last_source) |s| break :blk s;
|
||||
}
|
||||
if (result.text) |t| {
|
||||
if (std.mem.trim(u8, t, &std.ascii.whitespace).len > 0) break :blk t;
|
||||
}
|
||||
return self.abortSave(baseline, "the model returned no script");
|
||||
};
|
||||
|
||||
// `result.text` lives in the conversation arena, freed by the rollback
|
||||
// below; copy into the command arena first (scrubbing may return its input
|
||||
// as-is).
|
||||
const owned = arena.dupe(u8, stripCodeFence(raw)) catch return self.abortSave(baseline, "out of memory");
|
||||
// `raw` lives in the conversation arena, freed by the rollback below; copy
|
||||
// into the command arena first (scrubbing may return its input as-is).
|
||||
const owned = arena.dupe(u8, string.stripCodeFence(raw)) catch return self.abortSave(baseline, "out of memory");
|
||||
const script = browser_tools.reverseSubstituteEnvVars(arena, owned) catch return self.abortSave(baseline, "out of memory");
|
||||
|
||||
// The save turn is a meta-action; keep it out of the ongoing conversation.
|
||||
@@ -1025,10 +1205,53 @@ fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8,
|
||||
};
|
||||
|
||||
self.rememberSavePath(path);
|
||||
self.save_buffer.reset();
|
||||
self.resetSaveBuffers();
|
||||
self.terminal.printInfo("Saved synthesized script to {s}", .{path});
|
||||
}
|
||||
|
||||
/// `run_script` tool handler: run `source` on the dry-run runtime and return
/// either its completion value or the error it threw, so the model can judge
/// and repair its own script.
///
/// Requires `self.active_verify` to be set; `handleToolCall` only dispatches
/// here while it is. `allocator` is used for the returned content and the
/// trace previews. Invalid or empty arguments, and a failure to run the
/// candidate at all, are reported as error results rather than propagated.
fn runScriptTool(self: *Agent, allocator: std.mem.Allocator, arguments: ?std.json.Value) zenai.provider.Client.ToolHandler.Result {
    const verify = self.active_verify.?;
    const args = browser_tools.parseArgsOrDefault(struct { source: []const u8 = "" }, allocator, arguments) catch
        return .{ .content = "invalid run_script arguments", .is_error = true };
    const source = args.source;
    if (source.len == 0) return .{ .content = "run_script requires a non-empty \"source\" string", .is_error = true };

    // Start each candidate from a blank page, exactly like a standalone replay —
    // so a script that forgets to goto(...) fails here instead of silently relying
    // on the page the session left loaded.
    if (self.session.hasPage()) self.session.removePage();

    const outcome = verify.runtime.runSourceCapture(source, "candidate.js") catch
        return .{ .content = "out of memory running candidate", .is_error = true };
    if (outcome.err) |e| {
        self.terminal.agentVerifyRun(oneLinePreview(allocator, e, 120), false);
        const message: []const u8 = std.fmt.allocPrint(allocator, "Script threw: {s}", .{e}) catch "Script threw an error";
        // Cap the error text the same way as a successful completion value.
        return .{ .content = string.truncateWithMarker(allocator, message, tool_output_max_bytes), .is_error = true };
    }

    // Keep the last source that ran cleanly — it's the verified, prose-free
    // artifact `synthesizeScript` saves, instead of the model's final message
    // (which may wrap the script in commentary). It must be a copy in the
    // conversation arena: `source` belongs to this tool call and is not known
    // to outlive it (NOTE(review): confirm the handler allocator's lifetime).
    // If the copy fails, record nothing rather than a slice that may dangle;
    // the caller then falls back to the model's final text.
    verify.last_source = self.conversation.arena.allocator().dupe(u8, source) catch null;

    const body = if (outcome.output.len == 0) "(completion value is empty/undefined)" else outcome.output;
    self.terminal.agentVerifyRun(oneLinePreview(allocator, body, 120), true);
    const content = std.fmt.allocPrint(allocator, "Completion value:\n{s}", .{body}) catch body;
    return .{ .content = string.truncateWithMarker(allocator, content, tool_output_max_bytes), .is_error = false };
}
|
||||
|
||||
/// Reduce `text` to a compact single-line preview for the verify-run trace
/// bullet: the first line of the whitespace-trimmed input, itself trimmed, and
/// cut to at most `max` via `string.truncateUtf8` with an ellipsis appended
/// when it was cut.
///
/// The result aliases `text` when no cut is needed. Otherwise it is allocated
/// from `arena`, or is the bare cut prefix without the ellipsis if that
/// allocation fails.
fn oneLinePreview(arena: std.mem.Allocator, text: []const u8, max: usize) []const u8 {
    const trimmed = std.mem.trim(u8, text, &std.ascii.whitespace);
    const line_end = std.mem.indexOfScalar(u8, trimmed, '\n') orelse trimmed.len;
    // Trim again after the split: the first line can still end in spaces or a
    // '\r' (CRLF output), which would otherwise reach the terminal.
    const first = std.mem.trim(u8, trimmed[0..line_end], &std.ascii.whitespace);
    if (first.len <= max) return first;
    const cut = string.truncateUtf8(first, max);
    return std.fmt.allocPrint(arena, "{s}…", .{cut}) catch cut;
}
|
||||
|
||||
/// Persist `path` as the destination reused by a subsequent bare `/save`.
|
||||
fn rememberSavePath(self: *Agent, path: []const u8) void {
|
||||
if (self.save_path) |old| {
|
||||
@@ -1039,17 +1262,27 @@ fn rememberSavePath(self: *Agent, path: []const u8) void {
|
||||
self.save_path = dup;
|
||||
}
|
||||
|
||||
fn buildSaveSynthesisMessage(self: *Agent, arena: std.mem.Allocator, prompt: ?[]const u8) ![]const u8 {
|
||||
fn buildSaveSynthesisMessage(self: *Agent, arena: std.mem.Allocator, prompt: ?[]const u8, schema: ?[]const u8, sample: ?[]const u8) ![]const u8 {
|
||||
var out: std.Io.Writer.Allocating = .init(arena);
|
||||
const w = &out.writer;
|
||||
try w.writeAll(browser_tools.save_synthesis_prompt);
|
||||
try w.writeAll("\n\nBuiltin functions to prefer (call them as JS functions):\n");
|
||||
try w.writeAll("\n\nBuiltin functions (call them as JS functions). extract is the main way to read data — use it for every value you need; the rest navigate or act on the page:\n");
|
||||
try renderBuiltinCatalog(w);
|
||||
const recorded = self.save_buffer.bytes();
|
||||
if (recorded.len > 0) {
|
||||
try w.writeAll("\nCommands and JS that actually ran this session:\n");
|
||||
try w.writeAll(recorded);
|
||||
}
|
||||
if (schema) |s| {
|
||||
try w.writeAll("\nThe completion value must match this output schema (types are examples):\n");
|
||||
try w.writeAll(s);
|
||||
}
|
||||
if (sample) |data| {
|
||||
try w.writeAll("\nWhat a recorded extract returned this session, for reference:\n");
|
||||
try w.writeAll(data);
|
||||
try w.writeAll("\n\n");
|
||||
try w.writeAll(save_verify_addendum);
|
||||
}
|
||||
if (prompt) |p| {
|
||||
try w.writeAll("\nThe user's instruction for this script:\n");
|
||||
try w.writeAll(p);
|
||||
@@ -1062,26 +1295,26 @@ fn buildSaveSynthesisMessage(self: *Agent, arena: std.mem.Allocator, prompt: ?[]
|
||||
/// dialect (e.g. `extract`'s schema format) without the tool schemas a no-tools
|
||||
/// synthesis turn omits.
|
||||
fn renderBuiltinCatalog(w: *std.Io.Writer) !void {
|
||||
// The primary builtins first; `evaluate` is held back and framed as a last
|
||||
// resort below, so it isn't presented as a peer way to read data.
|
||||
for (Schema.all()) |s| {
|
||||
if (!s.tool.isRecorded()) continue;
|
||||
try w.print("\n{s}(", .{s.tool_name});
|
||||
for (s.required, 0..) |req, i| {
|
||||
if (i != 0) try w.writeAll(", ");
|
||||
try w.writeAll(req);
|
||||
}
|
||||
try w.print("):\n{s}\n", .{s.description});
|
||||
if (!s.tool.isRecorded() or s.tool == .evaluate) continue;
|
||||
try renderBuiltinEntry(w, s);
|
||||
}
|
||||
for (Schema.all()) |s| {
|
||||
if (s.tool != .evaluate) continue;
|
||||
try w.writeAll("\nEscape hatch for advanced page interaction or page-side logic no builtin above can express — not for reading data extract can read:\n");
|
||||
try renderBuiltinEntry(w, s);
|
||||
}
|
||||
}
|
||||
|
||||
/// Strip a surrounding ```` ```lang … ``` ```` markdown fence if the model
|
||||
/// wrapped its output in one despite being told not to.
|
||||
fn stripCodeFence(text: []const u8) []const u8 {
|
||||
const t = std.mem.trim(u8, text, &std.ascii.whitespace);
|
||||
if (!std.mem.startsWith(u8, t, "```")) return t;
|
||||
const first_nl = std.mem.indexOfScalar(u8, t, '\n') orelse return t;
|
||||
const body = t[first_nl + 1 ..];
|
||||
const close = std.mem.lastIndexOf(u8, body, "```") orelse return std.mem.trim(u8, body, &std.ascii.whitespace);
|
||||
return std.mem.trim(u8, body[0..close], &std.ascii.whitespace);
|
||||
/// Write one builtin's catalog entry to `w`: a blank line, then
/// `name(required, params):` on one line and the tool description on the next.
fn renderBuiltinEntry(w: *std.Io.Writer, s: Schema) !void {
    try w.print("\n{s}(", .{s.tool_name});

    // Emit the separator before every parameter except the first.
    var separator: []const u8 = "";
    for (s.required) |param| {
        try w.writeAll(separator);
        try w.writeAll(param);
        separator = ", ";
    }

    try w.print("):\n{s}\n", .{s.description});
}
|
||||
|
||||
fn logSaveBufferError(self: *Agent, err: anyerror) void {
|
||||
@@ -1309,9 +1542,9 @@ fn recordSlashToolCall(
|
||||
.arguments = if (args) |v| try zenai.json.dupeValue(ma, v) else null,
|
||||
};
|
||||
|
||||
// capToolOutput returns its input unchanged under the cap; dupe so content
|
||||
// doesn't alias the caller's per-iteration arena.
|
||||
const capped = capToolOutput(ma, result.text);
|
||||
// truncateWithMarker returns its input unchanged under the cap; dupe so
|
||||
// content doesn't alias the caller's per-iteration arena.
|
||||
const capped = string.truncateWithMarker(ma, result.text, tool_output_max_bytes);
|
||||
const content = if (capped.ptr == result.text.ptr) try ma.dupe(u8, capped) else capped;
|
||||
|
||||
const tool_results = try ma.alloc(zenai.provider.ToolResult, 1);
|
||||
@@ -1415,6 +1648,13 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 {
|
||||
if (!tc.is_error and t == .extract) last_extract_idx = i;
|
||||
}
|
||||
|
||||
// Keep the latest extract's real result so `/save` can ground and
|
||||
// verify its synthesized post-processing against actual data.
|
||||
if (last_extract_idx) |idx| {
|
||||
_ = self.last_extract_arena.reset(.retain_capacity);
|
||||
self.last_extract_json = self.last_extract_arena.allocator().dupe(u8, result.tool_calls_made[idx].result) catch null;
|
||||
}
|
||||
|
||||
var recorded_any = false;
|
||||
for (result.tool_calls_made, 0..) |tc, i| {
|
||||
if (tc.is_error) continue;
|
||||
@@ -1546,19 +1786,14 @@ fn buildUserMessageParts(
|
||||
// the next request body) without bound.
|
||||
const tool_output_max_bytes: usize = 1 * 1024 * 1024;
|
||||
|
||||
fn capToolOutput(allocator: std.mem.Allocator, output: []const u8) []const u8 {
|
||||
if (output.len <= tool_output_max_bytes) return output;
|
||||
const prefix = string.truncateUtf8(output, tool_output_max_bytes);
|
||||
var suffix_buf: [64]u8 = undefined;
|
||||
const suffix = std.fmt.bufPrint(&suffix_buf, "\n...[truncated, original {d} bytes]", .{output.len}) catch return prefix;
|
||||
return std.mem.concat(allocator, u8, &.{ prefix, suffix }) catch prefix;
|
||||
}
|
||||
|
||||
fn handleToolCall(ctx: *anyopaque, allocator: std.mem.Allocator, tool_name: []const u8, arguments: ?std.json.Value) zenai.provider.Client.ToolHandler.Result {
|
||||
const self: *Agent = @ptrCast(@alignCast(ctx));
|
||||
// `run_script`'s only arg is the whole candidate script — too long and noisy
|
||||
// to render, so suppress it and let the label/phase carry the context.
|
||||
const is_run_script = self.active_verify != null and std.mem.eql(u8, tool_name, browser_tools.run_script_tool_name);
|
||||
// The spinner doesn't render args, and `agentToolDone` skips the body line
|
||||
// at low verbosity — don't pay for the stringify when nobody reads it.
|
||||
const needs_args = self.terminal.spinner.isEnabled() or self.terminal.verbosity != .low;
|
||||
const needs_args = !is_run_script and (self.terminal.spinner.isEnabled() or self.terminal.verbosity != .low);
|
||||
// Stringify the pre-substitution args so $LP_* placeholders the model
|
||||
// emitted stay redacted in the UI.
|
||||
const args_str: []const u8 = if (needs_args) (if (arguments) |v|
|
||||
@@ -1568,12 +1803,15 @@ fn handleToolCall(ctx: *anyopaque, allocator: std.mem.Allocator, tool_name: []co
|
||||
self.terminal.spinner.setTool(tool_name, args_str);
|
||||
defer self.terminal.spinner.setThinking();
|
||||
|
||||
const outcome: zenai.provider.Client.ToolHandler.Result = if (browser_tools.call(allocator, self.session, &self.node_registry, tool_name, arguments)) |result|
|
||||
.{ .content = capToolOutput(allocator, result.text), .is_error = result.is_error }
|
||||
const outcome: zenai.provider.Client.ToolHandler.Result = if (is_run_script)
|
||||
self.runScriptTool(allocator, arguments)
|
||||
else if (browser_tools.call(allocator, self.session, &self.node_registry, tool_name, arguments)) |result|
|
||||
.{ .content = string.truncateWithMarker(allocator, result.text, tool_output_max_bytes), .is_error = result.is_error }
|
||||
else |err|
|
||||
.{ .content = std.fmt.allocPrint(allocator, "Error: {s}", .{@errorName(err)}) catch "Error: tool execution failed", .is_error = true };
|
||||
|
||||
self.terminal.agentToolDone(tool_name, args_str, !outcome.is_error);
|
||||
// run_script emits its own always-visible trace inside `runScriptTool`.
|
||||
if (!is_run_script) self.terminal.agentToolDone(tool_name, args_str, !outcome.is_error);
|
||||
if (self.terminal.verbosity == .high) self.terminal.printToolOutcome(tool_name, outcome.content, outcome.is_error);
|
||||
return outcome;
|
||||
}
|
||||
@@ -1644,35 +1882,6 @@ fn completionModels(context: *anyopaque, _: std.mem.Allocator) []const []const u
|
||||
return ids;
|
||||
}
|
||||
|
||||
test "capToolOutput: passes through when under cap" {
|
||||
const ta = std.testing.allocator;
|
||||
const out = capToolOutput(ta, "short");
|
||||
try std.testing.expectEqualStrings("short", out);
|
||||
}
|
||||
|
||||
// Boundary correctness lives in string.zig's `truncateUtf8` tests; here we only
|
||||
// assert the agent-specific policy: an over-cap body keeps valid UTF-8 and gains
|
||||
// the truncation marker.
|
||||
test "capToolOutput: appends a marker when truncating" {
|
||||
const ta = std.testing.allocator;
|
||||
|
||||
// 3-byte Hangul codepoint (U+D55C '한' = 0xED 0x95 0x9C) straddling the cap.
|
||||
const cap = tool_output_max_bytes;
|
||||
const buf = try ta.alloc(u8, cap + 8);
|
||||
defer ta.free(buf);
|
||||
@memset(buf[0 .. cap - 1], 'a');
|
||||
buf[cap - 1] = 0xED;
|
||||
buf[cap + 0] = 0x95;
|
||||
buf[cap + 1] = 0x9C;
|
||||
@memset(buf[cap + 2 ..], 'b');
|
||||
|
||||
const out = capToolOutput(ta, buf);
|
||||
defer if (out.ptr != buf.ptr) ta.free(out);
|
||||
|
||||
try std.testing.expect(std.unicode.utf8ValidateSlice(out));
|
||||
try std.testing.expect(std.mem.indexOf(u8, out, "truncated") != null);
|
||||
}
|
||||
|
||||
test "parseSaveCommand: filename only" {
|
||||
const r = try parseSaveCommand("out.js");
|
||||
try std.testing.expectEqualStrings("out.js", r.filename.?);
|
||||
@@ -1721,8 +1930,3 @@ test "renderBuiltinCatalog: lists recorded tools, omits read-only ones" {
|
||||
try std.testing.expect(std.mem.indexOf(u8, text, "tree(") == null);
|
||||
try std.testing.expect(std.mem.indexOf(u8, text, "markdown(") == null);
|
||||
}
|
||||
|
||||
test "stripCodeFence: unwraps a fenced block and passes plain text through" {
|
||||
try std.testing.expectEqualStrings("goto(\"x\");", stripCodeFence("```js\ngoto(\"x\");\n```"));
|
||||
try std.testing.expectEqualStrings("goto(\"x\");", stripCodeFence("goto(\"x\");"));
|
||||
}
|
||||
|
||||
@@ -71,6 +71,12 @@ cv: std.Thread.Condition = .{},
|
||||
state: State = .idle,
|
||||
frame: u8 = 0,
|
||||
|
||||
/// Custom label for the thinking state — a phase like "writing the script".
|
||||
/// Empty falls back to "thinking". Set via `setStatus`, cleared when the turn
|
||||
/// ends so the next turn starts plain.
|
||||
status_buf: [48]u8 = undefined,
|
||||
status_len: usize = 0,
|
||||
|
||||
tool_calls: u32 = 0,
|
||||
turn_started_ns: i128 = 0,
|
||||
|
||||
@@ -143,6 +149,7 @@ pub fn stop(self: *Spinner) void {
|
||||
_ = std.posix.write(std.posix.STDERR_FILENO, summary) catch {};
|
||||
|
||||
self.state = .idle;
|
||||
self.status_len = 0;
|
||||
self.last_render_len = 0;
|
||||
}
|
||||
|
||||
@@ -155,6 +162,7 @@ pub fn cancel(self: *Spinner) void {
|
||||
if (self.state == .idle) return;
|
||||
_ = std.posix.write(std.posix.STDERR_FILENO, "\r" ++ clear_eol) catch {};
|
||||
self.state = .idle;
|
||||
self.status_len = 0;
|
||||
self.last_render_len = 0;
|
||||
}
|
||||
|
||||
@@ -188,6 +196,22 @@ pub fn setTool(self: *Spinner, name: []const u8, args: []const u8) void {
|
||||
self.cv.signal();
|
||||
}
|
||||
|
||||
/// Set the phase label shown by the thinking indicator (for example "writing
/// the script"). The text is cut to fit `status_buf` and stored under the
/// spinner lock regardless of the current state; the indicator is redrawn
/// immediately only when it is already in the thinking state. Does nothing
/// when the spinner is disabled.
pub fn setStatus(self: *Spinner, text: []const u8) void {
    if (!self.isEnabled()) return;

    const label = truncateUtf8(text, self.status_buf.len);

    self.mu.lock();
    defer self.mu.unlock();

    @memcpy(self.status_buf[0..label.len], label);
    self.status_len = label.len;

    // Any other state picks the label up the next time thinking is rendered.
    if (self.state != .thinking) return;
    self.renderLocked();
    self.cv.signal();
}
|
||||
|
||||
/// Request a transition back to the cycling "thinking" state. The worker
|
||||
/// honors `min_tool_display_ns`: if the current tool label has not been up
|
||||
/// long enough, the flip is deferred until it has.
|
||||
@@ -253,11 +277,14 @@ fn renderLocked(self: *Spinner) void {
|
||||
const glyph = braille[self.frame % braille.len];
|
||||
const written = switch (self.state) {
|
||||
.idle => return,
|
||||
.thinking => std.fmt.bufPrint(
|
||||
&buf,
|
||||
"\r" ++ ansi.yellow ++ "{s}" ++ ansi.reset ++ " " ++ ansi.dim ++ "[agent: thinking]" ++ ansi.reset ++ clear_eol,
|
||||
.{glyph},
|
||||
) catch return,
|
||||
.thinking => blk: {
|
||||
const label = if (self.status_len > 0) self.status_buf[0..self.status_len] else "thinking";
|
||||
break :blk std.fmt.bufPrint(
|
||||
&buf,
|
||||
"\r" ++ ansi.yellow ++ "{s}" ++ ansi.reset ++ " " ++ ansi.dim ++ "[agent: {s}]" ++ ansi.reset ++ clear_eol,
|
||||
.{ glyph, label },
|
||||
) catch return;
|
||||
},
|
||||
.tool => |tool| blk: {
|
||||
const prefix: []const u8 = if (tool.manual) "" else "agent: ";
|
||||
const name = tool.name_buf[0..tool.name_len];
|
||||
|
||||
@@ -212,9 +212,18 @@ pub fn endTool(self: *Terminal) void {
|
||||
/// text via the bullet character.
|
||||
pub fn agentToolDone(self: *Terminal, name: []const u8, args: []const u8, ok: bool) void {
|
||||
if (!self.verbosity.atLeast(.medium)) return;
|
||||
const spinner_on = self.spinner.isEnabled();
|
||||
self.emitToolBullet(name, args, ok);
|
||||
}
|
||||
|
||||
if (spinner_on) {
|
||||
/// Emit the bullet line for one `/save` candidate run, labelled `run_script`
/// with `summary` as its argument text and `ok` as its success flag.
///
/// Unlike `agentToolDone`, there is no verbosity gate here: the bullet is shown
/// even at the REPL's default `.low` verbosity, because the verify loop is an
/// infrequent, user-initiated step the user needs to watch happen.
pub fn agentVerifyRun(self: *Terminal, summary: []const u8, ok: bool) void {
    const tool_name = "run_script";
    self.emitToolBullet(tool_name, summary, ok);
}
|
||||
|
||||
fn emitToolBullet(self: *Terminal, name: []const u8, args: []const u8, ok: bool) void {
|
||||
if (self.spinner.isEnabled()) {
|
||||
const a = if (self.repl_arena) |*ra| ra else return;
|
||||
defer _ = a.reset(.retain_capacity);
|
||||
const bytes = formatBulletLine(a.allocator(), name, args, ok) catch return;
|
||||
|
||||
@@ -144,37 +144,74 @@ pub const driver_guidance =
|
||||
/// Shared: the agent's `/save` feeds it to its own LLM; the MCP `save` tool
|
||||
/// hands it to the driving client as the tool description.
|
||||
pub const save_synthesis_prompt =
|
||||
\\Write a single Lightpanda agent script (.js) that reproduces what the user
|
||||
\\set out to do this session. Infer the goal from the whole conversation and
|
||||
\\keep only the steps a clean, repeatable script needs — drop failed attempts,
|
||||
\\retries, exploratory reads (tree/markdown/extract probes), and corrections.
|
||||
\\Pick the right layer for each step:
|
||||
\\- builtins (goto, click, fill, extract, …) for actions and for reading data;
|
||||
\\ extract is how you pull structured data out of a page.
|
||||
\\- plain top-level JavaScript for logic — loops, cross-page aggregation,
|
||||
\\ filtering, string building. It runs in the script, not the page.
|
||||
\\- evaluate(...) only for page-side JavaScript no builtin can express. It is
|
||||
\\ an escape hatch, not a default, and cannot see the script's variables —
|
||||
\\ interpolate any value into its string.
|
||||
\\Stay faithful to the recorded calls: same options each one actually used.
|
||||
\\Do NOT add a `timeout` to goto (or any tool) unless the session
|
||||
\\did. Never round-trip a result through `lp.*`, and never append no-op
|
||||
\\extract(...) probes or `evaluate("return lp....")` tails to surface output.
|
||||
\\The completion value — the last top-level expression — prints automatically
|
||||
\\(objects and arrays as JSON), so end with the bare result expression: a final
|
||||
\\`extract({...});`, or `results;` after an aggregation loop. No console.log,
|
||||
\\JSON.stringify, or `return` (illegal at top level) needed.
|
||||
\\Write modern, readable JavaScript: `for (const x of xs)`, `const`/`let` over
|
||||
\\`var`, template literals, destructuring, 2-space indent (including multi-line
|
||||
\\extract({...}) schemas).
|
||||
\\The script runs as a classic script, so top-level `await` is a syntax error.
|
||||
\\The builtins are synchronous — each returns its result directly, so never
|
||||
\\wrap them in async/await, .then, or Promises (`const data = extract(...)`,
|
||||
\\not `await extract(...)`). evaluate(...) may run async JS in the page, but
|
||||
\\the call itself returns synchronously.
|
||||
\\Output ONLY JavaScript source — no markdown fences, no commentary.
|
||||
\\Write a single Lightpanda agent script (.js) that reproduces what the user set
|
||||
\\out to do this session. Keep only the steps a clean, repeatable script needs —
|
||||
\\drop failed attempts, retries, and exploratory probes.
|
||||
\\Use the builtins for actions and data — extract is the main way to read data —
|
||||
\\and plain top-level JavaScript for logic (loops, aggregation, filtering, string
|
||||
\\work). Reserve evaluate(...) for advanced page interaction or page-side logic no
|
||||
\\builtin can express; it can't see the script's variables, so interpolate them into
|
||||
\\its string. Stay faithful to the recorded calls and their options (e.g. don't add
|
||||
\\a timeout the session didn't use).
|
||||
\\The last top-level expression prints automatically (objects/arrays as JSON), so
|
||||
\\end with the bare result — a final extract({...}); or results; after a loop. No
|
||||
\\console.log, JSON.stringify, or return.
|
||||
\\Top-level await is a syntax error and the builtins are synchronous, so never await
|
||||
\\them (const data = extract(...), not await extract(...)).
|
||||
\\If an output schema is given below, the completion value MUST match it — parse or
|
||||
\\split the extracted text as needed.
|
||||
\\Write modern, readable JavaScript (const/let, template literals, destructuring,
|
||||
\\2-space indent). Output ONLY JavaScript — no markdown fences, no commentary.
|
||||
;
|
||||
|
||||
/// Agent `/save`, step 1: distill the session into a one-sentence intent that
|
||||
/// feeds `save_schema_prompt`. Appended to the full conversation, so the model
|
||||
/// reads the turns; an explicit anchor (a `--task` or `/save` prompt) is added
|
||||
/// by the caller when present.
|
||||
pub const save_intent_prompt =
|
||||
\\In one sentence, state what the user set out to accomplish this session:
|
||||
\\the goal and the data or outcome they wanted — not the individual steps,
|
||||
\\tools, selectors, or page structure. Phrase it as a task description, e.g.
|
||||
\\"Go to HackerNews and retrieve the top 10 stories with their last 3
|
||||
\\comments (author and text)". Output ONLY that one sentence.
|
||||
;
|
||||
|
||||
/// Agent `/save`, step 2: turn the step-1 intent into a logical output schema.
|
||||
/// Given ONLY the intent — deliberately blind to the page and how data was
|
||||
/// fetched — so the resulting shape is stable across runs of the same session.
|
||||
pub const save_schema_prompt =
|
||||
\\Generate the JSON output schema describing the following intent. Do not
|
||||
\\focus on the intent context (the actual webpage structure or how to
|
||||
\\retrieve the data), just the logical JSON schema example. Do not provide
|
||||
\\actual data, just data types ("string", "number", "boolean").
|
||||
\\Example: the intent "Go to HackerNews and retrieve the top 10 stories with
|
||||
\\their last 3 comments with author and text" produces:
|
||||
\\{"results": [{"title": "string", "url": "string", "comments": [{"author": "string", "text": "string"}]}]}
|
||||
\\Output ONLY the JSON schema, no markdown fences, no commentary.
|
||||
\\
|
||||
\\Intent:
|
||||
;
|
||||
|
||||
/// Name of the agent `/save` verification tool — the model calls it to run a
|
||||
/// candidate script before finalizing. The agent dispatches it by this name.
|
||||
pub const run_script_tool_name = "run_script";
|
||||
|
||||
const run_script_tool_desc =
|
||||
"Run your full candidate script for real, from a blank page, and return its completion value " ++
|
||||
"(or error) — exactly as it will run when saved and replayed. It must navigate itself with " ++
|
||||
"goto(...). Use it to verify navigation, extraction, and your transform before finalizing.";
|
||||
|
||||
const run_script_params_json =
|
||||
\\{"type":"object","properties":{"source":{"type":"string","description":"Full JavaScript source of the candidate script to execute."}},"required":["source"]}
|
||||
;
|
||||
|
||||
/// Build the `run_script` tool definition offered to the model during an LLM
/// `/save` synthesis turn.
///
/// The parameter schema is parsed from `run_script_params_json` with the leaky
/// JSON parser, so every node of the returned `parameters` value lives in
/// `arena`; `arena` must therefore outlive the `runTools` call that uses the
/// definition. Returns whatever error the JSON parse reports (including
/// allocation failure).
pub fn runScriptToolDef(arena: std.mem.Allocator) !zenai.provider.Tool {
    return .{
        .name = run_script_tool_name,
        .description = run_script_tool_desc,
        .parameters = try std.json.parseFromSliceLeaky(
            std.json.Value,
            arena,
            run_script_params_json,
            .{},
        ),
    };
}
|
||||
|
||||
/// Reject paths that an untrusted MCP client could use to escape the
|
||||
/// working directory: empty paths, absolute paths, and any path with a
|
||||
/// `..` segment. Operator-controlled symlinks already inside CWD are out
|
||||
|
||||
@@ -95,6 +95,13 @@ pub const RunError = error{
|
||||
OutOfMemory,
|
||||
};
|
||||
|
||||
/// Result of one captured script run. The default value (`.{}`) means the
/// script succeeded and produced no displayable completion value.
pub const RunOutcome = struct {
    /// Formatted failure message; `null` when the run succeeded.
    err: ?[]const u8 = null,
    /// Display string of the completion value; empty when the value is void.
    output: []const u8 = "",
};
|
||||
|
||||
pub fn init(
|
||||
allocator: std.mem.Allocator,
|
||||
app: *lp.App,
|
||||
@@ -238,6 +245,23 @@ fn setObjectProperty(
|
||||
/// compile/runtime exception returns a formatted error allocated in this
|
||||
/// runtime's call arena and valid until deinit or the next run.
|
||||
pub fn runSource(self: *Runtime, source: []const u8, name: []const u8) RunError!?[]const u8 {
    // Non-capturing run: `runInner` prints the completion value itself, so
    // only the failure message (null on success) is handed back.
    const outcome = try self.runInner(source, name, false);
    return outcome.err;
}
|
||||
|
||||
/// Run `source` like `runSource`, but return the completion value's display
/// string in the outcome instead of printing it. `/save` verification uses
/// this to feed a candidate's output back to the LLM. Both outcome fields live
/// in the call arena and stay valid until the next run or deinit.
///
/// The global context is torn down and recreated before every call, so a
/// top-level `const`/`let` left by an earlier candidate cannot collide with
/// this one ("Identifier 'x' has already been declared"). Verification
/// candidates are independent runs, unlike a `/load` script. If the context
/// cannot be recreated, the script is not run and the outcome carries the
/// error "script context reset failed".
pub fn runSourceCapture(self: *Runtime, source: []const u8, name: []const u8) RunError!RunOutcome {
    self.resetContext();
    self.createContext() catch {
        const message = try self.dupeError("script context reset failed");
        return .{ .err = message };
    };

    return self.runInner(source, name, true);
}
|
||||
|
||||
fn runInner(self: *Runtime, source: []const u8, name: []const u8, capture: bool) RunError!RunOutcome {
|
||||
_ = self.call_arena.reset(.retain_capacity);
|
||||
|
||||
var hs: lp.js.HandleScope = undefined;
|
||||
@@ -245,7 +269,7 @@ pub fn runSource(self: *Runtime, source: []const u8, name: []const u8) RunError!
|
||||
defer hs.deinit();
|
||||
|
||||
const context: *const v8.Context = @ptrCast(v8.v8__Global__Get(&self.context, self.env.isolate.handle) orelse
|
||||
return try self.dupeError("agent script context is not available"));
|
||||
return .{ .err = try self.dupeError("agent script context is not available") });
|
||||
v8.v8__Context__Enter(context);
|
||||
defer v8.v8__Context__Exit(context);
|
||||
|
||||
@@ -268,19 +292,27 @@ pub fn runSource(self: *Runtime, source: []const u8, name: []const u8) RunError!
|
||||
&compiler_source,
|
||||
v8.kNoCompileOptions,
|
||||
v8.kNoCacheNoReason,
|
||||
) orelse return try self.formatCaught(context, &try_catch, "compile failed");
|
||||
) orelse return .{ .err = try self.formatCaught(context, &try_catch, "compile failed") };
|
||||
|
||||
const completion = v8.v8__Script__Run(script, context) orelse
|
||||
return try self.formatCaught(context, &try_catch, "script failed");
|
||||
return .{ .err = try self.formatCaught(context, &try_catch, "script failed") };
|
||||
|
||||
// Explicit microtask policy: promise continuations only run once drained.
|
||||
self.env.performIsolateMicrotasks();
|
||||
if (v8.v8__TryCatch__HasCaught(&try_catch)) {
|
||||
return try self.formatCaught(context, &try_catch, "script failed");
|
||||
return .{ .err = try self.formatCaught(context, &try_catch, "script failed") };
|
||||
}
|
||||
|
||||
if (capture) {
|
||||
if (v8.v8__Value__IsUndefined(completion)) return .{};
|
||||
const output = self.displayString(self.call_arena.allocator(), context, completion) catch |err| switch (err) {
|
||||
error.OutOfMemory => return error.OutOfMemory,
|
||||
error.JsException => return .{ .output = "<completion value could not be serialized>" },
|
||||
};
|
||||
return .{ .output = output };
|
||||
}
|
||||
self.printCompletion(context, completion);
|
||||
return null;
|
||||
return .{};
|
||||
}
|
||||
|
||||
/// Echo a script's completion value (its last-evaluated expression) so a script
|
||||
@@ -677,10 +709,10 @@ test "agent script runtime: goto and evaluate dispatch through browser tools" {
|
||||
defer testing.reset();
|
||||
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
|
||||
|
||||
var registry = CDPNode.Registry.init(testing.allocator);
|
||||
var registry: CDPNode.Registry = .init(testing.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
defer runtime.deinit();
|
||||
|
||||
try runTestScript(runtime,
|
||||
@@ -698,10 +730,10 @@ test "agent script runtime: extract returns a JavaScript object" {
|
||||
defer testing.reset();
|
||||
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
|
||||
|
||||
var registry = CDPNode.Registry.init(testing.allocator);
|
||||
var registry: CDPNode.Registry = .init(testing.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
defer runtime.deinit();
|
||||
|
||||
try runTestScript(runtime,
|
||||
@@ -752,10 +784,10 @@ test "agent script runtime: extract tolerates list selectors that match nothing"
|
||||
defer testing.reset();
|
||||
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
|
||||
|
||||
var registry = CDPNode.Registry.init(testing.allocator);
|
||||
var registry: CDPNode.Registry = .init(testing.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
defer runtime.deinit();
|
||||
|
||||
try runTestScript(runtime,
|
||||
@@ -780,10 +812,10 @@ test "agent script runtime: strict-mode scripts can call primitives" {
|
||||
defer testing.reset();
|
||||
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
|
||||
|
||||
var registry = CDPNode.Registry.init(testing.allocator);
|
||||
var registry: CDPNode.Registry = .init(testing.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
defer runtime.deinit();
|
||||
|
||||
try runTestScript(runtime,
|
||||
@@ -798,10 +830,10 @@ test "agent script runtime: strict-mode scripts can call primitives" {
|
||||
test "agent script runtime: promise microtasks run to completion" {
|
||||
defer testing.reset();
|
||||
|
||||
var registry = CDPNode.Registry.init(testing.allocator);
|
||||
var registry: CDPNode.Registry = .init(testing.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
defer runtime.deinit();
|
||||
|
||||
try runTestScript(runtime,
|
||||
@@ -819,10 +851,10 @@ test "agent script runtime: primitives re-entered from argument callbacks stay i
|
||||
defer testing.reset();
|
||||
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
|
||||
|
||||
var registry = CDPNode.Registry.init(testing.allocator);
|
||||
var registry: CDPNode.Registry = .init(testing.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
defer runtime.deinit();
|
||||
|
||||
try runTestScript(runtime,
|
||||
@@ -841,10 +873,10 @@ test "agent script runtime: terminate interrupts local JavaScript" {
|
||||
defer testing.reset();
|
||||
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
|
||||
|
||||
var registry = CDPNode.Registry.init(testing.allocator);
|
||||
var registry: CDPNode.Registry = .init(testing.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
defer runtime.deinit();
|
||||
|
||||
const thread = try std.Thread.spawn(.{}, terminateRuntimeSoon, .{runtime});
|
||||
@@ -859,10 +891,10 @@ test "agent script runtime: agent variables persist and page globals are isolate
|
||||
defer testing.reset();
|
||||
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
|
||||
|
||||
var registry = CDPNode.Registry.init(testing.allocator);
|
||||
var registry: CDPNode.Registry = .init(testing.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
defer runtime.deinit();
|
||||
|
||||
try runTestScript(runtime,
|
||||
@@ -884,10 +916,10 @@ test "agent script runtime: page evaluate cannot see agent primitives or binding
|
||||
defer testing.reset();
|
||||
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
|
||||
|
||||
var registry = CDPNode.Registry.init(testing.allocator);
|
||||
var registry: CDPNode.Registry = .init(testing.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
defer runtime.deinit();
|
||||
|
||||
try runTestScript(runtime,
|
||||
@@ -902,10 +934,10 @@ test "agent script runtime: page evaluate cannot see agent primitives or binding
|
||||
test "agent script runtime: console is available in agent context" {
|
||||
defer testing.reset();
|
||||
|
||||
var registry = CDPNode.Registry.init(testing.allocator);
|
||||
var registry: CDPNode.Registry = .init(testing.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
defer runtime.deinit();
|
||||
|
||||
try runTestScript(runtime,
|
||||
@@ -919,10 +951,10 @@ test "agent script runtime: tool errors throw and stop execution" {
|
||||
defer testing.reset();
|
||||
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
|
||||
|
||||
var registry = CDPNode.Registry.init(testing.allocator);
|
||||
var registry: CDPNode.Registry = .init(testing.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
defer runtime.deinit();
|
||||
|
||||
const message = (try runtime.runSource(
|
||||
@@ -945,10 +977,10 @@ test "agent script runtime: builtin argument marshalling (positional + options)"
|
||||
defer testing.reset();
|
||||
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
|
||||
|
||||
var registry = CDPNode.Registry.init(testing.allocator);
|
||||
var registry: CDPNode.Registry = .init(testing.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, ®istry);
|
||||
defer runtime.deinit();
|
||||
|
||||
try runTestScript(runtime,
|
||||
@@ -994,3 +1026,72 @@ test "agent script runtime: builtin argument marshalling (positional + options)"
|
||||
try testing.expect(std.mem.indexOf(u8, message, "invalid arguments") != null);
|
||||
}
|
||||
}
|
||||
|
||||
test "agent script runtime: runSourceCapture runs the full script live and captures completion" {
    defer testing.reset();
    defer if (testing.test_session.hasPage()) testing.test_session.removePage();

    var registry: CDPNode.Registry = .init(testing.allocator);
    defer registry.deinit();

    // The runtime takes the registry by pointer.
    const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
    defer runtime.deinit();

    // Verification runs the candidate exactly as a standalone replay does — it
    // must navigate itself; extract then runs against the page it loaded.
    const outcome = try runtime.runSourceCapture(
        \\goto("http://localhost:9582/src/browser/tests/mcp_actions.html");
        \\click("#btn");
        \\const data = extract({ label: "#btn" });
        \\data.label;
    , "candidate.js");

    // Success is reported as a null `err`; the completion value (the last
    // top-level expression) is captured rather than printed.
    try testing.expect(outcome.err == null);
    try std.testing.expectEqualStrings("Click Me", outcome.output);
}
|
||||
|
||||
test "agent script runtime: runSourceCapture surfaces a candidate's error" {
    defer testing.reset();
    defer if (testing.test_session.hasPage()) testing.test_session.removePage();

    var registry: CDPNode.Registry = .init(testing.allocator);
    defer registry.deinit();

    // The runtime takes the registry by pointer.
    const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
    defer runtime.deinit();

    // extract returns an object; treating it as an array throws — the real
    // `raw.map is not a function` failure the model must see and fix.
    const bad = try runtime.runSourceCapture(
        \\goto("http://localhost:9582/src/browser/tests/mcp_actions.html");
        \\const raw = extract({ items: [{ selector: "li" }] });
        \\raw.map(x => x);
    , "candidate.js");

    // A script-level failure is not a Zig error: the call itself succeeds and
    // the failure travels in the outcome's `err` field.
    try testing.expect(bad.err != null);
}
|
||||
|
||||
test "agent script runtime: each capture run gets a fresh global scope" {
    defer testing.reset();
    defer if (testing.test_session.hasPage()) testing.test_session.removePage();

    var registry: CDPNode.Registry = .init(testing.allocator);
    defer registry.deinit();

    // The runtime takes the registry by pointer.
    const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
    defer runtime.deinit();

    // The same top-level `const` in two consecutive candidates must not collide —
    // each run starts from a clean context, not the previous run's globals.
    const src =
        \\goto("http://localhost:9582/src/browser/tests/mcp_actions.html");
        \\const data = extract({ label: "#btn" });
        \\const out = data.label;
        \\out;
    ;

    const first = try runtime.runSourceCapture(src, "candidate.js");
    try testing.expect(first.err == null);
    try std.testing.expectEqualStrings("Click Me", first.output);

    // Re-running the identical source would redeclare `data` and `out` if the
    // previous run's globals were still alive.
    const second = try runtime.runSourceCapture(src, "candidate.js");
    try testing.expect(second.err == null);
    try std.testing.expectEqualStrings("Click Me", second.output);
}
|
||||
|
||||
@@ -332,6 +332,29 @@ pub fn truncateUtf8(bytes: []const u8, max_bytes: usize) []const u8 {
|
||||
return bytes[0..i];
|
||||
}
|
||||
|
||||
/// Cap `text` at `max_bytes` (cut on a UTF-8 boundary by `truncateUtf8`) and,
/// when something was cut, append a marker recording the original length.
/// The marked result is therefore longer than `max_bytes` by the marker.
///
/// Which buffer comes back depends on the path taken:
/// - `text` itself, when it already fits (nothing is allocated);
/// - a fresh buffer from `allocator` holding prefix + marker, when it overflows;
/// - a prefix slice of `text`, when formatting or allocating the marked copy
///   fails. This is a deliberate best-effort fallback, not an error.
/// Only the second case is owned by `allocator`.
pub fn truncateWithMarker(allocator: std.mem.Allocator, text: []const u8, max_bytes: usize) []const u8 {
    if (text.len <= max_bytes) return text;

    const head = truncateUtf8(text, max_bytes);

    var marker_storage: [64]u8 = undefined;
    const marker = std.fmt.bufPrint(
        &marker_storage,
        "\n...[truncated, original {d} bytes]",
        .{text.len},
    ) catch return head;

    const pieces = [_][]const u8{ head, marker };
    return std.mem.concat(allocator, u8, &pieces) catch head;
}
|
||||
|
||||
/// Unwrap text that is enclosed in a markdown code fence (```lang … ```),
/// returning the whitespace-trimmed inner block. Text that does not start with
/// a fence is returned trimmed but otherwise unchanged. The result is always a
/// sub-slice of `text`; nothing is allocated.
///
/// Details: the opening fence line runs to the first newline (a fence with no
/// newline at all is returned as-is), and the block ends at the LAST "```" in
/// the remainder — or at the end of the text when the fence is never closed.
pub fn stripCodeFence(text: []const u8) []const u8 {
    const ws = &std.ascii.whitespace;
    const trimmed = std.mem.trim(u8, text, ws);
    if (!std.mem.startsWith(u8, trimmed, "```")) return trimmed;

    // Skip the opening fence line, including any language tag after the backticks.
    const header_end = std.mem.indexOfScalar(u8, trimmed, '\n') orelse return trimmed;
    const inner = trimmed[header_end + 1 ..];

    // An unclosed fence keeps everything after the header.
    const end = std.mem.lastIndexOf(u8, inner, "```") orelse inner.len;
    return std.mem.trim(u8, inner[0..end], ws);
}
|
||||
|
||||
// Discriminatory type that signals the bridge to use arena instead of call_arena
|
||||
// Use this for strings that need to persist beyond the current call
|
||||
// The caller can unwrap and store just the underlying .str field
|
||||
@@ -378,6 +401,32 @@ test "truncateUtf8" {
|
||||
try testing.expectEqual("\xFFx", truncateUtf8("\xFFx", 2));
|
||||
}
|
||||
|
||||
test "truncateWithMarker" {
    const gpa = std.testing.allocator;

    // Fits under the cap: the input comes back as-is and nothing is allocated.
    try std.testing.expectEqualStrings("short", truncateWithMarker(gpa, "short", 1024));

    // Over-cap: a 3-byte Hangul codepoint (U+D55C, 0xED 0x95 0x9C) straddling the
    // cap must stay valid UTF-8, and the marker must be appended.
    const limit: usize = 1024;
    const input = try gpa.alloc(u8, limit + 8);
    defer gpa.free(input);

    @memset(input[0 .. limit - 1], 'a');
    @memcpy(input[limit - 1 .. limit + 2], "\xED\x95\x9C");
    @memset(input[limit + 2 ..], 'b');

    const result = truncateWithMarker(gpa, input, limit);
    // The fallback path returns a prefix of `input`; free only a real allocation.
    defer if (result.ptr != input.ptr) gpa.free(result);

    try std.testing.expect(std.unicode.utf8ValidateSlice(result));
    try std.testing.expect(std.mem.indexOf(u8, result, "truncated") != null);
}
|
||||
|
||||
test "stripCodeFence" {
    // A fenced block is unwrapped; already-bare text passes straight through.
    const cases = [_]struct { input: []const u8, expected: []const u8 }{
        .{ .input = "```js\ngoto(\"x\");\n```", .expected = "goto(\"x\");" },
        .{ .input = "goto(\"x\");", .expected = "goto(\"x\");" },
    };

    for (cases) |case| {
        try std.testing.expectEqualStrings(case.expected, stripCodeFence(case.input));
    }
}
|
||||
|
||||
test "String" {
|
||||
const other_short = try String.init(undefined, "other_short", .{});
|
||||
const other_long = try String.init(testing.allocator, "other_long" ** 100, .{});
|
||||
|
||||
Reference in New Issue
Block a user