agent: verify synthesized scripts during /save

Introduces a multi-step synthesis process for `/save` that derives a
logical JSON output schema and uses a dry-run runtime to verify
candidate scripts. The LLM can now run and self-correct its scripts
using a new `run_script` tool before finalizing the save.
This commit is contained in:
Adrià Arrufat
2026-06-09 16:49:50 +02:00
parent b23c6c27bc
commit b141da30ca
6 changed files with 574 additions and 147 deletions

View File

@@ -110,6 +110,14 @@ node_registry: CDPNode.Registry,
terminal: Terminal,
save_buffer: Recorder,
save_path: ?[]u8,
/// Backs `last_extract_json`; reset alongside `save_buffer`.
last_extract_arena: std.heap.ArenaAllocator,
/// The JSON the most recent successful `extract` returned this session — the
/// real data `/save` grounds and verifies its synthesized script against.
last_extract_json: ?[]const u8 = null,
/// Set for the duration of an LLM `/save` so the `run_script` tool can reach
/// the dry-run runtime it executes candidates on.
active_verify: ?*Verify = null,
script_runtime_mutex: std.Thread.Mutex = .{},
active_script_runtime: ?*ScriptRuntime = null,
conversation: Conversation,
@@ -254,6 +262,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
.terminal = .init(allocator, history_paths, verbosity, will_repl),
.save_buffer = .init(allocator),
.save_path = null,
.last_extract_arena = .init(allocator),
.conversation = .init(allocator, opts.system_prompt orelse default_system_prompt),
.model = model,
.effort = effort,
@@ -294,6 +303,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
pub fn deinit(self: *Agent) void {
self.terminal.uninstallLogSink();
self.save_buffer.deinit();
self.last_extract_arena.deinit();
if (self.save_path) |p| self.allocator.free(p);
self.terminal.deinit();
self.conversation.deinit();
@@ -630,11 +640,19 @@ fn handleUsage(self: *Agent) void {
/// node IDs. Shared by `/clear` and `/reset`.
fn clearConversation(self: *Agent) void {
self.conversation.rollback(0);
self.save_buffer.reset();
self.resetSaveBuffers();
self.total_usage = .{};
self.node_registry.reset();
}
/// Clear all state a `/save` consumes: the recorder's action buffer, the arena
/// backing the captured extract, and the `last_extract_json` slice into it.
fn resetSaveBuffers(self: *Agent) void {
    // Drop the slice before recycling the arena it was allocated from, so the
    // field never refers to reset memory.
    self.last_extract_json = null;
    _ = self.last_extract_arena.reset(.retain_capacity);
    self.save_buffer.reset();
}
/// Forget the conversation while leaving the browser session live — loaded page
/// stays put, cookies/logins preserved.
fn handleClear(self: *Agent) void {
@@ -862,7 +880,7 @@ fn handleSave(self: *Agent, arena: std.mem.Allocator, rest: []const u8) void {
new_save_path = null;
}
const saved_lines = self.save_buffer.lines;
self.save_buffer.reset();
self.resetSaveBuffers();
self.terminal.printInfo("Saved {d} line(s) to {s}", .{ saved_lines, self.save_path.? });
}
@@ -958,10 +976,130 @@ fn abortSave(self: *Agent, baseline: usize, reason: []const u8) void {
self.failSave(reason);
}
/// LLM-synthesized `/save`: hand the model the builtin catalog, the full
/// conversation, and the deterministic record of what ran, then write the
/// idiomatic script it returns.
/// In-flight `/save` verification harness: the dry-run runtime the `run_script`
/// tool executes candidates on, plus the last candidate source that ran without
/// throwing. `synthesizeScript` saves that source in preference to the model's
/// final message, and only falls back to the message text when no candidate ran.
///
/// `synthesizeScript` keeps one instance on its stack and exposes it through
/// `Agent.active_verify` for the duration of the save turn.
const Verify = struct {
/// Runtime the candidates execute on; created and torn down by `synthesizeScript`.
runtime: *ScriptRuntime,
/// Most recent candidate that ran cleanly, or null if none has. Written by
/// `runScriptTool`.
last_source: ?[]const u8 = null,
};
/// Agent-only addendum (kept out of the shared `save_synthesis_prompt`) telling
/// the model to derive every value at runtime and check the result with run_script.
/// `buildSaveSynthesisMessage` appends it only when an extract sample is passed,
/// i.e. when the `run_script` verification loop is active.
const save_verify_addendum =
\\Read data with the recorded extract(...), not evaluate() — extract can read a
\\card's whole text via an empty selector (""). Reshape its result in plain JS so the
\\completion value matches the schema exactly (same keys, parsed numbers); don't
\\return the raw extract or hard-code values.
\\Before finalizing, test with run_script: it runs your FULL script for real from a
\\blank page, so it must goto(...) first (missing goto → "no page loaded", a wrong
\\selector → null). Confirm every field is populated, then reply with ONLY the final
\\JavaScript source.
;
/// Cap on the captured extract sample shown in the synthesis prompt (the full
/// data still feeds the dry run); keeps a large result from dominating context.
const save_sample_cap = 8 * 1024;
/// Entry point for the LLM-synthesized `/save`. The output shape is fixed
/// before any script is written: the session's intent is distilled, a typed
/// output schema is derived from it, and only then is the script synthesized
/// against that schema, keeping the result shape stable across runs.
///
/// Failure of the schema steps is not fatal: a null schema simply means the
/// script is synthesized without one. A cancel observed after the schema steps
/// restores the conversation to its pre-save length and stops.
fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8, prompt: ?[]const u8) void {
    self.conversation.ensureSystemPrompt() catch {
        self.failSave("out of memory");
        return;
    };
    const baseline = self.conversation.messages.items.len;

    // An explicit `/save` prompt wins; otherwise anchor on the one-shot task.
    const goal_hint = prompt orelse self.one_shot_task;
    const schema = self.deriveOutputSchema(arena, baseline, goal_hint);

    const cancelled = self.cancel_requested.load(.acquire);
    if (!cancelled) {
        self.synthesizeScript(arena, filename, prompt, schema);
        return;
    }
    self.resetAfterCancel(baseline);
}
/// Steps 1–2 of `/save`: distill the session into an intent, then turn that
/// intent into a typed output schema. Neither turn is left in the conversation.
/// Returns null when the intent turn yields nothing, when a cancel is observed
/// between the two turns, or when the schema turn yields nothing; the caller
/// then synthesizes without a schema.
fn deriveOutputSchema(self: *Agent, arena: std.mem.Allocator, baseline: usize, anchor: ?[]const u8) ?[]const u8 {
    if (self.deriveIntent(arena, baseline, anchor)) |intent| {
        // Don't start a second model turn once the user has asked to stop.
        if (self.cancel_requested.load(.acquire)) return null;
        return self.deriveSchema(arena, intent);
    }
    return null;
}
/// Ask the model for a one-sentence intent describing the session. The request
/// is appended to the live conversation so the model sees every prior turn, and
/// the conversation is rolled back to `baseline` on exit so the exchange never
/// enters history. When `anchor` is set it is quoted to the model as the
/// authoritative goal. The returned text is allocated in `arena`; null means
/// the turn failed, was cancelled, or produced no text.
fn deriveIntent(self: *Agent, arena: std.mem.Allocator, baseline: usize, anchor: ?[]const u8) ?[]const u8 {
    const conv_alloc = self.conversation.arena.allocator();

    var request: std.Io.Writer.Allocating = .init(conv_alloc);
    const w = &request.writer;
    w.writeAll(browser_tools.save_intent_prompt) catch return null;
    if (anchor) |goal| {
        w.print("\nThe user described the goal as: {s}\nTreat that as authoritative and reconcile it with the session.", .{goal}) catch return null;
    }

    self.conversation.messages.append(self.allocator, .{ .role = .user, .content = request.written() }) catch return null;
    // Registered only after the append succeeded; the reply is copied into
    // `arena` by `runTextTurn` before this rollback runs.
    defer self.conversation.rollback(baseline);

    return self.runTextTurn(&self.conversation.messages, arena, self.allocator, conv_alloc, 512, "understanding the task");
}
/// Turn `intent` into a typed output schema. The turn runs over a private,
/// single-message list rather than the conversation, so the model sees only the
/// logical intent and nothing about the page or how the data was fetched.
/// Everything is allocated in `arena`. Returns the reply with any surrounding
/// markdown fence stripped, or null if the turn produced nothing usable.
fn deriveSchema(self: *Agent, arena: std.mem.Allocator, intent: []const u8) ?[]const u8 {
    const request = std.fmt.allocPrint(arena, "{s} {s}", .{ browser_tools.save_schema_prompt, intent }) catch return null;

    var scratch: std.ArrayList(zenai.provider.Message) = .empty;
    scratch.append(arena, .{ .role = .user, .content = request }) catch return null;

    if (self.runTextTurn(&scratch, arena, arena, arena, 1024, "designing the output schema")) |reply| {
        return string.stripCodeFence(reply);
    }
    return null;
}
/// Run a single no-tools text turn over `messages` and return the model's
/// whitespace-trimmed text duped into `dest` (so it survives any rollback of
/// `messages`), or null on cancel, error, or empty output. Shared by the intent
/// and schema steps of `/save`.
///
/// - `messages`: list the turn runs over; handed to the client as-is.
/// - `dest`: allocator that owns the returned text.
/// - `list_alloc` / `data_alloc`: forwarded unchanged to `runTools`.
/// - `max_tokens`: output cap for the turn.
/// - `status`: phase label shown on the spinner while the turn runs.
///
/// Token usage of a completed turn is added to `total_usage` even when the turn
/// was cancelled or returned no text.
fn runTextTurn(
    self: *Agent,
    messages: *std.ArrayList(zenai.provider.Message),
    dest: std.mem.Allocator,
    list_alloc: std.mem.Allocator,
    data_alloc: std.mem.Allocator,
    max_tokens: i32,
    status: []const u8,
) ?[]const u8 {
    self.terminal.spinner.start();
    self.terminal.spinner.setStatus(status);
    var result = self.ai_client.?.runTools(
        self.model,
        messages,
        list_alloc,
        data_alloc,
        .{ .context = @ptrCast(self), .callFn = handleToolCall },
        .{
            .tools = &.{},
            .max_turns = 1,
            .max_tokens = max_tokens,
            .tool_choice = .none,
            .effort = .low,
            .cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
        },
    ) catch |err| {
        self.terminal.spinner.cancel();
        // A user cancel surfaces as an error too; only log genuine failures.
        // The message is step-neutral because both the intent and the schema
        // turn come through here.
        if (!self.cancel_requested.load(.acquire)) log.err(.app, "AI save text turn error", .{ .err = err });
        return null;
    };
    self.terminal.spinner.stop();
    defer result.deinit();
    self.total_usage.add(result.usage);

    if (result.cancelled) return null;
    const reply = result.text orelse return null;
    const trimmed = std.mem.trim(u8, reply, &std.ascii.whitespace);
    if (trimmed.len == 0) return null;
    // Copy out before `result.deinit()` (and any caller rollback) runs.
    return dest.dupe(u8, trimmed) catch null;
}
/// Step 3 of `/save`: hand the model the builtin catalog, the full conversation,
/// the deterministic record of what ran, and the required output schema, then
/// write the idiomatic script it returns.
fn synthesizeScript(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8, prompt: ?[]const u8, schema: ?[]const u8) void {
const provider_client = self.ai_client.?;
const resolved = self.resolveSavePathAndMode(arena, filename) orelse return;
@@ -972,10 +1110,41 @@ fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8,
const ma = self.conversation.arena.allocator();
const baseline = self.conversation.messages.items.len;
const user_msg = self.buildSaveSynthesisMessage(ma, prompt) catch return self.failSave("out of memory");
// When the session captured extract data, let the model test candidates on
// it via `run_script`; otherwise fall back to a single no-tools synthesis.
var verify: Verify = .{ .runtime = undefined };
var run_tools: [1]ProviderTool = undefined;
const verifying = blk: {
// Gate on a captured extract: it means the session loaded the page and
// left it in a state worth verifying against (and gives a prompt sample).
if (self.last_extract_json == null) break :blk false;
run_tools[0] = browser_tools.runScriptToolDef(ma) catch break :blk false;
const runtime = ScriptRuntime.init(self.allocator, self.browser.app, self.session, &self.node_registry) catch break :blk false;
verify.runtime = runtime;
self.active_verify = &verify;
self.script_runtime_mutex.lock();
self.active_script_runtime = runtime;
self.script_runtime_mutex.unlock();
break :blk true;
};
defer if (verifying) {
self.script_runtime_mutex.lock();
self.active_script_runtime = null;
self.script_runtime_mutex.unlock();
self.active_verify = null;
verify.runtime.cancelTerminate();
verify.runtime.deinit();
};
const sample: ?[]const u8 = if (verifying) blk: {
const d = self.last_extract_json.?;
break :blk d[0..@min(d.len, save_sample_cap)];
} else null;
const user_msg = self.buildSaveSynthesisMessage(ma, prompt, schema, sample) catch return self.failSave("out of memory");
self.conversation.messages.append(self.allocator, .{ .role = .user, .content = user_msg }) catch return self.failSave("out of memory");
self.terminal.spinner.start();
self.terminal.spinner.setStatus(if (verifying) "writing and testing the script" else "writing the script");
var result = provider_client.runTools(
self.model,
&self.conversation.messages,
@@ -983,10 +1152,10 @@ fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8,
ma,
.{ .context = @ptrCast(self), .callFn = handleToolCall },
.{
.tools = &.{},
.max_turns = 1,
.tools = if (verifying) run_tools[0..1] else &.{},
.max_turns = if (verifying) 6 else 1,
.max_tokens = 8192,
.tool_choice = .none,
.tool_choice = if (verifying) .auto else .none,
.effort = .medium,
.cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
},
@@ -1008,12 +1177,23 @@ fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8,
return;
}
const raw = result.text orelse return self.abortSave(baseline, "the model returned no script");
// Prefer the last candidate that ran cleanly — it's verified, pure JS, with
// none of the commentary the model sometimes wraps its final message in. Fall
// back to the final text only when nothing ran (no extract data, or it never
// called run_script).
const raw: []const u8 = blk: {
if (verifying) {
if (verify.last_source) |s| break :blk s;
}
if (result.text) |t| {
if (std.mem.trim(u8, t, &std.ascii.whitespace).len > 0) break :blk t;
}
return self.abortSave(baseline, "the model returned no script");
};
// `result.text` lives in the conversation arena, freed by the rollback
// below; copy into the command arena first (scrubbing may return its input
// as-is).
const owned = arena.dupe(u8, stripCodeFence(raw)) catch return self.abortSave(baseline, "out of memory");
// `raw` lives in the conversation arena, freed by the rollback below; copy
// into the command arena first (scrubbing may return its input as-is).
const owned = arena.dupe(u8, string.stripCodeFence(raw)) catch return self.abortSave(baseline, "out of memory");
const script = browser_tools.reverseSubstituteEnvVars(arena, owned) catch return self.abortSave(baseline, "out of memory");
// The save turn is a meta-action; keep it out of the ongoing conversation.
@@ -1025,10 +1205,53 @@ fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8,
};
self.rememberSavePath(path);
self.save_buffer.reset();
self.resetSaveBuffers();
self.terminal.printInfo("Saved synthesized script to {s}", .{path});
}
/// `run_script` tool handler: execute the candidate in `arguments.source` on the
/// dry-run runtime and hand the model back the completion value (or the error),
/// so it can judge and fix its own script against real data.
///
/// - `allocator`: owns the returned content and the trace previews.
/// - `arguments`: tool-call arguments; must carry a non-empty `source` string.
///
/// Returns an error result when no `/save` verification is active, when the
/// arguments are invalid, or when the candidate throws. On a clean run the
/// candidate is remembered in `Verify.last_source` and its completion value is
/// returned, truncated to `tool_output_max_bytes`.
///
/// Side effect: any page the session has loaded is removed before the run.
fn runScriptTool(self: *Agent, allocator: std.mem.Allocator, arguments: ?std.json.Value) zenai.provider.Client.ToolHandler.Result {
    // `handleToolCall` only routes here while a verify harness is installed;
    // answer with a tool error rather than trapping if that ever stops holding.
    const verify = self.active_verify orelse
        return .{ .content = "run_script is only available while /save is verifying a script", .is_error = true };

    const args = browser_tools.parseArgsOrDefault(struct { source: []const u8 = "" }, allocator, arguments) catch
        return .{ .content = "invalid run_script arguments", .is_error = true };
    const source = args.source;
    if (source.len == 0) return .{ .content = "run_script requires a non-empty \"source\" string", .is_error = true };

    // Start each candidate from a blank page, exactly like a standalone replay —
    // so a script that forgets to goto(...) fails here instead of silently relying
    // on the page the session left loaded.
    if (self.session.hasPage()) self.session.removePage();

    const outcome = verify.runtime.runSourceCapture(source, "candidate.js") catch
        return .{ .content = "out of memory running candidate", .is_error = true };
    if (outcome.err) |e| {
        self.terminal.agentVerifyRun(oneLinePreview(allocator, e, 120), false);
        return .{ .content = std.fmt.allocPrint(allocator, "Script threw: {s}", .{e}) catch "Script threw an error", .is_error = true };
    }

    // Keep the last source that ran cleanly — it's the verified, prose-free
    // artifact `synthesizeScript` saves, instead of the model's final message
    // (which may wrap the script in commentary). It is read after the tool loop
    // ends, so it must live in the conversation arena. If that copy fails, store
    // null and let the caller fall back to the final message, rather than keep
    // a slice into this call's arguments that may not outlive the call.
    verify.last_source = self.conversation.arena.allocator().dupe(u8, source) catch null;

    const body = if (outcome.output.len == 0) "(completion value is empty/undefined)" else outcome.output;
    self.terminal.agentVerifyRun(oneLinePreview(allocator, body, 120), true);
    const content = std.fmt.allocPrint(allocator, "Completion value:\n{s}", .{body}) catch body;
    return .{ .content = string.truncateWithMarker(allocator, content, tool_output_max_bytes), .is_error = false };
}
/// Reduce `text` to a compact one-line preview for the verify-run trace bullet:
/// trim surrounding whitespace and keep only the first line. A line longer than
/// `max` bytes is cut by `string.truncateUtf8` and gets a trailing "…" appended
/// after the cut (so the result can exceed `max` by the ellipsis). The ellipsis
/// form is allocated from `arena`; if that allocation fails the bare cut is
/// returned. Otherwise the result is a sub-slice of `text`.
fn oneLinePreview(arena: std.mem.Allocator, text: []const u8, max: usize) []const u8 {
    const trimmed = std.mem.trim(u8, text, &std.ascii.whitespace);
    const line = if (std.mem.indexOfScalar(u8, trimmed, '\n')) |nl| trimmed[0..nl] else trimmed;
    if (line.len > max) {
        const head = string.truncateUtf8(line, max);
        return std.fmt.allocPrint(arena, "{s}…", .{head}) catch head;
    }
    return line;
}
/// Persist `path` as the destination reused by a subsequent bare `/save`.
fn rememberSavePath(self: *Agent, path: []const u8) void {
if (self.save_path) |old| {
@@ -1039,17 +1262,27 @@ fn rememberSavePath(self: *Agent, path: []const u8) void {
self.save_path = dup;
}
fn buildSaveSynthesisMessage(self: *Agent, arena: std.mem.Allocator, prompt: ?[]const u8) ![]const u8 {
fn buildSaveSynthesisMessage(self: *Agent, arena: std.mem.Allocator, prompt: ?[]const u8, schema: ?[]const u8, sample: ?[]const u8) ![]const u8 {
var out: std.Io.Writer.Allocating = .init(arena);
const w = &out.writer;
try w.writeAll(browser_tools.save_synthesis_prompt);
try w.writeAll("\n\nBuiltin functions to prefer (call them as JS functions):\n");
try w.writeAll("\n\nBuiltin functions (call them as JS functions). extract is the main way to read data — use it for every value you need; the rest navigate or act on the page:\n");
try renderBuiltinCatalog(w);
const recorded = self.save_buffer.bytes();
if (recorded.len > 0) {
try w.writeAll("\nCommands and JS that actually ran this session:\n");
try w.writeAll(recorded);
}
if (schema) |s| {
try w.writeAll("\nThe completion value must match this output schema (types are examples):\n");
try w.writeAll(s);
}
if (sample) |data| {
try w.writeAll("\nWhat a recorded extract returned this session, for reference:\n");
try w.writeAll(data);
try w.writeAll("\n\n");
try w.writeAll(save_verify_addendum);
}
if (prompt) |p| {
try w.writeAll("\nThe user's instruction for this script:\n");
try w.writeAll(p);
@@ -1062,26 +1295,26 @@ fn buildSaveSynthesisMessage(self: *Agent, arena: std.mem.Allocator, prompt: ?[]
/// dialect (e.g. `extract`'s schema format) without the tool schemas a no-tools
/// synthesis turn omits.
fn renderBuiltinCatalog(w: *std.Io.Writer) !void {
// The primary builtins first; `evaluate` is held back and framed as a last
// resort below, so it isn't presented as a peer way to read data.
for (Schema.all()) |s| {
if (!s.tool.isRecorded()) continue;
try w.print("\n{s}(", .{s.tool_name});
for (s.required, 0..) |req, i| {
if (i != 0) try w.writeAll(", ");
try w.writeAll(req);
}
try w.print("):\n{s}\n", .{s.description});
if (!s.tool.isRecorded() or s.tool == .evaluate) continue;
try renderBuiltinEntry(w, s);
}
for (Schema.all()) |s| {
if (s.tool != .evaluate) continue;
try w.writeAll("\nEscape hatch for advanced page interaction or page-side logic no builtin above can express — not for reading data extract can read:\n");
try renderBuiltinEntry(w, s);
}
}
/// Strip a surrounding ```` ```lang … ``` ```` markdown fence if the model
/// wrapped its output in one despite being told not to.
fn stripCodeFence(text: []const u8) []const u8 {
const t = std.mem.trim(u8, text, &std.ascii.whitespace);
if (!std.mem.startsWith(u8, t, "```")) return t;
const first_nl = std.mem.indexOfScalar(u8, t, '\n') orelse return t;
const body = t[first_nl + 1 ..];
const close = std.mem.lastIndexOf(u8, body, "```") orelse return std.mem.trim(u8, body, &std.ascii.whitespace);
return std.mem.trim(u8, body[0..close], &std.ascii.whitespace);
/// Write one catalog entry for `s` to `w`: a blank line, then
/// `name(required, args):` followed by the tool's description on the next line.
fn renderBuiltinEntry(w: *std.Io.Writer, s: Schema) !void {
    try w.print("\n{s}(", .{s.tool_name});
    // Separator is empty before the first parameter and ", " before the rest.
    var sep: []const u8 = "";
    for (s.required) |param| {
        try w.writeAll(sep);
        try w.writeAll(param);
        sep = ", ";
    }
    try w.print("):\n{s}\n", .{s.description});
}
fn logSaveBufferError(self: *Agent, err: anyerror) void {
@@ -1309,9 +1542,9 @@ fn recordSlashToolCall(
.arguments = if (args) |v| try zenai.json.dupeValue(ma, v) else null,
};
// capToolOutput returns its input unchanged under the cap; dupe so content
// doesn't alias the caller's per-iteration arena.
const capped = capToolOutput(ma, result.text);
// truncateWithMarker returns its input unchanged under the cap; dupe so
// content doesn't alias the caller's per-iteration arena.
const capped = string.truncateWithMarker(ma, result.text, tool_output_max_bytes);
const content = if (capped.ptr == result.text.ptr) try ma.dupe(u8, capped) else capped;
const tool_results = try ma.alloc(zenai.provider.ToolResult, 1);
@@ -1415,6 +1648,13 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 {
if (!tc.is_error and t == .extract) last_extract_idx = i;
}
// Keep the latest extract's real result so `/save` can ground and
// verify its synthesized post-processing against actual data.
if (last_extract_idx) |idx| {
_ = self.last_extract_arena.reset(.retain_capacity);
self.last_extract_json = self.last_extract_arena.allocator().dupe(u8, result.tool_calls_made[idx].result) catch null;
}
var recorded_any = false;
for (result.tool_calls_made, 0..) |tc, i| {
if (tc.is_error) continue;
@@ -1546,19 +1786,14 @@ fn buildUserMessageParts(
// the next request body) without bound.
const tool_output_max_bytes: usize = 1 * 1024 * 1024;
fn capToolOutput(allocator: std.mem.Allocator, output: []const u8) []const u8 {
if (output.len <= tool_output_max_bytes) return output;
const prefix = string.truncateUtf8(output, tool_output_max_bytes);
var suffix_buf: [64]u8 = undefined;
const suffix = std.fmt.bufPrint(&suffix_buf, "\n...[truncated, original {d} bytes]", .{output.len}) catch return prefix;
return std.mem.concat(allocator, u8, &.{ prefix, suffix }) catch prefix;
}
fn handleToolCall(ctx: *anyopaque, allocator: std.mem.Allocator, tool_name: []const u8, arguments: ?std.json.Value) zenai.provider.Client.ToolHandler.Result {
const self: *Agent = @ptrCast(@alignCast(ctx));
// `run_script`'s only arg is the whole candidate script — too long and noisy
// to render, so suppress it and let the label/phase carry the context.
const is_run_script = self.active_verify != null and std.mem.eql(u8, tool_name, browser_tools.run_script_tool_name);
// The spinner doesn't render args, and `agentToolDone` skips the body line
// at low verbosity — don't pay for the stringify when nobody reads it.
const needs_args = self.terminal.spinner.isEnabled() or self.terminal.verbosity != .low;
const needs_args = !is_run_script and (self.terminal.spinner.isEnabled() or self.terminal.verbosity != .low);
// Stringify the pre-substitution args so $LP_* placeholders the model
// emitted stay redacted in the UI.
const args_str: []const u8 = if (needs_args) (if (arguments) |v|
@@ -1568,12 +1803,15 @@ fn handleToolCall(ctx: *anyopaque, allocator: std.mem.Allocator, tool_name: []co
self.terminal.spinner.setTool(tool_name, args_str);
defer self.terminal.spinner.setThinking();
const outcome: zenai.provider.Client.ToolHandler.Result = if (browser_tools.call(allocator, self.session, &self.node_registry, tool_name, arguments)) |result|
.{ .content = capToolOutput(allocator, result.text), .is_error = result.is_error }
const outcome: zenai.provider.Client.ToolHandler.Result = if (is_run_script)
self.runScriptTool(allocator, arguments)
else if (browser_tools.call(allocator, self.session, &self.node_registry, tool_name, arguments)) |result|
.{ .content = string.truncateWithMarker(allocator, result.text, tool_output_max_bytes), .is_error = result.is_error }
else |err|
.{ .content = std.fmt.allocPrint(allocator, "Error: {s}", .{@errorName(err)}) catch "Error: tool execution failed", .is_error = true };
self.terminal.agentToolDone(tool_name, args_str, !outcome.is_error);
// run_script emits its own always-visible trace inside `runScriptTool`.
if (!is_run_script) self.terminal.agentToolDone(tool_name, args_str, !outcome.is_error);
if (self.terminal.verbosity == .high) self.terminal.printToolOutcome(tool_name, outcome.content, outcome.is_error);
return outcome;
}
@@ -1644,35 +1882,6 @@ fn completionModels(context: *anyopaque, _: std.mem.Allocator) []const []const u
return ids;
}
test "capToolOutput: passes through when under cap" {
const ta = std.testing.allocator;
const out = capToolOutput(ta, "short");
try std.testing.expectEqualStrings("short", out);
}
// Boundary correctness lives in string.zig's `truncateUtf8` tests; here we only
// assert the agent-specific policy: an over-cap body keeps valid UTF-8 and gains
// the truncation marker.
test "capToolOutput: appends a marker when truncating" {
const ta = std.testing.allocator;
// 3-byte Hangul codepoint (U+D55C '한' = 0xED 0x95 0x9C) straddling the cap.
const cap = tool_output_max_bytes;
const buf = try ta.alloc(u8, cap + 8);
defer ta.free(buf);
@memset(buf[0 .. cap - 1], 'a');
buf[cap - 1] = 0xED;
buf[cap + 0] = 0x95;
buf[cap + 1] = 0x9C;
@memset(buf[cap + 2 ..], 'b');
const out = capToolOutput(ta, buf);
defer if (out.ptr != buf.ptr) ta.free(out);
try std.testing.expect(std.unicode.utf8ValidateSlice(out));
try std.testing.expect(std.mem.indexOf(u8, out, "truncated") != null);
}
test "parseSaveCommand: filename only" {
const r = try parseSaveCommand("out.js");
try std.testing.expectEqualStrings("out.js", r.filename.?);
@@ -1721,8 +1930,3 @@ test "renderBuiltinCatalog: lists recorded tools, omits read-only ones" {
try std.testing.expect(std.mem.indexOf(u8, text, "tree(") == null);
try std.testing.expect(std.mem.indexOf(u8, text, "markdown(") == null);
}
test "stripCodeFence: unwraps a fenced block and passes plain text through" {
try std.testing.expectEqualStrings("goto(\"x\");", stripCodeFence("```js\ngoto(\"x\");\n```"));
try std.testing.expectEqualStrings("goto(\"x\");", stripCodeFence("goto(\"x\");"));
}

View File

@@ -71,6 +71,12 @@ cv: std.Thread.Condition = .{},
state: State = .idle,
frame: u8 = 0,
/// Custom label for the thinking state — a phase like "writing the script".
/// Empty falls back to "thinking". Set via `setStatus`, cleared when the turn
/// ends so the next turn starts plain.
status_buf: [48]u8 = undefined,
status_len: usize = 0,
tool_calls: u32 = 0,
turn_started_ns: i128 = 0,
@@ -143,6 +149,7 @@ pub fn stop(self: *Spinner) void {
_ = std.posix.write(std.posix.STDERR_FILENO, summary) catch {};
self.state = .idle;
self.status_len = 0;
self.last_render_len = 0;
}
@@ -155,6 +162,7 @@ pub fn cancel(self: *Spinner) void {
if (self.state == .idle) return;
_ = std.posix.write(std.posix.STDERR_FILENO, "\r" ++ clear_eol) catch {};
self.state = .idle;
self.status_len = 0;
self.last_render_len = 0;
}
@@ -188,6 +196,22 @@ pub fn setTool(self: *Spinner, name: []const u8, args: []const u8) void {
self.cv.signal();
}
/// Set the phase label shown by the thinking indicator (e.g. "writing the
/// script"). Does nothing when the spinner is disabled. Otherwise the label is
/// stored whatever the current state, truncated to fit `status_buf`, so it is
/// picked up the next time the thinking state renders; if the spinner is
/// already thinking it is redrawn right away. `stop` and `cancel` clear it.
pub fn setStatus(self: *Spinner, text: []const u8) void {
    if (!self.isEnabled()) return;

    self.mu.lock();
    defer self.mu.unlock();

    const label = truncateUtf8(text, self.status_buf.len);
    self.status_len = label.len;
    @memcpy(self.status_buf[0..label.len], label);

    // Only repaint when the label is what is currently on screen; a tool label
    // stays up until the state flips back to thinking.
    if (self.state == .thinking) {
        self.renderLocked();
        self.cv.signal();
    }
}
/// Request a transition back to the cycling "thinking" state. The worker
/// honors `min_tool_display_ns`: if the current tool label has not been up
/// long enough, the flip is deferred until it has.
@@ -253,11 +277,14 @@ fn renderLocked(self: *Spinner) void {
const glyph = braille[self.frame % braille.len];
const written = switch (self.state) {
.idle => return,
.thinking => std.fmt.bufPrint(
&buf,
"\r" ++ ansi.yellow ++ "{s}" ++ ansi.reset ++ " " ++ ansi.dim ++ "[agent: thinking]" ++ ansi.reset ++ clear_eol,
.{glyph},
) catch return,
.thinking => blk: {
const label = if (self.status_len > 0) self.status_buf[0..self.status_len] else "thinking";
break :blk std.fmt.bufPrint(
&buf,
"\r" ++ ansi.yellow ++ "{s}" ++ ansi.reset ++ " " ++ ansi.dim ++ "[agent: {s}]" ++ ansi.reset ++ clear_eol,
.{ glyph, label },
) catch return;
},
.tool => |tool| blk: {
const prefix: []const u8 = if (tool.manual) "" else "agent: ";
const name = tool.name_buf[0..tool.name_len];

View File

@@ -212,9 +212,18 @@ pub fn endTool(self: *Terminal) void {
/// text via the bullet character.
pub fn agentToolDone(self: *Terminal, name: []const u8, args: []const u8, ok: bool) void {
if (!self.verbosity.atLeast(.medium)) return;
const spinner_on = self.spinner.isEnabled();
self.emitToolBullet(name, args, ok);
}
if (spinner_on) {
/// Trace one `/save` candidate run. Unlike `agentToolDone` this is shown even at
/// the REPL's default `.low` verbosity: the verify loop is an infrequent,
/// user-initiated step the user needs to watch happen.
///
/// `summary` is printed where a tool's arguments normally go and `ok` is passed
/// through as the success flag; the tool name is fixed to "run_script".
pub fn agentVerifyRun(self: *Terminal, summary: []const u8, ok: bool) void {
// No verbosity gate here, on purpose — see the doc comment above.
self.emitToolBullet("run_script", summary, ok);
}
fn emitToolBullet(self: *Terminal, name: []const u8, args: []const u8, ok: bool) void {
if (self.spinner.isEnabled()) {
const a = if (self.repl_arena) |*ra| ra else return;
defer _ = a.reset(.retain_capacity);
const bytes = formatBulletLine(a.allocator(), name, args, ok) catch return;

View File

@@ -144,37 +144,74 @@ pub const driver_guidance =
/// Shared: the agent's `/save` feeds it to its own LLM; the MCP `save` tool
/// hands it to the driving client as the tool description.
pub const save_synthesis_prompt =
\\Write a single Lightpanda agent script (.js) that reproduces what the user
\\set out to do this session. Infer the goal from the whole conversation and
\\keep only the steps a clean, repeatable script needs — drop failed attempts,
\\retries, exploratory reads (tree/markdown/extract probes), and corrections.
\\Pick the right layer for each step:
\\- builtins (goto, click, fill, extract, …) for actions and for reading data;
\\ extract is how you pull structured data out of a page.
\\- plain top-level JavaScript for logic — loops, cross-page aggregation,
\\ filtering, string building. It runs in the script, not the page.
\\- evaluate(...) only for page-side JavaScript no builtin can express. It is
\\ an escape hatch, not a default, and cannot see the script's variables —
\\ interpolate any value into its string.
\\Stay faithful to the recorded calls: same options each one actually used.
\\Do NOT add a `timeout` to goto (or any tool) unless the session
\\did. Never round-trip a result through `lp.*`, and never append no-op
\\extract(...) probes or `evaluate("return lp....")` tails to surface output.
\\The completion value — the last top-level expression — prints automatically
\\(objects and arrays as JSON), so end with the bare result expression: a final
\\`extract({...});`, or `results;` after an aggregation loop. No console.log,
\\JSON.stringify, or `return` (illegal at top level) needed.
\\Write modern, readable JavaScript: `for (const x of xs)`, `const`/`let` over
\\`var`, template literals, destructuring, 2-space indent (including multi-line
\\extract({...}) schemas).
\\The script runs as a classic script, so top-level `await` is a syntax error.
\\The builtins are synchronous — each returns its result directly, so never
\\wrap them in async/await, .then, or Promises (`const data = extract(...)`,
\\not `await extract(...)`). evaluate(...) may run async JS in the page, but
\\the call itself returns synchronously.
\\Output ONLY JavaScript source — no markdown fences, no commentary.
\\Write a single Lightpanda agent script (.js) that reproduces what the user set
\\out to do this session. Keep only the steps a clean, repeatable script needs —
\\drop failed attempts, retries, and exploratory probes.
\\Use the builtins for actions and data — extract is the main way to read data —
\\and plain top-level JavaScript for logic (loops, aggregation, filtering, string
\\work). Reserve evaluate(...) for advanced page interaction or page-side logic no
\\builtin can express; it can't see the script's variables, so interpolate them into
\\its string. Stay faithful to the recorded calls and their options (e.g. don't add
\\a timeout the session didn't use).
\\The last top-level expression prints automatically (objects/arrays as JSON), so
\\end with the bare result — a final extract({...}); or results; after a loop. No
\\console.log, JSON.stringify, or return.
\\Top-level await is a syntax error and the builtins are synchronous, so never await
\\them (const data = extract(...), not await extract(...)).
\\If an output schema is given below, the completion value MUST match it — parse or
\\split the extracted text as needed.
\\Write modern, readable JavaScript (const/let, template literals, destructuring,
\\2-space indent). Output ONLY JavaScript — no markdown fences, no commentary.
;
/// Agent `/save`, step 1: distill the session into a one-sentence intent that
/// feeds `save_schema_prompt`. Appended to the full conversation, so the model
/// reads the turns; an explicit anchor (a `--task` or `/save` prompt) is added
/// by the caller when present.
///
/// The prompt asks only for the goal and the data or outcome wanted — not the
/// steps, tools, selectors, or page structure — and demands that the reply be
/// exactly that one sentence, phrased as a task description.
pub const save_intent_prompt =
\\In one sentence, state what the user set out to accomplish this session:
\\the goal and the data or outcome they wanted — not the individual steps,
\\tools, selectors, or page structure. Phrase it as a task description, e.g.
\\"Go to HackerNews and retrieve the top 10 stories with their last 3
\\comments (author and text)". Output ONLY that one sentence.
;
/// Agent `/save`, step 2: turn the step-1 intent into a logical output schema.
/// Given ONLY the intent — deliberately blind to the page and how data was
/// fetched — so the resulting shape is stable across runs of the same session.
///
/// The prompt asks for type placeholders ("string", "number", "boolean")
/// rather than real data, shows one worked example, and requires bare JSON with
/// no fences or commentary. The literal ends with an `Intent:` label and no
/// trailing newline.
/// NOTE(review): presumably the caller appends the step-1 sentence after that
/// label — confirm at the call site.
pub const save_schema_prompt =
\\Generate the JSON output schema describing the following intent. Do not
\\focus on the intent context (the actual webpage structure or how to
\\retrieve the data), just the logical JSON schema example. Do not provide
\\actual data, just data types ("string", "number", "boolean").
\\Example: the intent "Go to HackerNews and retrieve the top 10 stories with
\\their last 3 comments with author and text" produces:
\\{"results": [{"title": "string", "url": "string", "comments": [{"author": "string", "text": "string"}]}]}
\\Output ONLY the JSON schema, no markdown fences, no commentary.
\\
\\Intent:
;
/// Name of the agent `/save` verification tool — the model calls it to run a
/// candidate script before finalizing. The agent dispatches it by this name.
pub const run_script_tool_name = "run_script";
/// Model-facing description of the `run_script` tool; `runScriptToolDef` uses
/// it as the tool definition's `description`. It tells the model that the
/// candidate starts from a blank page and must navigate itself with `goto(...)`.
const run_script_tool_desc =
"Run your full candidate script for real, from a blank page, and return its completion value " ++
"(or error) — exactly as it will run when saved and replayed. It must navigate itself with " ++
"goto(...). Use it to verify navigation, extraction, and your transform before finalizing.";
/// JSON Schema text for the `run_script` arguments: an object with a single
/// required string property, `source`, carrying the full candidate script.
/// `runScriptToolDef` parses this into a `std.json.Value`.
const run_script_params_json =
\\{"type":"object","properties":{"source":{"type":"string","description":"Full JavaScript source of the candidate script to execute."}},"required":["source"]}
;
/// Build the `run_script` tool definition offered to the model during an LLM
/// `/save` synthesis turn.
///
/// The parameter schema is parsed from `run_script_params_json` with the leaky
/// JSON parser, so every node of the resulting value is owned by `arena`; keep
/// that arena alive for as long as the returned tool is in use (at least
/// through the `runTools` call). Fails only if parsing the schema fails.
pub fn runScriptToolDef(arena: std.mem.Allocator) !zenai.provider.Tool {
    const parameters = try std.json.parseFromSliceLeaky(
        std.json.Value,
        arena,
        run_script_params_json,
        .{},
    );
    return .{
        .name = run_script_tool_name,
        .description = run_script_tool_desc,
        .parameters = parameters,
    };
}
/// Reject paths that an untrusted MCP client could use to escape the
/// working directory: empty paths, absolute paths, and any path with a
/// `..` segment. Operator-controlled symlinks already inside CWD are out

View File

@@ -95,6 +95,13 @@ pub const RunError = error{
OutOfMemory,
};
/// A captured script run: `err` is the formatted failure (null on success);
/// `output` is the completion value's display string (empty when void).
pub const RunOutcome = struct {
/// Formatted failure message; stays null when the run succeeded.
err: ?[]const u8 = null,
/// Display string of the completion value; defaults to the empty string
/// when no value was captured.
output: []const u8 = "",
};
pub fn init(
allocator: std.mem.Allocator,
app: *lp.App,
@@ -238,6 +245,23 @@ fn setObjectProperty(
/// compile/runtime exception returns a formatted error allocated in this
/// runtime's call arena and valid until deinit or the next run.
pub fn runSource(self: *Runtime, source: []const u8, name: []const u8) RunError!?[]const u8 {
    // Non-capturing run: the shared implementation handles the completion
    // value itself, so only the failure message (if any) is passed on.
    const outcome = try self.runInner(source, name, false);
    return outcome.err;
}
/// Capturing variant of `runSource`: the completion value's display string is
/// returned in the outcome rather than printed, so `/save` verification can
/// feed it back to the LLM. Both outcome fields live in the call arena and
/// stay valid only until the next run or deinit.
///
/// The global context is torn down and recreated before every call. Candidates
/// are independent runs (unlike a `/load` script), and reusing the context
/// would make a repeated top-level `const`/`let` fail with
/// "Identifier 'x' has already been declared".
pub fn runSourceCapture(self: *Runtime, source: []const u8, name: []const u8) RunError!RunOutcome {
    self.resetContext();
    self.createContext() catch {
        // Report the failed reset as a run failure instead of propagating it.
        const message = try self.dupeError("script context reset failed");
        return .{ .err = message };
    };
    return self.runInner(source, name, true);
}
fn runInner(self: *Runtime, source: []const u8, name: []const u8, capture: bool) RunError!RunOutcome {
_ = self.call_arena.reset(.retain_capacity);
var hs: lp.js.HandleScope = undefined;
@@ -245,7 +269,7 @@ pub fn runSource(self: *Runtime, source: []const u8, name: []const u8) RunError!
defer hs.deinit();
const context: *const v8.Context = @ptrCast(v8.v8__Global__Get(&self.context, self.env.isolate.handle) orelse
return try self.dupeError("agent script context is not available"));
return .{ .err = try self.dupeError("agent script context is not available") });
v8.v8__Context__Enter(context);
defer v8.v8__Context__Exit(context);
@@ -268,19 +292,27 @@ pub fn runSource(self: *Runtime, source: []const u8, name: []const u8) RunError!
&compiler_source,
v8.kNoCompileOptions,
v8.kNoCacheNoReason,
) orelse return try self.formatCaught(context, &try_catch, "compile failed");
) orelse return .{ .err = try self.formatCaught(context, &try_catch, "compile failed") };
const completion = v8.v8__Script__Run(script, context) orelse
return try self.formatCaught(context, &try_catch, "script failed");
return .{ .err = try self.formatCaught(context, &try_catch, "script failed") };
// Explicit microtask policy: promise continuations only run once drained.
self.env.performIsolateMicrotasks();
if (v8.v8__TryCatch__HasCaught(&try_catch)) {
return try self.formatCaught(context, &try_catch, "script failed");
return .{ .err = try self.formatCaught(context, &try_catch, "script failed") };
}
if (capture) {
if (v8.v8__Value__IsUndefined(completion)) return .{};
const output = self.displayString(self.call_arena.allocator(), context, completion) catch |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
error.JsException => return .{ .output = "<completion value could not be serialized>" },
};
return .{ .output = output };
}
self.printCompletion(context, completion);
return null;
return .{};
}
/// Echo a script's completion value (its last-evaluated expression) so a script
@@ -677,10 +709,10 @@ test "agent script runtime: goto and evaluate dispatch through browser tools" {
defer testing.reset();
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
var registry = CDPNode.Registry.init(testing.allocator);
var registry: CDPNode.Registry = .init(testing.allocator);
defer registry.deinit();
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, &registry);
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
defer runtime.deinit();
try runTestScript(runtime,
@@ -698,10 +730,10 @@ test "agent script runtime: extract returns a JavaScript object" {
defer testing.reset();
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
var registry = CDPNode.Registry.init(testing.allocator);
var registry: CDPNode.Registry = .init(testing.allocator);
defer registry.deinit();
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, &registry);
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
defer runtime.deinit();
try runTestScript(runtime,
@@ -752,10 +784,10 @@ test "agent script runtime: extract tolerates list selectors that match nothing"
defer testing.reset();
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
var registry = CDPNode.Registry.init(testing.allocator);
var registry: CDPNode.Registry = .init(testing.allocator);
defer registry.deinit();
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, &registry);
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
defer runtime.deinit();
try runTestScript(runtime,
@@ -780,10 +812,10 @@ test "agent script runtime: strict-mode scripts can call primitives" {
defer testing.reset();
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
var registry = CDPNode.Registry.init(testing.allocator);
var registry: CDPNode.Registry = .init(testing.allocator);
defer registry.deinit();
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, &registry);
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
defer runtime.deinit();
try runTestScript(runtime,
@@ -798,10 +830,10 @@ test "agent script runtime: strict-mode scripts can call primitives" {
test "agent script runtime: promise microtasks run to completion" {
defer testing.reset();
var registry = CDPNode.Registry.init(testing.allocator);
var registry: CDPNode.Registry = .init(testing.allocator);
defer registry.deinit();
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, &registry);
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
defer runtime.deinit();
try runTestScript(runtime,
@@ -819,10 +851,10 @@ test "agent script runtime: primitives re-entered from argument callbacks stay i
defer testing.reset();
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
var registry = CDPNode.Registry.init(testing.allocator);
var registry: CDPNode.Registry = .init(testing.allocator);
defer registry.deinit();
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, &registry);
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
defer runtime.deinit();
try runTestScript(runtime,
@@ -841,10 +873,10 @@ test "agent script runtime: terminate interrupts local JavaScript" {
defer testing.reset();
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
var registry = CDPNode.Registry.init(testing.allocator);
var registry: CDPNode.Registry = .init(testing.allocator);
defer registry.deinit();
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, &registry);
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
defer runtime.deinit();
const thread = try std.Thread.spawn(.{}, terminateRuntimeSoon, .{runtime});
@@ -859,10 +891,10 @@ test "agent script runtime: agent variables persist and page globals are isolate
defer testing.reset();
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
var registry = CDPNode.Registry.init(testing.allocator);
var registry: CDPNode.Registry = .init(testing.allocator);
defer registry.deinit();
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, &registry);
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
defer runtime.deinit();
try runTestScript(runtime,
@@ -884,10 +916,10 @@ test "agent script runtime: page evaluate cannot see agent primitives or binding
defer testing.reset();
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
var registry = CDPNode.Registry.init(testing.allocator);
var registry: CDPNode.Registry = .init(testing.allocator);
defer registry.deinit();
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, &registry);
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
defer runtime.deinit();
try runTestScript(runtime,
@@ -902,10 +934,10 @@ test "agent script runtime: page evaluate cannot see agent primitives or binding
test "agent script runtime: console is available in agent context" {
defer testing.reset();
var registry = CDPNode.Registry.init(testing.allocator);
var registry: CDPNode.Registry = .init(testing.allocator);
defer registry.deinit();
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, &registry);
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
defer runtime.deinit();
try runTestScript(runtime,
@@ -919,10 +951,10 @@ test "agent script runtime: tool errors throw and stop execution" {
defer testing.reset();
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
var registry = CDPNode.Registry.init(testing.allocator);
var registry: CDPNode.Registry = .init(testing.allocator);
defer registry.deinit();
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, &registry);
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
defer runtime.deinit();
const message = (try runtime.runSource(
@@ -945,10 +977,10 @@ test "agent script runtime: builtin argument marshalling (positional + options)"
defer testing.reset();
defer if (testing.test_session.hasPage()) testing.test_session.removePage();
var registry = CDPNode.Registry.init(testing.allocator);
var registry: CDPNode.Registry = .init(testing.allocator);
defer registry.deinit();
const runtime = try Runtime.init(testing.allocator, testing.test_app, testing.test_session, &registry);
const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
defer runtime.deinit();
try runTestScript(runtime,
@@ -994,3 +1026,72 @@ test "agent script runtime: builtin argument marshalling (positional + options)"
try testing.expect(std.mem.indexOf(u8, message, "invalid arguments") != null);
}
}
test "agent script runtime: runSourceCapture runs the full script live and captures completion" {
    defer testing.reset();
    defer if (testing.test_session.hasPage()) testing.test_session.removePage();

    var registry: CDPNode.Registry = .init(testing.allocator);
    defer registry.deinit();

    const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
    defer runtime.deinit();

    // A candidate is verified the same way a standalone replay runs: it has to
    // navigate on its own, and extract reads from the page that goto loaded.
    const candidate =
        \\goto("http://localhost:9582/src/browser/tests/mcp_actions.html");
        \\click("#btn");
        \\const data = extract({ label: "#btn" });
        \\data.label;
    ;
    const outcome = try runtime.runSourceCapture(candidate, "candidate.js");

    try testing.expect(outcome.err == null);
    try std.testing.expectEqualStrings("Click Me", outcome.output);
}
test "agent script runtime: runSourceCapture surfaces a candidate's error" {
    defer testing.reset();
    defer if (testing.test_session.hasPage()) testing.test_session.removePage();

    var registry: CDPNode.Registry = .init(testing.allocator);
    defer registry.deinit();

    const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
    defer runtime.deinit();

    // extract hands back an object, so calling an array method on it throws —
    // the genuine `raw.map is not a function` failure the model has to see
    // and repair.
    const failing_candidate =
        \\goto("http://localhost:9582/src/browser/tests/mcp_actions.html");
        \\const raw = extract({ items: [{ selector: "li" }] });
        \\raw.map(x => x);
    ;
    const outcome = try runtime.runSourceCapture(failing_candidate, "candidate.js");

    try testing.expect(outcome.err != null);
}
test "agent script runtime: each capture run gets a fresh global scope" {
    defer testing.reset();
    defer if (testing.test_session.hasPage()) testing.test_session.removePage();

    var registry: CDPNode.Registry = .init(testing.allocator);
    defer registry.deinit();

    const runtime: *Runtime = try .init(testing.allocator, testing.test_app, testing.test_session, &registry);
    defer runtime.deinit();

    const src =
        \\goto("http://localhost:9582/src/browser/tests/mcp_actions.html");
        \\const data = extract({ label: "#btn" });
        \\const out = data.label;
        \\out;
    ;

    // Run the identical candidate twice in a row. Its top-level `const`
    // declarations may not clash on the second pass, because every run begins
    // in a clean context rather than inheriting the previous run's globals.
    for (0..2) |_| {
        const outcome = try runtime.runSourceCapture(src, "candidate.js");
        try testing.expect(outcome.err == null);
        try std.testing.expectEqualStrings("Click Me", outcome.output);
    }
}

View File

@@ -332,6 +332,29 @@ pub fn truncateUtf8(bytes: []const u8, max_bytes: usize) []const u8 {
return bytes[0..i];
}
/// Cap `text` at `max_bytes` (cut on a UTF-8 boundary via `truncateUtf8`) and,
/// when something was cut, append a marker recording the original byte length.
///
/// Text that already fits is returned as-is with no allocation. Otherwise the
/// result is a new buffer from `allocator` holding the kept prefix followed by
/// the marker, so it is longer than the prefix by the marker's length. If the
/// marker cannot be formatted or the buffer cannot be allocated, the bare
/// prefix (a view into `text`) is returned instead.
pub fn truncateWithMarker(allocator: std.mem.Allocator, text: []const u8, max_bytes: usize) []const u8 {
    if (text.len <= max_bytes) return text;

    const head = truncateUtf8(text, max_bytes);

    var marker_storage: [64]u8 = undefined;
    const marker = std.fmt.bufPrint(
        &marker_storage,
        "\n...[truncated, original {d} bytes]",
        .{text.len},
    ) catch return head;

    // Single exact-size allocation; best effort, so fall back to the prefix.
    const joined = allocator.alloc(u8, head.len + marker.len) catch return head;
    @memcpy(joined[0..head.len], head);
    @memcpy(joined[head.len..], marker);
    return joined;
}
/// Unwrap text that is enclosed in a ```lang … ``` markdown fence and return
/// the inner block with surrounding whitespace trimmed; text that does not
/// start with a fence is returned trimmed but otherwise untouched.
///
/// The opening fence line (including any language tag) ends at the first
/// newline; if there is no newline at all the trimmed text is returned as-is.
/// The closing fence is the last ``` after that line; when it is missing,
/// everything after the opening line is returned. The result is always a view
/// into `text` — nothing is allocated.
pub fn stripCodeFence(text: []const u8) []const u8 {
    const fence = "```";
    const blanks = &std.ascii.whitespace;

    const trimmed = std.mem.trim(u8, text, blanks);
    if (!std.mem.startsWith(u8, trimmed, fence)) return trimmed;

    const opener_end = std.mem.indexOfScalar(u8, trimmed, '\n') orelse return trimmed;
    const after_opener = trimmed[opener_end + 1 ..];

    const inner = if (std.mem.lastIndexOf(u8, after_opener, fence)) |close_at|
        after_opener[0..close_at]
    else
        after_opener;
    return std.mem.trim(u8, inner, blanks);
}
// Discriminatory type that signals the bridge to use arena instead of call_arena
// Use this for strings that need to persist beyond the current call
// The caller can unwrap and store just the underlying .str field
@@ -378,6 +401,32 @@ test "truncateUtf8" {
try testing.expectEqual("\xFFx", truncateUtf8("\xFFx", 2));
}
test "truncateWithMarker" {
    const gpa = std.testing.allocator;

    // Text within the cap comes back untouched.
    try std.testing.expectEqualStrings("short", truncateWithMarker(gpa, "short", 1024));

    // Over the cap: place a 3-byte Hangul codepoint (U+D55C, 0xED 0x95 0x9C) so
    // it straddles the cut. The result must remain valid UTF-8 and carry the
    // truncation marker.
    const cap: usize = 1024;
    const hangul = [_]u8{ 0xED, 0x95, 0x9C };

    const input = try gpa.alloc(u8, cap + 8);
    defer gpa.free(input);
    @memset(input, 'b');
    @memset(input[0 .. cap - 1], 'a');
    @memcpy(input[cap - 1 ..][0..hangul.len], &hangul);

    const result = truncateWithMarker(gpa, input, cap);
    // Only free when a new buffer was actually allocated.
    defer if (result.ptr != input.ptr) gpa.free(result);

    try std.testing.expect(std.unicode.utf8ValidateSlice(result));
    try std.testing.expect(std.mem.indexOf(u8, result, "truncated") != null);
}
test "stripCodeFence" {
    // Fenced input is unwrapped; bare input passes through unchanged.
    const cases = [_]struct { input: []const u8, expected: []const u8 }{
        .{ .input = "```js\ngoto(\"x\");\n```", .expected = "goto(\"x\");" },
        .{ .input = "goto(\"x\");", .expected = "goto(\"x\");" },
    };
    for (cases) |case| {
        try std.testing.expectEqualStrings(case.expected, stripCodeFence(case.input));
    }
}
test "String" {
const other_short = try String.init(undefined, "other_short", .{});
const other_long = try String.init(testing.allocator, "other_long" ** 100, .{});