tools: unify eval and extract tool dispatching

- Add `callEvalLike` to consolidate tools returning `EvalResult`.
- Update `Agent` and `MCP` to use the new helper.
- Route failed data commands to stderr in `CommandExecutor`.
- Update CSS selector guidance in `script.zig`.
This commit is contained in:
Adrià Arrufat
2026-05-14 11:14:11 +02:00
parent faba86cf82
commit 9ac2c03a7e
5 changed files with 59 additions and 51 deletions

View File

@@ -428,30 +428,31 @@ fn handleSlash(self: *Agent, body: []const u8) bool {
return false;
};
if (std.mem.eql(u8, schema.tool_name, @tagName(lp.tools.Action.eval))) {
// callEval surfaces JS errors separately from operational errors;
// tool_executor.call collapses them.
const eval_script = extractEvalScript(aa, args_json) catch {
self.terminal.printError("eval requires a `script` argument.");
return false;
};
self.terminal.beginTool(schema.tool_name, rest);
const result = self.tool_executor.callEval(aa, eval_script) catch |err| {
self.terminal.endTool(false);
self.terminal.printErrorFmt("eval: {s}", .{@errorName(err)});
return false;
};
switch (result) {
.ok => |text| {
self.terminal.endTool(true);
self.terminal.printToolResult(schema.tool_name, text);
},
.js_error => |text| {
if (std.meta.stringToEnum(lp.tools.Action, schema.tool_name)) |action| {
const parsed_args: ?std.json.Value = if (args_json.len == 0) null else
std.json.parseFromSliceLeaky(std.json.Value, aa, args_json, .{}) catch {
self.terminal.printErrorFmt("{s}: invalid JSON arguments", .{schema.tool_name});
return false;
};
if (lp.tools.callEvalLike(aa, self.tool_executor.session, &self.tool_executor.node_registry, action, parsed_args)) |outcome| {
self.terminal.beginTool(schema.tool_name, rest);
const result = outcome catch |err| {
self.terminal.endTool(false);
self.terminal.printErrorFmt("eval: {s}", .{text});
},
self.terminal.printErrorFmt("{s}: {s}", .{ schema.tool_name, @errorName(err) });
return false;
};
switch (result) {
.ok => |text| {
self.terminal.endTool(true);
self.terminal.printToolResult(schema.tool_name, text);
},
.js_error => |text| {
self.terminal.endTool(false);
self.terminal.printErrorFmt("{s}: {s}", .{ schema.tool_name, text });
},
}
return false;
}
return false;
}
self.terminal.beginTool(schema.tool_name, rest);
@@ -526,12 +527,6 @@ fn firstSentence(text: []const u8) []const u8 {
return text;
}
fn extractEvalScript(arena: std.mem.Allocator, args_json: []const u8) ![]const u8 {
if (args_json.len == 0) return error.MissingScript;
const parsed = std.json.parseFromSliceLeaky(struct { script: []const u8 }, arena, args_json, .{ .ignore_unknown_fields = true }) catch return error.MissingScript;
return parsed.script;
}
const Replacement = script.Replacement;
fn runScript(self: *Agent, path: []const u8) bool {
@@ -1102,13 +1097,14 @@ fn handleToolCall(ctx: *anyopaque, allocator: std.mem.Allocator, tool_name: []co
self.terminal.spinner.setTool(tool_name, args_str);
defer self.terminal.spinner.setThinking();
// `eval` is the one tool whose body distinguishes "JS threw" from "JS
// returned a value" — both produce text. `callValue` collapses them
// (the LLM would then see a JS error as a successful tool result).
// Route eval through `callEval` so the JS-error path sets is_error=true,
// letting the model self-correct.
if (std.mem.eql(u8, tool_name, @tagName(lp.tools.Action.eval))) {
const result = lp.tools.callEval(allocator, self.tool_executor.session, &self.tool_executor.node_registry, arguments) catch |err| {
// V8 throw/return must surface in `is_error` — the recorder gates on it.
const action = std.meta.stringToEnum(lp.tools.Action, tool_name);
const eval_like = if (action) |a|
lp.tools.callEvalLike(allocator, self.tool_executor.session, &self.tool_executor.node_registry, a, arguments)
else
null;
if (eval_like) |outcome| {
const result = outcome catch |err| {
const msg = std.fmt.allocPrint(allocator, "Error: {s}", .{@errorName(err)}) catch "Error: tool execution failed";
self.terminal.agentToolDone(tool_name, args_str, false);
if (self.terminal.verbosity == .high) self.terminal.printToolResult(tool_name, msg);

View File

@@ -59,10 +59,10 @@ pub fn executeWithResult(self: *Self, arena: std.mem.Allocator, cmd: Command.Com
return .{ .output = std.fmt.allocPrint(arena, "{s} failed: {s}", .{ tc.name, @errorName(err) }) catch "tool failed", .failed = true };
}
/// Data-producing commands (EXTRACT/EVAL/MARKDOWN/TREE) go to stdout so shell
/// redirection captures only their output; action commands go to stderr.
/// Data output (EXTRACT/EVAL/MARKDOWN/TREE) stdout on success; everything
/// else, including failures from those same commands, → stderr.
pub fn printResult(self: *Self, cmd: Command.Command, result: ExecResult) void {
if (cmd.producesData()) {
if (cmd.producesData() and !result.failed) {
self.terminal.printAssistant(result.output);
} else {
self.terminal.printActionResult(result.output);
@@ -75,9 +75,7 @@ fn execExtract(self: *Self, arena: std.mem.Allocator, raw_schema: []const u8) Ex
return evalLikeResult(self.tool_executor.extract(arena, schema));
}
/// Collapse an `EvalResult` into an `ExecResult` while preserving `isError`:
/// V8 throws would otherwise round-trip as `failed = false` through the
/// generic `[]const u8` path.
/// `EvalResult` `ExecResult`, preserving `isError` (the generic text path drops it).
fn evalLikeResult(result: browser_tools.ToolError!browser_tools.EvalResult) ExecResult {
const r = result catch |err| return .{ .output = @errorName(err), .failed = true };
return .{ .output = r.text(), .failed = r.isError() };

View File

@@ -495,6 +495,23 @@ pub fn callExtract(
return execExtract(arena, session, registry, arguments);
}
/// Dispatch the eval-like subset (`eval`, `extract`) — they return
/// `EvalResult` so JS errors stay distinguishable. Returns null for other
/// actions so callers fall through to `call()`.
pub fn callEvalLike(
arena: std.mem.Allocator,
session: *lp.Session,
registry: *CDPNode.Registry,
action: Action,
arguments: ?std.json.Value,
) ?(ToolError!EvalResult) {
return switch (action) {
.eval => callEval(arena, session, registry, arguments),
.extract => callExtract(arena, session, registry, arguments),
else => null,
};
}
/// Run JavaScript against the current page, skipping the JSON parameter
/// round-trip that `callEval` requires. The script need not be 0-terminated;
/// a copy is made internally.

View File

@@ -163,12 +163,7 @@ fn dispatchBrowserTool(
};
// JS errors are returned as isError tool results, not protocol errors
const eval_outcome: ?(browser_tools.ToolError!browser_tools.EvalResult) = switch (action) {
.eval => browser_tools.callEval(arena, server.session, &server.node_registry, arguments),
.extract => browser_tools.callExtract(arena, server.session, &server.node_registry, arguments),
else => null,
};
if (eval_outcome) |outcome| {
if (browser_tools.callEvalLike(arena, server.session, &server.node_registry, action, arguments)) |outcome| {
if (outcome) |r| {
if (!r.isError()) recordIfActive(server, name, arguments);
} else |_| {}
@@ -282,10 +277,8 @@ fn handleScriptStep(server: *Server, arena: std.mem.Allocator, id: std.json.Valu
return sendErrorContent(server, id, "internal: unknown action from Command.toToolCall");
};
switch (action) {
.eval => return sendEvalOutcome(server, id, browser_tools.callEval(arena, server.session, &server.node_registry, tc.args)),
.extract => return sendEvalOutcome(server, id, browser_tools.callExtract(arena, server.session, &server.node_registry, tc.args)),
else => {},
if (browser_tools.callEvalLike(arena, server.session, &server.node_registry, action, tc.args)) |outcome| {
return sendEvalOutcome(server, id, outcome);
}
const result = browser_tools.call(arena, server.session, &server.node_registry, tc.name, tc.args) catch |err| {

View File

@@ -76,6 +76,10 @@ pub const mcp_driver_guidance =
\\ distinguishing attributes like value, name, or position to avoid
\\ ambiguity. Example: input[type="submit"][value="login"], NOT just
\\ input[type="submit"].
\\- Use standard CSS selectors only. jQuery's `:contains()` and
\\ Playwright's `:has-text()` are not supported and raise a SyntaxError.
\\ To target by visible text, inspect with `tree` or `markdown` first to
\\ find the id/class/structure, then write a plain selector against that.
\\
\\Credentials:
\\- When filling credentials, pass environment variable references like