From 9ac2c03a7e82e59d397b1e48a39ec1b144c4e9df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Thu, 14 May 2026 11:14:11 +0200 Subject: [PATCH] tools: unify eval and extract tool dispatching - Add `callEvalLike` to consolidate tools returning `EvalResult`. - Update `Agent` and `MCP` to use the new helper. - Route failed data commands to stderr in `CommandExecutor`. - Update CSS selector guidance in `script.zig`. --- src/agent/Agent.zig | 66 ++++++++++++++++------------------- src/agent/CommandExecutor.zig | 10 +++--- src/browser/tools.zig | 17 +++++++++ src/mcp/tools.zig | 13 ++----- src/script.zig | 4 +++ 5 files changed, 59 insertions(+), 51 deletions(-) diff --git a/src/agent/Agent.zig b/src/agent/Agent.zig index 6b192f19..7084b889 100644 --- a/src/agent/Agent.zig +++ b/src/agent/Agent.zig @@ -428,30 +428,31 @@ fn handleSlash(self: *Agent, body: []const u8) bool { return false; }; - if (std.mem.eql(u8, schema.tool_name, @tagName(lp.tools.Action.eval))) { - // callEval surfaces JS errors separately from operational errors; - // tool_executor.call collapses them. - const eval_script = extractEvalScript(aa, args_json) catch { - self.terminal.printError("eval requires a `script` argument."); - return false; - }; - self.terminal.beginTool(schema.tool_name, rest); - const result = self.tool_executor.callEval(aa, eval_script) catch |err| { - self.terminal.endTool(false); - self.terminal.printErrorFmt("eval: {s}", .{@errorName(err)}); - return false; - }; - switch (result) { - .ok => |text| { - self.terminal.endTool(true); - self.terminal.printToolResult(schema.tool_name, text); - }, - .js_error => |text| { + if (std.meta.stringToEnum(lp.tools.Action, schema.tool_name)) |action| { + const parsed_args: ?std.json.Value = if (args_json.len == 0) null else + std.json.parseFromSliceLeaky(std.json.Value, aa, args_json, .{}) catch { + self.terminal.printErrorFmt("{s}: invalid JSON arguments", .{schema.tool_name}); + return false; + }; + if (lp.tools.callEvalLike(aa, self.tool_executor.session, &self.tool_executor.node_registry, action, parsed_args)) |outcome| { + self.terminal.beginTool(schema.tool_name, rest); + const result = outcome catch |err| { self.terminal.endTool(false); - self.terminal.printErrorFmt("eval: {s}", .{text}); - }, + self.terminal.printErrorFmt("{s}: {s}", .{ schema.tool_name, @errorName(err) }); + return false; + }; + switch (result) { + .ok => |text| { + self.terminal.endTool(true); + self.terminal.printToolResult(schema.tool_name, text); + }, + .js_error => |text| { + self.terminal.endTool(false); + self.terminal.printErrorFmt("{s}: {s}", .{ schema.tool_name, text }); + }, + } + return false; } - return false; } self.terminal.beginTool(schema.tool_name, rest); @@ -526,12 +527,6 @@ fn firstSentence(text: []const u8) []const u8 { return text; } -fn extractEvalScript(arena: std.mem.Allocator, args_json: []const u8) ![]const u8 { - if (args_json.len == 0) return error.MissingScript; - const parsed = std.json.parseFromSliceLeaky(struct { script: []const u8 }, arena, args_json, .{ .ignore_unknown_fields = true }) catch return error.MissingScript; - return parsed.script; -} - const Replacement = script.Replacement; fn runScript(self: *Agent, path: []const u8) bool { @@ -1102,13 +1097,14 @@ fn handleToolCall(ctx: *anyopaque, allocator: std.mem.Allocator, tool_name: []co self.terminal.spinner.setTool(tool_name, args_str); defer self.terminal.spinner.setThinking(); - // `eval` is the one tool whose body distinguishes "JS threw" from "JS - // returned a value" — both produce text. `callValue` collapses them - // (the LLM would then see a JS error as a successful tool result). - // Route eval through `callEval` so the JS-error path sets is_error=true, - // letting the model self-correct. - if (std.mem.eql(u8, tool_name, @tagName(lp.tools.Action.eval))) { - const result = lp.tools.callEval(allocator, self.tool_executor.session, &self.tool_executor.node_registry, arguments) catch |err| { + // V8 throw/return must surface in `is_error` — the recorder gates on it. + const action = std.meta.stringToEnum(lp.tools.Action, tool_name); + const eval_like = if (action) |a| + lp.tools.callEvalLike(allocator, self.tool_executor.session, &self.tool_executor.node_registry, a, arguments) + else + null; + if (eval_like) |outcome| { + const result = outcome catch |err| { const msg = std.fmt.allocPrint(allocator, "Error: {s}", .{@errorName(err)}) catch "Error: tool execution failed"; self.terminal.agentToolDone(tool_name, args_str, false); if (self.terminal.verbosity == .high) self.terminal.printToolResult(tool_name, msg); diff --git a/src/agent/CommandExecutor.zig b/src/agent/CommandExecutor.zig index 00bd6124..5a2fdd8c 100644 --- a/src/agent/CommandExecutor.zig +++ b/src/agent/CommandExecutor.zig @@ -59,10 +59,10 @@ pub fn executeWithResult(self: *Self, arena: std.mem.Allocator, cmd: Command.Com return .{ .output = std.fmt.allocPrint(arena, "{s} failed: {s}", .{ tc.name, @errorName(err) }) catch "tool failed", .failed = true }; } -/// Data-producing commands (EXTRACT/EVAL/MARKDOWN/TREE) go to stdout so shell -/// redirection captures only their output; action commands go to stderr. +/// Data output (EXTRACT/EVAL/MARKDOWN/TREE) → stdout on success; everything +/// else, including failures from those same commands, → stderr. pub fn printResult(self: *Self, cmd: Command.Command, result: ExecResult) void { - if (cmd.producesData()) { + if (cmd.producesData() and !result.failed) { self.terminal.printAssistant(result.output); } else { self.terminal.printActionResult(result.output); @@ -75,9 +75,7 @@ fn execExtract(self: *Self, arena: std.mem.Allocator, raw_schema: []const u8) Ex return evalLikeResult(self.tool_executor.extract(arena, schema)); } -/// Collapse an `EvalResult` into an `ExecResult` while preserving `isError`: -/// V8 throws would otherwise round-trip as `failed = false` through the -/// generic `[]const u8` path. +/// `EvalResult` → `ExecResult`, preserving `isError` (the generic text path drops it). fn evalLikeResult(result: browser_tools.ToolError!browser_tools.EvalResult) ExecResult { const r = result catch |err| return .{ .output = @errorName(err), .failed = true }; return .{ .output = r.text(), .failed = r.isError() }; diff --git a/src/browser/tools.zig b/src/browser/tools.zig index b45b7ce5..e35b31df 100644 --- a/src/browser/tools.zig +++ b/src/browser/tools.zig @@ -495,6 +495,23 @@ pub fn callExtract( return execExtract(arena, session, registry, arguments); } +/// Dispatch the eval-like subset (`eval`, `extract`) — they return +/// `EvalResult` so JS errors stay distinguishable. Returns null for other +/// actions so callers fall through to `call()`. +pub fn callEvalLike( + arena: std.mem.Allocator, + session: *lp.Session, + registry: *CDPNode.Registry, + action: Action, + arguments: ?std.json.Value, +) ?(ToolError!EvalResult) { + return switch (action) { + .eval => callEval(arena, session, registry, arguments), + .extract => callExtract(arena, session, registry, arguments), + else => null, + }; +} + /// Run JavaScript against the current page, skipping the JSON parameter /// round-trip that `callEval` requires. The script need not be 0-terminated; /// a copy is made internally. diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig index df5c8f95..840c0479 100644 --- a/src/mcp/tools.zig +++ b/src/mcp/tools.zig @@ -163,12 +163,7 @@ fn dispatchBrowserTool( }; // JS errors are returned as isError tool results, not protocol errors - const eval_outcome: ?(browser_tools.ToolError!browser_tools.EvalResult) = switch (action) { - .eval => browser_tools.callEval(arena, server.session, &server.node_registry, arguments), - .extract => browser_tools.callExtract(arena, server.session, &server.node_registry, arguments), - else => null, - }; - if (eval_outcome) |outcome| { + if (browser_tools.callEvalLike(arena, server.session, &server.node_registry, action, arguments)) |outcome| { if (outcome) |r| { if (!r.isError()) recordIfActive(server, name, arguments); } else |_| {} @@ -282,10 +277,8 @@ fn handleScriptStep(server: *Server, arena: std.mem.Allocator, id: std.json.Valu return sendErrorContent(server, id, "internal: unknown action from Command.toToolCall"); }; - switch (action) { - .eval => return sendEvalOutcome(server, id, browser_tools.callEval(arena, server.session, &server.node_registry, tc.args)), - .extract => return sendEvalOutcome(server, id, browser_tools.callExtract(arena, server.session, &server.node_registry, tc.args)), - else => {}, + if (browser_tools.callEvalLike(arena, server.session, &server.node_registry, action, tc.args)) |outcome| { + return sendEvalOutcome(server, id, outcome); } const result = browser_tools.call(arena, server.session, &server.node_registry, tc.name, tc.args) catch |err| { diff --git a/src/script.zig b/src/script.zig index 6110ba12..a26b9d24 100644 --- a/src/script.zig +++ b/src/script.zig @@ -76,6 +76,10 @@ pub const mcp_driver_guidance = \\ distinguishing attributes like value, name, or position to avoid \\ ambiguity. Example: input[type="submit"][value="login"], NOT just \\ input[type="submit"]. + \\- Use standard CSS selectors only. jQuery's `:contains()` and + \\ Playwright's `:has-text()` are not supported and raise a SyntaxError. + \\ To target by visible text, inspect with `tree` or `markdown` first to + \\ find the id/class/structure, then write a plain selector against that. \\ \\Credentials: \\- When filling credentials, pass environment variable references like