tools: unify eval and extract tool dispatching

- Add `callEvalLike` to consolidate tools returning `EvalResult`. - Update `Agent` and `MCP` to use the new helper. - Route failed data commands to stderr in `CommandExecutor`. - Update CSS selector guidance in `script.zig`.
2026-06-11 09:35:59 -04:00 · 2026-05-14 11:14:11 +02:00
parent faba86cf82
commit 9ac2c03a7e
5 changed files with 59 additions and 51 deletions
--- a/src/agent/Agent.zig
+++ b/src/agent/Agent.zig
@@ -428,30 +428,31 @@ fn handleSlash(self: *Agent, body: []const u8) bool {
        return false;
    };

-    if (std.mem.eql(u8, schema.tool_name, @tagName(lp.tools.Action.eval))) {
-        // callEval surfaces JS errors separately from operational errors;
-        // tool_executor.call collapses them.
-        const eval_script = extractEvalScript(aa, args_json) catch {
-            self.terminal.printError("eval requires a `script` argument.");
-            return false;
-        };
-        self.terminal.beginTool(schema.tool_name, rest);
-        const result = self.tool_executor.callEval(aa, eval_script) catch |err| {
-            self.terminal.endTool(false);
-            self.terminal.printErrorFmt("eval: {s}", .{@errorName(err)});
-            return false;
-        };
-        switch (result) {
-            .ok => |text| {
-                self.terminal.endTool(true);
-                self.terminal.printToolResult(schema.tool_name, text);
-            },
-            .js_error => |text| {
+    if (std.meta.stringToEnum(lp.tools.Action, schema.tool_name)) |action| {
+        const parsed_args: ?std.json.Value = if (args_json.len == 0) null else
+            std.json.parseFromSliceLeaky(std.json.Value, aa, args_json, .{}) catch {
+                self.terminal.printErrorFmt("{s}: invalid JSON arguments", .{schema.tool_name});
+                return false;
+            };
+        if (lp.tools.callEvalLike(aa, self.tool_executor.session, &self.tool_executor.node_registry, action, parsed_args)) |outcome| {
+            self.terminal.beginTool(schema.tool_name, rest);
+            const result = outcome catch |err| {
                self.terminal.endTool(false);
-                self.terminal.printErrorFmt("eval: {s}", .{text});
-            },
+                self.terminal.printErrorFmt("{s}: {s}", .{ schema.tool_name, @errorName(err) });
+                return false;
+            };
+            switch (result) {
+                .ok => |text| {
+                    self.terminal.endTool(true);
+                    self.terminal.printToolResult(schema.tool_name, text);
+                },
+                .js_error => |text| {
+                    self.terminal.endTool(false);
+                    self.terminal.printErrorFmt("{s}: {s}", .{ schema.tool_name, text });
+                },
+            }
+            return false;
        }
-        return false;
    }

    self.terminal.beginTool(schema.tool_name, rest);
@@ -526,12 +527,6 @@ fn firstSentence(text: []const u8) []const u8 {
    return text;
 }

-fn extractEvalScript(arena: std.mem.Allocator, args_json: []const u8) ![]const u8 {
-    if (args_json.len == 0) return error.MissingScript;
-    const parsed = std.json.parseFromSliceLeaky(struct { script: []const u8 }, arena, args_json, .{ .ignore_unknown_fields = true }) catch return error.MissingScript;
-    return parsed.script;
-}
-
 const Replacement = script.Replacement;

 fn runScript(self: *Agent, path: []const u8) bool {
@@ -1102,13 +1097,14 @@ fn handleToolCall(ctx: *anyopaque, allocator: std.mem.Allocator, tool_name: []co
    self.terminal.spinner.setTool(tool_name, args_str);
    defer self.terminal.spinner.setThinking();

-    // `eval` is the one tool whose body distinguishes "JS threw" from "JS
-    // returned a value" — both produce text. `callValue` collapses them
-    // (the LLM would then see a JS error as a successful tool result).
-    // Route eval through `callEval` so the JS-error path sets is_error=true,
-    // letting the model self-correct.
-    if (std.mem.eql(u8, tool_name, @tagName(lp.tools.Action.eval))) {
-        const result = lp.tools.callEval(allocator, self.tool_executor.session, &self.tool_executor.node_registry, arguments) catch |err| {
+    // V8 throw/return must surface in `is_error` — the recorder gates on it.
+    const action = std.meta.stringToEnum(lp.tools.Action, tool_name);
+    const eval_like = if (action) |a|
+        lp.tools.callEvalLike(allocator, self.tool_executor.session, &self.tool_executor.node_registry, a, arguments)
+    else
+        null;
+    if (eval_like) |outcome| {
+        const result = outcome catch |err| {
            const msg = std.fmt.allocPrint(allocator, "Error: {s}", .{@errorName(err)}) catch "Error: tool execution failed";
            self.terminal.agentToolDone(tool_name, args_str, false);
            if (self.terminal.verbosity == .high) self.terminal.printToolResult(tool_name, msg);
--- a/src/agent/CommandExecutor.zig
+++ b/src/agent/CommandExecutor.zig
@@ -59,10 +59,10 @@ pub fn executeWithResult(self: *Self, arena: std.mem.Allocator, cmd: Command.Com
        return .{ .output = std.fmt.allocPrint(arena, "{s} failed: {s}", .{ tc.name, @errorName(err) }) catch "tool failed", .failed = true };
 }

-/// Data-producing commands (EXTRACT/EVAL/MARKDOWN/TREE) go to stdout so shell
-/// redirection captures only their output; action commands go to stderr.
+/// Data output (EXTRACT/EVAL/MARKDOWN/TREE) → stdout on success; everything
+/// else, including failures from those same commands, → stderr.
 pub fn printResult(self: *Self, cmd: Command.Command, result: ExecResult) void {
-    if (cmd.producesData()) {
+    if (cmd.producesData() and !result.failed) {
        self.terminal.printAssistant(result.output);
    } else {
        self.terminal.printActionResult(result.output);
@@ -75,9 +75,7 @@ fn execExtract(self: *Self, arena: std.mem.Allocator, raw_schema: []const u8) Ex
    return evalLikeResult(self.tool_executor.extract(arena, schema));
 }

-/// Collapse an `EvalResult` into an `ExecResult` while preserving `isError`:
-/// V8 throws would otherwise round-trip as `failed = false` through the
-/// generic `[]const u8` path.
+/// `EvalResult` → `ExecResult`, preserving `isError` (the generic text path drops it).
 fn evalLikeResult(result: browser_tools.ToolError!browser_tools.EvalResult) ExecResult {
    const r = result catch |err| return .{ .output = @errorName(err), .failed = true };
    return .{ .output = r.text(), .failed = r.isError() };
--- a/src/browser/tools.zig
+++ b/src/browser/tools.zig
@@ -495,6 +495,23 @@ pub fn callExtract(
    return execExtract(arena, session, registry, arguments);
 }

+/// Dispatch the eval-like subset (`eval`, `extract`) — they return
+/// `EvalResult` so JS errors stay distinguishable. Returns null for other
+/// actions so callers fall through to `call()`.
+pub fn callEvalLike(
+    arena: std.mem.Allocator,
+    session: *lp.Session,
+    registry: *CDPNode.Registry,
+    action: Action,
+    arguments: ?std.json.Value,
+) ?(ToolError!EvalResult) {
+    return switch (action) {
+        .eval => callEval(arena, session, registry, arguments),
+        .extract => callExtract(arena, session, registry, arguments),
+        else => null,
+    };
+}
+
 /// Run JavaScript against the current page, skipping the JSON parameter
 /// round-trip that `callEval` requires. The script need not be 0-terminated;
 /// a copy is made internally.
--- a/src/mcp/tools.zig
+++ b/src/mcp/tools.zig
@@ -163,12 +163,7 @@ fn dispatchBrowserTool(
    };

    // JS errors are returned as isError tool results, not protocol errors
-    const eval_outcome: ?(browser_tools.ToolError!browser_tools.EvalResult) = switch (action) {
-        .eval => browser_tools.callEval(arena, server.session, &server.node_registry, arguments),
-        .extract => browser_tools.callExtract(arena, server.session, &server.node_registry, arguments),
-        else => null,
-    };
-    if (eval_outcome) |outcome| {
+    if (browser_tools.callEvalLike(arena, server.session, &server.node_registry, action, arguments)) |outcome| {
        if (outcome) |r| {
            if (!r.isError()) recordIfActive(server, name, arguments);
        } else |_| {}
@@ -282,10 +277,8 @@ fn handleScriptStep(server: *Server, arena: std.mem.Allocator, id: std.json.Valu
        return sendErrorContent(server, id, "internal: unknown action from Command.toToolCall");
    };

-    switch (action) {
-        .eval => return sendEvalOutcome(server, id, browser_tools.callEval(arena, server.session, &server.node_registry, tc.args)),
-        .extract => return sendEvalOutcome(server, id, browser_tools.callExtract(arena, server.session, &server.node_registry, tc.args)),
-        else => {},
+    if (browser_tools.callEvalLike(arena, server.session, &server.node_registry, action, tc.args)) |outcome| {
+        return sendEvalOutcome(server, id, outcome);
    }

    const result = browser_tools.call(arena, server.session, &server.node_registry, tc.name, tc.args) catch |err| {
--- a/src/script.zig
+++ b/src/script.zig
@@ -76,6 +76,10 @@ pub const mcp_driver_guidance =
    \\  distinguishing attributes like value, name, or position to avoid
    \\  ambiguity. Example: input[type="submit"][value="login"], NOT just
    \\  input[type="submit"].
+    \\- Use standard CSS selectors only. jQuery's `:contains()` and
+    \\  Playwright's `:has-text()` are not supported and raise a SyntaxError.
+    \\  To target by visible text, inspect with `tree` or `markdown` first to
+    \\  find the id/class/structure, then write a plain selector against that.
    \\
    \\Credentials:
    \\- When filling credentials, pass environment variable references like