From 8abbd6d9c092c15f4354702fa2fa1d6627e8fc11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Mon, 25 May 2026 19:25:58 +0200 Subject: [PATCH] Script: handle duplicate argument keys and URL query params --- src/agent/Agent.zig | 11 +++++++---- src/browser/tools.zig | 13 +++++++++---- src/mcp/tools.zig | 9 +++++---- src/script.zig | 20 +++++++++++++------- src/script/Schema.zig | 40 +++++++++++++++++++++++++++++++++++----- 5 files changed, 69 insertions(+), 24 deletions(-) diff --git a/src/agent/Agent.zig b/src/agent/Agent.zig index fbe12091..7e2e6dc7 100644 --- a/src/agent/Agent.zig +++ b/src/agent/Agent.zig @@ -603,6 +603,7 @@ fn printSlashParseError(self: *Agent, err: Schema.ParseError, name: []const u8, error.MissingRequired => "missing required argument", error.MalformedKv => "malformed key=value. Use key=value or {json}", error.UnknownField => "unknown field (typo?)", + error.DuplicateField => "the same field was supplied twice (check for case-variants like Selector vs selector)", error.PositionalNotAllowed => "positional only works for tools with one required field. Use key=value", error.UnterminatedQuote => "unterminated quote", error.UnsupportedEscape => "backslash escapes aren't supported in quoted values; use the other quote style or `'''…'''`", @@ -716,8 +717,9 @@ fn runScript(self: *Agent, path: []const u8) bool { switch (self.runActionEntry(sa, entry, last_comment)) { .ok => {}, .healed => |r| replacements.append(sa, r) catch |err| { + self.flushReplacements(path, content, replacements.items); self.terminal.printError( - "line {d}: out of memory recording heal: {s} (script left unchanged)", + "line {d}: out of memory recording heal: {s}", .{ entry.line_num, @errorName(err) }, ); return false; @@ -817,8 +819,8 @@ fn flushReplacements(self: *Agent, path: []const u8, content: []const u8, replac if (replacements.len == 0) return; script.writeAtomic(self.allocator, std.fs.cwd(), path, content, replacements) catch |err| { self.terminal.printError( - "Failed to update script {s}: {s} (script left unchanged)", - .{ path, @errorName(err) }, + "Failed to update script {s}: {s} {s}", + .{ path, @errorName(err), script.writeAtomicErrorTail(err) }, ); return; }; @@ -1134,7 +1136,8 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 { if (tc.is_error) continue; const tool = std.meta.stringToEnum(BrowserTool, tc.name) orelse continue; if (last_extract_idx) |idx| if (tool == .extract and idx != i) continue; - const cmd = Command.fromToolCall(tool, tc.arguments); + const args = browser_tools.normalizeArgKeys(self.message_arena.allocator(), tool, tc.arguments) catch tc.arguments; + const cmd = Command.fromToolCall(tool, args); if (!cmd.isRecorded()) continue; if (!recorded_any) { if (input.record_comment) |c| r.recordComment(c); diff --git a/src/browser/tools.zig b/src/browser/tools.zig index fb68759e..917acb14 100644 --- a/src/browser/tools.zig +++ b/src/browser/tools.zig @@ -549,7 +549,10 @@ pub fn call( return .{ .text = msg, .is_error = true }; // Must run before substituteStringArgs so the `key=="value"` secret- // redaction check there still triggers on PascalCase keys. - const normalized = try normalizeArgKeys(arena, tool, arguments); + const normalized = normalizeArgKeys(arena, tool, arguments) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.DuplicateField => return error.InvalidParams, + }; const substituted = try substituteStringArgs(arena, tool, normalized); return dispatch(arena, session, registry, tool, substituted) catch |err| { @@ -1019,7 +1022,7 @@ fn execFill(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.R const Params = struct { backendNodeId: ?CDPNode.Id = null, selector: ?[]const u8 = null, - value: []const u8 = "", + value: []const u8, }; const args = try parseArgs(Params, arena, arguments); const raw_text = args.value; @@ -1421,7 +1424,7 @@ pub fn parseArgs(comptime T: type, arena: std.mem.Allocator, arguments: ?std.jso /// echoes the original placeholder so the credential never surfaces in the /// result text. Co-located with `execFill` so both halves of the carve-out /// live in one file. -fn normalizeArgKeys(arena: std.mem.Allocator, tool: Tool, args: ?std.json.Value) error{OutOfMemory}!?std.json.Value { +pub fn normalizeArgKeys(arena: std.mem.Allocator, tool: Tool, args: ?std.json.Value) !?std.json.Value { const v = args orelse return null; if (v != .object) return v; @@ -1441,7 +1444,9 @@ fn normalizeArgKeys(arena: std.mem.Allocator, tool: Tool, args: ?std.json.Value) it = v.object.iterator(); while (it.next()) |entry| { const canonical = if (schema.findField(entry.key_ptr.*)) |f| f.name else entry.key_ptr.*; - try new_obj.put(canonical, entry.value_ptr.*); + const gop = try new_obj.getOrPut(canonical); + if (gop.found_existing) return error.DuplicateField; + gop.value_ptr.* = entry.value_ptr.*; } return .{ .object = new_obj }; } diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig index ad18335c..05d2f98a 100644 --- a/src/mcp/tools.zig +++ b/src/mcp/tools.zig @@ -180,7 +180,7 @@ fn dispatchBrowserTool( return server.sendError(id, code, @errorName(err)); }; - if (!result.is_error) recordIfActive(server, tool, arguments); + if (!result.is_error) recordIfActive(arena, server, tool, arguments); try sendToolResultText(server, id, result.text, result.is_error); } @@ -189,9 +189,10 @@ fn surfacesErrorInBand(tool: BrowserTool) bool { return tool == .eval or tool == .extract; } -fn recordIfActive(server: *Server, tool: BrowserTool, arguments: ?std.json.Value) void { +fn recordIfActive(arena: std.mem.Allocator, server: *Server, tool: BrowserTool, arguments: ?std.json.Value) void { if (server.recorder == null) return; - const cmd = Command.fromToolCall(tool, arguments); + const normalized = browser_tools.normalizeArgKeys(arena, tool, arguments) catch arguments; + const cmd = Command.fromToolCall(tool, normalized); // `record` no-ops on non-recorded tools — see `Command.isRecorded`. server.recorder.?.record(cmd); } @@ -361,7 +362,7 @@ fn handleScriptHeal(server: *Server, arena: std.mem.Allocator, id: std.json.Valu } script.writeAtomic(arena, std.fs.cwd(), args.path, content, splices) catch |err| { - const msg = std.fmt.allocPrint(arena, "failed to write {s}: {s} (script left unchanged)", .{ args.path, @errorName(err) }) catch @errorName(err); + const msg = std.fmt.allocPrint(arena, "failed to write {s}: {s} {s}", .{ args.path, @errorName(err), script.writeAtomicErrorTail(err) }) catch @errorName(err); return sendErrorContent(server, id, msg); }; diff --git a/src/script.zig b/src/script.zig index f788924e..1e06f31b 100644 --- a/src/script.zig +++ b/src/script.zig @@ -165,9 +165,9 @@ pub fn applyReplacements( /// Atomically rewrite `dir`/`path` with `content` after `replacements` are /// applied. Builds the new content first (so an OOM here doesn't clobber a -/// prior `.bak`), then writes a fresh `.bak` of the original, then uses -/// Zig's `atomicFile` (write-to-temp + rename) for the live file. On -/// failure the original is left intact. +/// prior `.bak`), commits the live file via `atomicFile`, then refreshes +/// `.bak`. Pre-commit errors leave the original intact; a `.bak`-only +/// failure surfaces as `error.BakUpdateFailed` (live has been rewritten). pub fn writeAtomic( allocator: std.mem.Allocator, dir: std.fs.Dir, @@ -191,8 +191,13 @@ pub fn writeAtomic( try af.finish(); var bak_buf: [std.fs.max_path_bytes]u8 = undefined; - const bak_path = try std.fmt.bufPrint(&bak_buf, "{s}.bak", .{path}); - try dir.writeFile(.{ .sub_path = bak_path, .data = content }); + const bak_path = std.fmt.bufPrint(&bak_buf, "{s}.bak", .{path}) catch return error.BakUpdateFailed; + dir.writeFile(.{ .sub_path = bak_path, .data = content }) catch return error.BakUpdateFailed; +} + +/// Human-readable tail explaining file state after a `writeAtomic` error. +pub fn writeAtomicErrorTail(err: anyerror) []const u8 { + return if (err == error.BakUpdateFailed) "(live file updated; .bak refresh failed)" else "(script left unchanged)"; } /// Replacement body: either parsed Commands (agent self-heal) or pre-rendered @@ -447,9 +452,10 @@ test "writeAtomic: commits rewrite even when .bak write fails" { // Force the .bak write to fail by putting a directory at the .bak path. try tmp.dir.makeDir("script.lp.bak"); - try std.testing.expect(std.meta.isError( + try std.testing.expectError( + error.BakUpdateFailed, writeAtomic(std.testing.allocator, tmp.dir, "script.lp", original, &replacements), - )); + ); var buf: [256]u8 = undefined; const live = tmp.dir.openFile("script.lp", .{}) catch unreachable; diff --git a/src/script/Schema.zig b/src/script/Schema.zig index d1c14c5b..c55e7d7f 100644 --- a/src/script/Schema.zig +++ b/src/script/Schema.zig @@ -67,6 +67,7 @@ pub const ParseError = error{ UnknownTool, UnknownField, MissingRequired, + DuplicateField, MalformedKv, PositionalNotAllowed, UnterminatedQuote, @@ -104,14 +105,19 @@ pub fn findField(self: Schema, key: []const u8) ?FieldEntry { return null; } -/// Rename keys in `obj` to canonical casing. Unknown keys pass through. -pub fn normalizeKeys(self: Schema, obj: *std.json.ObjectMap) error{OutOfMemory}!void { +/// Rename keys in `obj` to canonical casing. Unknown keys pass through; +/// keys that collide on the canonical form return `error.DuplicateField`. +pub fn normalizeKeys(self: Schema, obj: *std.json.ObjectMap) !void { const Rename = struct { from: []const u8, to: []const u8 }; var renames: std.ArrayList(Rename) = .empty; var it = obj.iterator(); while (it.next()) |entry| { const field = self.findField(entry.key_ptr.*) orelse continue; if (!std.mem.eql(u8, field.name, entry.key_ptr.*)) { + if (obj.contains(field.name)) return error.DuplicateField; + for (renames.items) |r| { + if (std.mem.eql(u8, r.to, field.name)) return error.DuplicateField; + } try renames.append(obj.allocator, .{ .from = entry.key_ptr.*, .to = field.name }); } } @@ -493,13 +499,19 @@ fn stripQuotes(raw: []const u8) []const u8 { return raw; } -/// Quoted positionals (`'https://x?id=42'`) must not be misread as kv — -/// only look for `=` in the unquoted prefix. +/// True for unquoted `=` tokens. Quoted positionals +/// and URLs containing `=` fall through. fn looksLikeKv(tok: []const u8) bool { if (tok.len == 0) return false; if (tok[0] == '\'' or tok[0] == '"') return false; const end = std.mem.indexOfAny(u8, tok, "'\"") orelse tok.len; - return std.mem.indexOfScalar(u8, tok[0..end], '=') != null; + const eq = std.mem.indexOfScalar(u8, tok[0..end], '=') orelse return false; + if (eq == 0) return false; + if (!std.ascii.isAlphabetic(tok[0]) and tok[0] != '_') return false; + for (tok[1..eq]) |c| { + if (!std.ascii.isAlphanumeric(c) and c != '_') return false; + } + return true; } // --- Recorder-side formatting primitives --- @@ -660,6 +672,14 @@ test "parseValue: positional then kv tail" { try testing.expectEqual(@as(i64, 5000), v.object.get("timeout").?.integer); } +test "parseValue: unquoted URL with `=` in query string binds positional" { + var arena: std.heap.ArenaAllocator = .init(testing.allocator); + defer arena.deinit(); + const goto = Schema.find(Schema.all(), "goto").?; + const v = (try goto.parseValue(arena.allocator(), "https://example.com/?id=42")).?; + try testing.expectString("https://example.com/?id=42", v.object.get("url").?.string); +} + test "parseValue: kv-only multi-required" { var arena: std.heap.ArenaAllocator = .init(testing.allocator); defer arena.deinit(); @@ -717,6 +737,16 @@ test "parseValue: arg keys are case-insensitive (json path)" { try testing.expect(v.object.get("Selector") == null); } +test "parseValue: duplicate case-variants of the same field are rejected" { + var arena: std.heap.ArenaAllocator = .init(testing.allocator); + defer arena.deinit(); + const click = Schema.find(Schema.all(), "click").?; + try testing.expectError( + error.DuplicateField, + click.parseValue(arena.allocator(), "{\"Selector\":\"#a\",\"selector\":\"#b\"}"), + ); +} + test "parseValue: setChecked defaults checked=true when omitted" { var arena: std.heap.ArenaAllocator = .init(testing.allocator); defer arena.deinit();