agent: improve recording quality and tool guidance

- Filter redundant `extract` calls in recordings to reduce noise. - Scrub literal `LP_*` secrets from recorded commands for portability. - Throw a descriptive error in `extract` when no selectors match. - Update tool descriptions and guidance to favor a `tree` -> `nodeDetails` -> `extract` workflow for more robust selectors.
2026-06-11 09:35:59 -04:00 · 2026-05-14 13:33:22 +02:00
parent 9ac2c03a7e
commit 2862feb7c4
4 changed files with 87 additions and 19 deletions
--- a/src/agent/Agent.zig
+++ b/src/agent/Agent.zig
@@ -429,11 +429,10 @@ fn handleSlash(self: *Agent, body: []const u8) bool {
    };

    if (std.meta.stringToEnum(lp.tools.Action, schema.tool_name)) |action| {
-        const parsed_args: ?std.json.Value = if (args_json.len == 0) null else
-            std.json.parseFromSliceLeaky(std.json.Value, aa, args_json, .{}) catch {
-                self.terminal.printErrorFmt("{s}: invalid JSON arguments", .{schema.tool_name});
-                return false;
-            };
+        const parsed_args: ?std.json.Value = if (args_json.len == 0) null else std.json.parseFromSliceLeaky(std.json.Value, aa, args_json, .{}) catch {
+            self.terminal.printErrorFmt("{s}: invalid JSON arguments", .{schema.tool_name});
+            return false;
+        };
        if (lp.tools.callEvalLike(aa, self.tool_executor.session, &self.tool_executor.node_registry, action, parsed_args)) |outcome| {
            self.terminal.beginTool(schema.tool_name, rest);
            const result = outcome catch |err| {
@@ -948,9 +947,21 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 {
    defer result.deinit();

    if (self.recorder) |*r| if (r.isActive()) {
-        var recorded_any = false;
-        for (result.tool_calls_made) |tc| {
+        // Probing: when the LLM tries several `extract` schemas in one turn,
+        // keep only the last successful one in the recording — earlier
+        // attempts are noise.
+        var last_extract_idx: ?usize = null;
+        for (result.tool_calls_made, 0..) |tc, i| {
            if (tc.is_error) continue;
+            if (std.mem.eql(u8, tc.name, "extract")) last_extract_idx = i;
+        }
+
+        var recorded_any = false;
+        for (result.tool_calls_made, 0..) |tc, i| {
+            if (tc.is_error) continue;
+            if (std.mem.eql(u8, tc.name, "extract")) {
+                if (last_extract_idx) |idx| if (idx != i) continue;
+            }
            const args = tc.arguments orelse continue;
            const cmd = Command.fromToolCall(tc.name, args) orelse continue;
            if (!recorded_any) {
--- a/src/browser/tools.zig
+++ b/src/browser/tools.zig
@@ -161,7 +161,7 @@ pub const tool_defs = [_]ToolDef{
    },
    .{
        .name = "tree",
-        .description = "Get the page content as a simplified semantic DOM tree for AI reasoning. If a url is provided, it navigates to that url first.",
+        .description = "Get the page content as a simplified semantic DOM tree (role, name, value, backendNodeId per node) for AI reasoning. Tree output does NOT include raw HTML attributes like `id` or `class` — once you've located the node you want, call `nodeDetails` on its backendNodeId to read its id/class for selector synthesis. If a url is provided, it navigates to that url first.",
        .input_schema = minify(
            \\{
            \\  "type": "object",
@@ -177,7 +177,7 @@ pub const tool_defs = [_]ToolDef{
    },
    .{
        .name = "nodeDetails",
-        .description = "Get detailed information about a specific node by its backend node ID. Returns tag, role, name, interactivity, disabled state, value, input type, placeholder, href, checked state, and select options.",
+        .description = "Get detailed information about a specific node by its backend node ID. Returns tag, role, name, interactivity, disabled state, value, input type, placeholder, href, **id**, **class**, checked state, and select options. Use this AFTER `tree` to discover the id/class of a node you want to target with `extract` or `click` — it's the canonical way to turn a tree backendNodeId into a CSS selector.",
        .input_schema = minify(
            \\{
            \\  "type": "object",
@@ -576,7 +576,13 @@ const schema_walker_prefix =
    \\    return valueOf(t, v);
    \\  }
    \\  const out = {};
-    \\  for (const k in schema) out[k] = ext(document, schema[k]);
+    \\  let any = false;
+    \\  for (const k in schema) {
+    \\    out[k] = ext(document, schema[k]);
+    \\    const v = out[k];
+    \\    if (v !== null && !(Array.isArray(v) && v.length === 0)) any = true;
+    \\  }
+    \\  if (!any) throw new Error("extract: no selector in the schema matched any element on the page — inspect with `tree` or `markdown` first to find real ids/classes, then retry with a corrected schema");
    \\  return out;
    \\})(
 ;
@@ -1239,6 +1245,31 @@ pub fn substituteEnvVars(arena: std.mem.Allocator, input: []const u8) error{OutO
    return result.toOwnedSlice(arena);
 }

+/// Inverse of `substituteEnvVars`: replace literal LP_* env-var values with
+/// their `$LP_NAME` placeholders. Used by the recorder so an agent that
+/// paste-substituted a credential into a tool call doesn't leak the
+/// resolved secret into the .lp file. Returns input unchanged when nothing
+/// matched. Short values (< 4 chars) are skipped to avoid false-positive
+/// matches against incidental substrings.
+pub fn reverseSubstituteEnvVars(arena: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]const u8 {
+    const env_names = lpEnvNames() catch return input;
+    var current: []const u8 = input;
+    var changed = false;
+    var name_buf: [256]u8 = undefined;
+    for (env_names) |name| {
+        const value = lookupLpEnv(name) orelse continue;
+        if (value.len < 4) continue;
+        if (std.mem.indexOf(u8, current, value) == null) continue;
+        if (name.len + 1 >= name_buf.len) continue;
+        name_buf[0] = '$';
+        @memcpy(name_buf[1 .. 1 + name.len], name);
+        const placeholder = name_buf[0 .. 1 + name.len];
+        current = try std.mem.replaceOwned(u8, arena, current, value, placeholder);
+        changed = true;
+    }
+    return if (changed) current else input;
+}
+
 test "substituteEnvVars resolves LP_* vars" {
    var arena: std.heap.ArenaAllocator = .init(std.testing.allocator);
    defer arena.deinit();
--- a/src/script.zig
+++ b/src/script.zig
@@ -82,11 +82,16 @@ pub const mcp_driver_guidance =
    \\  find the id/class/structure, then write a plain selector against that.
    \\
    \\Credentials:
-    \\- When filling credentials, pass environment variable references like
-    \\  $LP_USERNAME and $LP_PASSWORD directly as the `value` field of fill —
-    \\  they are resolved inside the Lightpanda subprocess so the literal
-    \\  secret never enters your context. Do NOT call getEnv with a credential
-    \\  name; getEnv returns the value and would leak it into your context.
+    \\- Pass `$LP_*` references directly in ANY tool's string args — fill
+    \\  values, goto URLs, click selectors, anywhere a credential appears.
+    \\  The placeholder is resolved inside the Lightpanda subprocess so the
+    \\  literal secret never enters your context. If `getUrl` returns a URL
+    \\  where the credential has already been resolved (e.g.
+    \\  `?id=actualname`), DO NOT retype the literal value into a follow-up
+    \\  `goto` — keep using the `$LP_*` form. Retyping leaks the secret into
+    \\  the recording.
+    \\- Do NOT call getEnv with a credential name; getEnv returns the value
+    \\  and would leak it into your context.
    \\- To discover which variables are available, call getEnv with NO `name`
    \\  argument — it lists every LP_* variable that is set, names only,
    \\  values never included. Safe to call before logging in to pick the
@@ -112,9 +117,22 @@ pub const mcp_driver_guidance =
    \\  recorded as an `EXTRACT` PandaScript line, so a later replay (no LLM)
    \\  prints the value to stdout. Reading the page via `markdown` and
    \\  answering only in chat does NOT survive replay.
-    \\- Use `markdown` / `tree` / `interactiveElements` to *discover* the right
-    \\  selector, then commit to one `extract` call. See the `extract` tool
-    \\  description for the schema grammar and examples.
+    \\- After every navigation (`goto` or a `click` that changes URL), call
+    \\  `tree` BEFORE any `extract`. Do NOT rely on memorized site structure —
+    \\  even well-known sites (Hacker News, GitHub, …) are where models go
+    \\  wrong, because they pattern-match training data instead of reading
+    \\  the current DOM.
+    \\- `tree` returns roles/names/text and a `backendNodeId` per node but NOT
+    \\  raw HTML attributes. To turn a tree node into a CSS selector, call
+    \\  `nodeDetails(backendNodeId)` on the node you want — it returns the
+    \\  element's `id` and `class`. Build your selector from those (e.g.
+    \\  `#karma`, `.athing`) rather than guessing structural paths like
+    \\  `tr:nth-child(3) td:nth-child(2)`.
+    \\- Commit to one `extract` call against an id/class you read from
+    \\  `nodeDetails`. If `extract` errors with "no selector matched", you
+    \\  guessed; go back to tree/nodeDetails. If the value comes back but
+    \\  looks wrong (wrong type, garbage text), same thing.
+    \\- See the `extract` tool description for the schema grammar and examples.
    \\
 ;

--- a/src/script/Recorder.zig
+++ b/src/script/Recorder.zig
@@ -77,7 +77,15 @@ pub fn record(self: *Self, cmd: Command.Command) void {
    self.buf.clearRetainingCapacity();
    cmd.format(&self.buf.writer) catch return;
    self.buf.writer.writeByte('\n') catch return;
-    self.writeOrDisable(self.buf.written()) catch return;
+
+    // Reverse-substitute any LP_* env-var values that snuck in as literals
+    // (e.g. an agent that retyped a username it saw via getUrl) so the
+    // recording stays portable instead of leaking the resolved secret.
+    var scrub_arena: std.heap.ArenaAllocator = .init(self.allocator);
+    defer scrub_arena.deinit();
+    const scrubbed = lp.tools.reverseSubstituteEnvVars(scrub_arena.allocator(), self.buf.written()) catch self.buf.written();
+
+    self.writeOrDisable(scrubbed) catch return;
    self.lines += 1;
 }