agent: improve recording quality and tool guidance

- Filter redundant `extract` calls in recordings to reduce noise.
- Scrub literal `LP_*` secrets from recorded commands for portability.
- Throw a descriptive error in `extract` when no selectors match.
- Update tool descriptions and guidance to favor a `tree` ->
  `nodeDetails` -> `extract` workflow for more robust selectors.
This commit is contained in:
Adrià Arrufat
2026-05-14 13:33:22 +02:00
parent 9ac2c03a7e
commit 2862feb7c4
4 changed files with 87 additions and 19 deletions

View File

@@ -429,11 +429,10 @@ fn handleSlash(self: *Agent, body: []const u8) bool {
};
if (std.meta.stringToEnum(lp.tools.Action, schema.tool_name)) |action| {
const parsed_args: ?std.json.Value = if (args_json.len == 0) null else
std.json.parseFromSliceLeaky(std.json.Value, aa, args_json, .{}) catch {
self.terminal.printErrorFmt("{s}: invalid JSON arguments", .{schema.tool_name});
return false;
};
const parsed_args: ?std.json.Value = if (args_json.len == 0) null else std.json.parseFromSliceLeaky(std.json.Value, aa, args_json, .{}) catch {
self.terminal.printErrorFmt("{s}: invalid JSON arguments", .{schema.tool_name});
return false;
};
if (lp.tools.callEvalLike(aa, self.tool_executor.session, &self.tool_executor.node_registry, action, parsed_args)) |outcome| {
self.terminal.beginTool(schema.tool_name, rest);
const result = outcome catch |err| {
@@ -948,9 +947,21 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 {
defer result.deinit();
if (self.recorder) |*r| if (r.isActive()) {
var recorded_any = false;
for (result.tool_calls_made) |tc| {
// Probing: when the LLM tries several `extract` schemas in one turn,
// keep only the last successful one in the recording — earlier
// attempts are noise.
var last_extract_idx: ?usize = null;
for (result.tool_calls_made, 0..) |tc, i| {
if (tc.is_error) continue;
if (std.mem.eql(u8, tc.name, "extract")) last_extract_idx = i;
}
var recorded_any = false;
for (result.tool_calls_made, 0..) |tc, i| {
if (tc.is_error) continue;
if (std.mem.eql(u8, tc.name, "extract")) {
if (last_extract_idx) |idx| if (idx != i) continue;
}
const args = tc.arguments orelse continue;
const cmd = Command.fromToolCall(tc.name, args) orelse continue;
if (!recorded_any) {

View File

@@ -161,7 +161,7 @@ pub const tool_defs = [_]ToolDef{
},
.{
.name = "tree",
.description = "Get the page content as a simplified semantic DOM tree for AI reasoning. If a url is provided, it navigates to that url first.",
.description = "Get the page content as a simplified semantic DOM tree (role, name, value, backendNodeId per node) for AI reasoning. Tree output does NOT include raw HTML attributes like `id` or `class` — once you've located the node you want, call `nodeDetails` on its backendNodeId to read its id/class for selector synthesis. If a url is provided, it navigates to that url first.",
.input_schema = minify(
\\{
\\ "type": "object",
@@ -177,7 +177,7 @@ pub const tool_defs = [_]ToolDef{
},
.{
.name = "nodeDetails",
.description = "Get detailed information about a specific node by its backend node ID. Returns tag, role, name, interactivity, disabled state, value, input type, placeholder, href, checked state, and select options.",
.description = "Get detailed information about a specific node by its backend node ID. Returns tag, role, name, interactivity, disabled state, value, input type, placeholder, href, **id**, **class**, checked state, and select options. Use this AFTER `tree` to discover the id/class of a node you want to target with `extract` or `click` — it's the canonical way to turn a tree backendNodeId into a CSS selector.",
.input_schema = minify(
\\{
\\ "type": "object",
@@ -576,7 +576,13 @@ const schema_walker_prefix =
\\ return valueOf(t, v);
\\ }
\\ const out = {};
\\ for (const k in schema) out[k] = ext(document, schema[k]);
\\ let any = false;
\\ for (const k in schema) {
\\ out[k] = ext(document, schema[k]);
\\ const v = out[k];
\\ if (v !== null && !(Array.isArray(v) && v.length === 0)) any = true;
\\ }
\\ if (!any) throw new Error("extract: no selector in the schema matched any element on the page — inspect with `tree` or `markdown` first to find real ids/classes, then retry with a corrected schema");
\\ return out;
\\})(
;
@@ -1239,6 +1245,31 @@ pub fn substituteEnvVars(arena: std.mem.Allocator, input: []const u8) error{OutO
return result.toOwnedSlice(arena);
}
/// Inverse of `substituteEnvVars`: replace literal LP_* env-var values with
/// their `$LP_NAME` placeholders. Used by the recorder so an agent that
/// paste-substituted a credential into a tool call doesn't leak the
/// resolved secret into the .lp file. Returns input unchanged when nothing
/// matched. Short values (< 4 chars) are skipped to avoid false-positive
/// matches against incidental substrings.
pub fn reverseSubstituteEnvVars(arena: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]const u8 {
const env_names = lpEnvNames() catch return input;
var current: []const u8 = input;
var changed = false;
var name_buf: [256]u8 = undefined;
for (env_names) |name| {
const value = lookupLpEnv(name) orelse continue;
if (value.len < 4) continue;
if (std.mem.indexOf(u8, current, value) == null) continue;
if (name.len + 1 >= name_buf.len) continue;
name_buf[0] = '$';
@memcpy(name_buf[1 .. 1 + name.len], name);
const placeholder = name_buf[0 .. 1 + name.len];
current = try std.mem.replaceOwned(u8, arena, current, value, placeholder);
changed = true;
}
return if (changed) current else input;
}
test "substituteEnvVars resolves LP_* vars" {
var arena: std.heap.ArenaAllocator = .init(std.testing.allocator);
defer arena.deinit();

View File

@@ -82,11 +82,16 @@ pub const mcp_driver_guidance =
\\ find the id/class/structure, then write a plain selector against that.
\\
\\Credentials:
\\- When filling credentials, pass environment variable references like
\\ $LP_USERNAME and $LP_PASSWORD directly as the `value` field of fill —
\\ they are resolved inside the Lightpanda subprocess so the literal
\\ secret never enters your context. Do NOT call getEnv with a credential
\\ name; getEnv returns the value and would leak it into your context.
\\- Pass `$LP_*` references directly in ANY tool's string args — fill
\\ values, goto URLs, click selectors, anywhere a credential appears.
\\ The placeholder is resolved inside the Lightpanda subprocess so the
\\ literal secret never enters your context. If `getUrl` returns a URL
\\ where the credential has already been resolved (e.g.
\\ `?id=actualname`), DO NOT retype the literal value into a follow-up
\\ `goto` — keep using the `$LP_*` form. Retyping leaks the secret into
\\ the recording.
\\- Do NOT call getEnv with a credential name; getEnv returns the value
\\ and would leak it into your context.
\\- To discover which variables are available, call getEnv with NO `name`
\\ argument — it lists every LP_* variable that is set, names only,
\\ values never included. Safe to call before logging in to pick the
@@ -112,9 +117,22 @@ pub const mcp_driver_guidance =
\\ recorded as an `EXTRACT` PandaScript line, so a later replay (no LLM)
\\ prints the value to stdout. Reading the page via `markdown` and
\\ answering only in chat does NOT survive replay.
\\- Use `markdown` / `tree` / `interactiveElements` to *discover* the right
\\ selector, then commit to one `extract` call. See the `extract` tool
\\ description for the schema grammar and examples.
\\- After every navigation (`goto` or a `click` that changes URL), call
\\ `tree` BEFORE any `extract`. Do NOT rely on memorized site structure —
\\ even well-known sites (Hacker News, GitHub, …) are where models go
\\ wrong, because they pattern-match training data instead of reading
\\ the current DOM.
\\- `tree` returns roles/names/text and a `backendNodeId` per node but NOT
\\ raw HTML attributes. To turn a tree node into a CSS selector, call
\\ `nodeDetails(backendNodeId)` on the node you want — it returns the
\\ element's `id` and `class`. Build your selector from those (e.g.
\\ `#karma`, `.athing`) rather than guessing structural paths like
\\ `tr:nth-child(3) td:nth-child(2)`.
\\- Commit to one `extract` call against an id/class you read from
\\ `nodeDetails`. If `extract` errors with "no selector matched", you
\\ guessed; go back to tree/nodeDetails. If the value comes back but
\\ looks wrong (wrong type, garbage text), same thing.
\\- See the `extract` tool description for the schema grammar and examples.
\\
;

View File

@@ -77,7 +77,15 @@ pub fn record(self: *Self, cmd: Command.Command) void {
self.buf.clearRetainingCapacity();
cmd.format(&self.buf.writer) catch return;
self.buf.writer.writeByte('\n') catch return;
self.writeOrDisable(self.buf.written()) catch return;
// Reverse-substitute any LP_* env-var values that snuck in as literals
// (e.g. an agent that retyped a username it saw via getUrl) so the
// recording stays portable instead of leaking the resolved secret.
var scrub_arena: std.heap.ArenaAllocator = .init(self.allocator);
defer scrub_arena.deinit();
const scrubbed = lp.tools.reverseSubstituteEnvVars(scrub_arena.allocator(), self.buf.written()) catch self.buf.written();
self.writeOrDisable(scrubbed) catch return;
self.lines += 1;
}