agent: unify verification logic and failure reporting

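Consolidates verification and failure-reason generation into a single
`verify` call that returns a `VerifyResult` (verdict plus optional reason).
Makes pre-state capture command-aware (the DOM element count is captured
only for click commands) and fixes EVAL block parsing logic in `Command.zig`.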
This commit is contained in:
Adrià Arrufat
2026-04-13 13:11:02 +02:00
parent 35200f9235
commit ed8ae3f83a
3 changed files with 74 additions and 121 deletions

View File

@@ -313,29 +313,34 @@ fn runScript(self: *Self, path: []const u8) bool {
var cmd_arena = std.heap.ArenaAllocator.init(self.allocator);
defer cmd_arena.deinit();
const pre_state = if (self.self_heal) self.verifier.capturePreState(cmd_arena.allocator()) else undefined;
const pre_state: ?Verifier.PreState = if (self.self_heal)
self.verifier.capturePreState(cmd_arena.allocator(), entry.command)
else
null;
const result = self.cmd_executor.executeWithResult(cmd_arena.allocator(), entry.command);
self.cmd_executor.printResult(entry.command, result);
const effective_failed = result.failed or
(self.self_heal and !result.failed and
self.verifier.verify(cmd_arena.allocator(), entry.command, pre_state, last_intent) == .failed);
const verification = if (!result.failed and pre_state != null)
self.verifier.verify(cmd_arena.allocator(), entry.command, pre_state.?, last_intent)
else
Verifier.VerifyResult{ .result = .passed };
const effective_failed = result.failed or verification.result == .failed;
if (effective_failed) {
if (self.self_heal and self.ai_client != null) {
// Phase 4: retry with wait before LLM escalation for
// Retry with wait before LLM escalation for
// verification failures (not hard failures).
if (!result.failed and isRetryable(entry.command)) {
var retried = false;
for (0..2) |retry_i| {
_ = retry_i;
for (0..2) |_| {
std.Thread.sleep(500 * std.time.ns_per_ms);
self.terminal.printInfo("Retrying command...");
const retry_pre = self.verifier.capturePreState(cmd_arena.allocator(), entry.command);
const retry_result = self.cmd_executor.executeWithResult(cmd_arena.allocator(), entry.command);
if (!retry_result.failed) {
const retry_pre = self.verifier.capturePreState(cmd_arena.allocator());
_ = retry_pre;
if (self.verifier.verify(cmd_arena.allocator(), entry.command, pre_state, last_intent) != .failed) {
if (self.verifier.verify(cmd_arena.allocator(), entry.command, retry_pre, last_intent).result != .failed) {
self.cmd_executor.printResult(entry.command, retry_result);
retried = true;
break;
@@ -345,15 +350,14 @@ fn runScript(self: *Self, path: []const u8) bool {
if (retried) continue;
}
// Phase 5: include verification context in self-heal prompt.
const verify_context: ?[]const u8 = if (!result.failed)
self.verifier.failureReason(cmd_arena.allocator(), entry.command, pre_state, last_intent)
const msg = if (result.failed)
"Command failed, attempting self-healing..."
else
null;
"Command succeeded but verification failed, attempting self-healing...";
self.terminal.printInfo(msg);
self.terminal.printInfo(if (result.failed) "Command failed, attempting self-healing..." else "Command succeeded but verification failed, attempting self-healing...");
if (self.attemptSelfHeal(last_intent, entry.raw_line, verify_context, sa)) |healed_cmds| {
if (self.formatReplacement(sa, entry.raw_span, entry.raw_line, healed_cmds)) |replacement| {
if (self.attemptSelfHeal(last_intent, entry.raw_line, verification.reason, sa)) |healed_cmds| {
if (formatReplacement(sa, entry.raw_span, entry.raw_line, healed_cmds)) |replacement| {
replacements.append(sa, replacement) catch {};
}
continue;
@@ -375,8 +379,7 @@ fn runScript(self: *Self, path: []const u8) bool {
return true;
}
fn formatReplacement(self: *Self, arena: std.mem.Allocator, original_span: []const u8, raw_line: []const u8, cmds: []const Command.Command) ?Replacement {
_ = self;
fn formatReplacement(arena: std.mem.Allocator, original_span: []const u8, raw_line: []const u8, cmds: []const Command.Command) ?Replacement {
var aw: std.Io.Writer.Allocating = .init(arena);
aw.writer.print("# [Auto-healed] Original: {s}\n", .{raw_line}) catch return null;

View File

@@ -256,24 +256,17 @@ pub const ScriptIterator = struct {
if (isEvalTripleQuote(trimmed)) |quote_type| {
const start_line = self.line_num;
const span_end = blk: {
const js_or_null: ?[]const u8 = self.collectEvalBlock(quote_type);
const end = self.lines.index orelse self.lines.buffer.len;
if (js_or_null) |js| {
return .{
.line_num = start_line,
.raw_line = trimmed,
.raw_span = self.lines.buffer[line_start..end],
.command = .{ .eval_js = js },
};
}
break :blk end;
};
const js_or_null = self.collectEvalBlock(quote_type);
const span_end = self.lines.index orelse self.lines.buffer.len;
const cmd: Command = if (js_or_null) |js|
.{ .eval_js = js }
else
.{ .natural_language = "unterminated EVAL block" };
return .{
.line_num = start_line,
.raw_line = trimmed,
.raw_span = self.lines.buffer[line_start..span_end],
.command = .{ .natural_language = "unterminated EVAL block" },
.command = cmd,
};
}

View File

@@ -12,89 +12,97 @@ pub const Result = enum {
inconclusive,
};
/// Outcome of a post-execution verification: the verdict together with an
/// optional explanation.
pub const VerifyResult = struct {
    /// Verdict of the verification.
    result: Result,
    /// Human-readable explanation; defaults to null when none is attached.
    reason: ?[]const u8 = null,
};
/// Page state recorded before a command runs, so that verification can later
/// compare it with the state after the command.
pub const PreState = struct {
    /// URL of the page at capture time.
    url: []const u8,
    /// Number of DOM elements at capture time, or null when no count is held.
    dom_element_count: ?u32,
};
// NOTE(review): this span reads as rendered diff output. The two-parameter
// signature and the unconditional `.dom_element_count` initializer sit next to
// the `cmd`-taking signature and the click-only initializer. Presumably the
// second line of each pair is the current form; confirm against the real file.
/// Records the page state before a command runs: the URL reported by
/// `tool_executor.getCurrentUrl()` and a DOM element count obtained from
/// `getDomElementCount`. In the `cmd`-taking form the count is taken only
/// when `cmd == .click` and is null for every other command.
pub fn capturePreState(self: *Self, arena: std.mem.Allocator) PreState {
pub fn capturePreState(self: *Self, arena: std.mem.Allocator, cmd: Command.Command) PreState {
return .{
.url = self.tool_executor.getCurrentUrl(),
.dom_element_count = self.getDomElementCount(arena),
.dom_element_count = if (cmd == .click) self.getDomElementCount(arena) else null,
};
}
/// Produces a human-readable explanation of why verification of `cmd` failed.
/// Dispatches to the fill, check, or click helper; every other command kind
/// yields null, as does a verification that passed or was inconclusive.
pub fn failureReason(self: *Self, arena: std.mem.Allocator, cmd: Command.Command, pre: PreState, intent: ?[]const u8) ?[]const u8 {
    switch (cmd) {
        .click => return self.clickFailureReason(arena, pre, intent),
        .check => |check_args| return self.checkFailureReason(arena, check_args.selector, check_args.checked),
        .type_cmd => |fill_args| return self.fillFailureReason(arena, fill_args.selector, fill_args.value),
        else => return null,
    }
}
// NOTE(review): this span reads as rendered diff output. Two doc-comment
// openings, two signatures (returning `Result` and `VerifyResult`) and two
// `else` prongs appear back to back. Presumably the second of each pair is the
// current form; confirm against the real file.
/// Verify that a command achieved its intent after execution.
/// Verify that a command achieved its intent after execution and return
/// both the verdict and a human-readable failure reason (if applicable).
/// Only called when the command did not hard-fail (ExecResult.failed == false).
pub fn verify(self: *Self, arena: std.mem.Allocator, cmd: Command.Command, pre: PreState, intent: ?[]const u8) Result {
pub fn verify(self: *Self, arena: std.mem.Allocator, cmd: Command.Command, pre: PreState, intent: ?[]const u8) VerifyResult {
return switch (cmd) {
// Fill, check and click each have a dedicated verifier; only the click
// verifier receives the pre-execution snapshot and the intent.
.type_cmd => |args| self.verifyFill(arena, args.selector, args.value),
.check => |args| self.verifyCheck(arena, args.selector, args.checked),
.click => self.verifyClick(arena, pre, intent),
// Every other command kind is reported as passed without inspection.
else => .passed,
else => .{ .result = .passed },
};
}
// NOTE(review): this span reads as rendered diff output. Lines returning a
// bare `Result` tag are interleaved with lines returning a `VerifyResult`
// literal. Presumably the `VerifyResult` lines are the current form; confirm
// against the real file.
/// Checks that the first element matching `selector` holds `expected_value`
/// after a fill. The element's `value` is read by evaluating a script through
/// `tool_executor.callEval`; if the script cannot be built or the evaluation
/// yields nothing, the outcome is inconclusive.
fn verifyFill(self: *Self, arena: std.mem.Allocator, selector: []const u8, expected_value: []const u8) Result {
fn verifyFill(self: *Self, arena: std.mem.Allocator, selector: []const u8, expected_value: []const u8) VerifyResult {
// The selector is passed through `jsonQuote` before being embedded in the script.
const script = std.fmt.allocPrint(
arena,
"(function(){{ var el = document.querySelector({s}); return el ? el.value : null; }})()",
.{jsonQuote(arena, selector)},
) catch return .inconclusive;
) catch return .{ .result = .inconclusive };
const actual = self.tool_executor.callEval(arena, script) orelse return .inconclusive;
const actual = self.tool_executor.callEval(arena, script) orelse return .{ .result = .inconclusive };
// Secret values ($LP_*): just verify non-empty.
// The secret failure message does not include the observed or expected value.
if (std.mem.indexOf(u8, expected_value, "$LP_") != null) {
return if (actual.len == 0 or std.mem.eql(u8, actual, "null")) .failed else .passed;
if (actual.len == 0 or std.mem.eql(u8, actual, "null"))
return .{
.result = .failed,
.reason = std.fmt.allocPrint(arena, "element value is empty after fill (expected non-empty for secret)", .{}) catch null,
};
return .{ .result = .passed };
}
// Plain values: exact comparison.
// If formatting the reason fails, the result is still `.failed` with a null reason.
return if (std.mem.eql(u8, actual, expected_value)) .passed else .failed;
if (!std.mem.eql(u8, actual, expected_value))
return .{
.result = .failed,
.reason = std.fmt.allocPrint(arena, "element value is \"{s}\" after fill (expected \"{s}\")", .{ actual, expected_value }) catch null,
};
return .{ .result = .passed };
}
// NOTE(review): this span reads as rendered diff output. Lines returning a
// bare `Result` tag are interleaved with lines returning a `VerifyResult`
// literal. Presumably the `VerifyResult` lines are the current form; confirm
// against the real file.
/// Checks that the first element matching `selector` has the `checked` state
/// given by `expected`. The state is read as the string "true"/"false" by
/// evaluating a script through `tool_executor.callEval`; if the script cannot
/// be built or the evaluation yields nothing, the outcome is inconclusive.
fn verifyCheck(self: *Self, arena: std.mem.Allocator, selector: []const u8, expected: bool) Result {
fn verifyCheck(self: *Self, arena: std.mem.Allocator, selector: []const u8, expected: bool) VerifyResult {
const script = std.fmt.allocPrint(
arena,
"(function(){{ var el = document.querySelector({s}); return el ? String(el.checked) : null; }})()",
.{jsonQuote(arena, selector)},
) catch return .inconclusive;
) catch return .{ .result = .inconclusive };
const actual = self.tool_executor.callEval(arena, script) orelse return .inconclusive;
const actual = self.tool_executor.callEval(arena, script) orelse return .{ .result = .inconclusive };
// Compare as text, because the script stringifies `el.checked`.
const expected_str: []const u8 = if (expected) "true" else "false";
return if (std.mem.eql(u8, actual, expected_str)) .passed else .failed;
if (!std.mem.eql(u8, actual, expected_str))
return .{
.result = .failed,
.reason = std.fmt.allocPrint(arena, "element checked state is {s} (expected {s})", .{ actual, expected_str }) catch null,
};
return .{ .result = .passed };
}
// NOTE(review): this span reads as rendered diff output. Lines returning a
// bare `Result` tag are interleaved with lines returning a `VerifyResult`
// literal, and the DOM-count lookup appears in two spellings. Presumably the
// `VerifyResult` lines are the current form; confirm against the real file.
/// Judges whether a click had an effect by comparing the current page with
/// the `pre` snapshot. A changed URL or a changed DOM element count passes.
/// With neither changed, the click fails only when `intent` is present and
/// `containsNavigationIntent` accepts it; otherwise the outcome is inconclusive.
fn verifyClick(self: *Self, arena: std.mem.Allocator, pre: PreState, intent: ?[]const u8) Result {
// URL changed → click had an effect
fn verifyClick(self: *Self, arena: std.mem.Allocator, pre: PreState, intent: ?[]const u8) VerifyResult {
const current_url = self.tool_executor.getCurrentUrl();
if (!std.mem.eql(u8, pre.url, current_url)) return .passed;
if (!std.mem.eql(u8, pre.url, current_url)) return .{ .result = .passed };
// DOM element count changed → click had a visible effect (modal, accordion, etc.)
// The comparison is skipped when either the earlier or the current count is null.
if (pre.dom_element_count) |before_count| {
const after_count = self.getDomElementCount(arena);
if (after_count) |ac| {
if (ac != before_count) return .passed;
if (self.getDomElementCount(arena)) |ac| {
if (ac != before_count) return .{ .result = .passed };
}
}
// URL unchanged, DOM unchanged — check if intent suggests navigation was expected
if (intent) |i| {
if (containsNavigationIntent(i)) return .failed;
if (containsNavigationIntent(i))
return .{
.result = .failed,
.reason = std.fmt.allocPrint(arena, "click had no effect: URL unchanged (still {s}), DOM unchanged, but intent suggests navigation was expected", .{current_url}) catch null,
};
}
// No intent, nothing changed — can't tell if this is wrong
return .inconclusive;
return .{ .result = .inconclusive };
}
fn getDomElementCount(self: *Self, arena: std.mem.Allocator) ?u32 {
@@ -116,57 +124,6 @@ fn containsNavigationIntent(intent: []const u8) bool {
return false;
}
/// Explains why a fill did not take effect on the first element matching
/// `selector`, or returns null when there is nothing to report.
///
/// The element's `value` is read by evaluating a script through
/// `tool_executor.callEval`. Returns null when the script cannot be built,
/// when the evaluation yields nothing, or when the value is as expected.
/// A returned message is either a static string or allocated from `arena`.
fn fillFailureReason(self: *Self, arena: std.mem.Allocator, selector: []const u8, expected_value: []const u8) ?[]const u8 {
    // The selector is passed through `jsonQuote` before being embedded in the script.
    const script = std.fmt.allocPrint(
        arena,
        "(function(){{ var el = document.querySelector({s}); return el ? el.value : null; }})()",
        .{jsonQuote(arena, selector)},
    ) catch return null;

    const actual = self.tool_executor.callEval(arena, script) orelse return null;

    // Expected values containing "$LP_" are secrets: only require a non-empty
    // field, and keep both the observed and expected text out of the message.
    if (std.mem.indexOf(u8, expected_value, "$LP_") != null) {
        const is_empty = actual.len == 0 or std.mem.eql(u8, actual, "null");
        // The message is constant, so return the literal itself. Routing it
        // through allocPrint would turn an allocation failure into null,
        // which callers read as "no failure reason".
        if (is_empty) return "element value is empty after fill (expected non-empty for secret)";
        return null;
    }

    // Plain values must match exactly.
    if (std.mem.eql(u8, actual, expected_value)) return null;
    return std.fmt.allocPrint(arena, "element value is \"{s}\" after fill (expected \"{s}\")", .{ actual, expected_value }) catch null;
}
/// Explains why the `checked` state of the first element matching `selector`
/// differs from `expected`, or returns null when it matches.
///
/// The state is read as the string "true"/"false" by evaluating a script
/// through `tool_executor.callEval`. Also returns null when the script cannot
/// be built, when the evaluation yields nothing, or when the message cannot
/// be allocated from `arena`.
fn checkFailureReason(self: *Self, arena: std.mem.Allocator, selector: []const u8, expected: bool) ?[]const u8 {
    const quoted_selector = jsonQuote(arena, selector);
    const probe = std.fmt.allocPrint(
        arena,
        "(function(){{ var el = document.querySelector({s}); return el ? String(el.checked) : null; }})()",
        .{quoted_selector},
    ) catch return null;

    const observed = self.tool_executor.callEval(arena, probe) orelse return null;

    // Compare as text, because the script stringifies `el.checked`.
    const wanted: []const u8 = if (expected) "true" else "false";
    if (std.mem.eql(u8, observed, wanted)) return null;

    return std.fmt.allocPrint(arena, "element checked state is {s} (expected {s})", .{ observed, wanted }) catch null;
}
/// Explains why a click is considered to have had no effect, or returns null
/// when there is nothing to report.
///
/// Returns null when the URL differs from `pre.url`, when both DOM element
/// counts are available and differ, when `intent` is absent, or when
/// `containsNavigationIntent` rejects the intent. Otherwise returns a message
/// allocated from `arena` (null if that allocation fails).
fn clickFailureReason(self: *Self, arena: std.mem.Allocator, pre: PreState, intent: ?[]const u8) ?[]const u8 {
    const url_now = self.tool_executor.getCurrentUrl();
    const url_changed = !std.mem.eql(u8, pre.url, url_now);
    if (url_changed) return null; // navigation happened, so the click passed

    if (pre.dom_element_count) |count_before| {
        if (self.getDomElementCount(arena)) |count_after| {
            if (count_after != count_before) return null; // DOM changed, so the click passed
        }
    }

    // Nothing changed: only report a failure when the intent asked for navigation.
    const stated_intent = intent orelse return null;
    if (!containsNavigationIntent(stated_intent)) return null;

    return std.fmt.allocPrint(arena, "click had no effect: URL unchanged (still {s}), DOM unchanged, but intent suggests navigation was expected", .{url_now}) catch null;
}
fn jsonQuote(arena: std.mem.Allocator, s: []const u8) []const u8 {
var aw: std.Io.Writer.Allocating = .init(arena);
std.json.Stringify.value(s, .{}, &aw.writer) catch return "\"\"";