mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 09:35:59 -04:00
agent: unify verification logic and failure reporting
Consolidates verification and failure reason generation into a single call. Optimizes state capture to be command-aware and fixes EVAL block parsing logic in `Command.zig`.
This commit is contained in:
@@ -313,29 +313,34 @@ fn runScript(self: *Self, path: []const u8) bool {
|
||||
var cmd_arena = std.heap.ArenaAllocator.init(self.allocator);
|
||||
defer cmd_arena.deinit();
|
||||
|
||||
const pre_state = if (self.self_heal) self.verifier.capturePreState(cmd_arena.allocator()) else undefined;
|
||||
const pre_state: ?Verifier.PreState = if (self.self_heal)
|
||||
self.verifier.capturePreState(cmd_arena.allocator(), entry.command)
|
||||
else
|
||||
null;
|
||||
|
||||
const result = self.cmd_executor.executeWithResult(cmd_arena.allocator(), entry.command);
|
||||
self.cmd_executor.printResult(entry.command, result);
|
||||
|
||||
const effective_failed = result.failed or
|
||||
(self.self_heal and !result.failed and
|
||||
self.verifier.verify(cmd_arena.allocator(), entry.command, pre_state, last_intent) == .failed);
|
||||
const verification = if (!result.failed and pre_state != null)
|
||||
self.verifier.verify(cmd_arena.allocator(), entry.command, pre_state.?, last_intent)
|
||||
else
|
||||
Verifier.VerifyResult{ .result = .passed };
|
||||
|
||||
const effective_failed = result.failed or verification.result == .failed;
|
||||
|
||||
if (effective_failed) {
|
||||
if (self.self_heal and self.ai_client != null) {
|
||||
// Phase 4: retry with wait before LLM escalation for
|
||||
// Retry with wait before LLM escalation for
|
||||
// verification failures (not hard failures).
|
||||
if (!result.failed and isRetryable(entry.command)) {
|
||||
var retried = false;
|
||||
for (0..2) |retry_i| {
|
||||
_ = retry_i;
|
||||
for (0..2) |_| {
|
||||
std.Thread.sleep(500 * std.time.ns_per_ms);
|
||||
self.terminal.printInfo("Retrying command...");
|
||||
const retry_pre = self.verifier.capturePreState(cmd_arena.allocator(), entry.command);
|
||||
const retry_result = self.cmd_executor.executeWithResult(cmd_arena.allocator(), entry.command);
|
||||
if (!retry_result.failed) {
|
||||
const retry_pre = self.verifier.capturePreState(cmd_arena.allocator());
|
||||
_ = retry_pre;
|
||||
if (self.verifier.verify(cmd_arena.allocator(), entry.command, pre_state, last_intent) != .failed) {
|
||||
if (self.verifier.verify(cmd_arena.allocator(), entry.command, retry_pre, last_intent).result != .failed) {
|
||||
self.cmd_executor.printResult(entry.command, retry_result);
|
||||
retried = true;
|
||||
break;
|
||||
@@ -345,15 +350,14 @@ fn runScript(self: *Self, path: []const u8) bool {
|
||||
if (retried) continue;
|
||||
}
|
||||
|
||||
// Phase 5: include verification context in self-heal prompt.
|
||||
const verify_context: ?[]const u8 = if (!result.failed)
|
||||
self.verifier.failureReason(cmd_arena.allocator(), entry.command, pre_state, last_intent)
|
||||
const msg = if (result.failed)
|
||||
"Command failed, attempting self-healing..."
|
||||
else
|
||||
null;
|
||||
"Command succeeded but verification failed, attempting self-healing...";
|
||||
self.terminal.printInfo(msg);
|
||||
|
||||
self.terminal.printInfo(if (result.failed) "Command failed, attempting self-healing..." else "Command succeeded but verification failed, attempting self-healing...");
|
||||
if (self.attemptSelfHeal(last_intent, entry.raw_line, verify_context, sa)) |healed_cmds| {
|
||||
if (self.formatReplacement(sa, entry.raw_span, entry.raw_line, healed_cmds)) |replacement| {
|
||||
if (self.attemptSelfHeal(last_intent, entry.raw_line, verification.reason, sa)) |healed_cmds| {
|
||||
if (formatReplacement(sa, entry.raw_span, entry.raw_line, healed_cmds)) |replacement| {
|
||||
replacements.append(sa, replacement) catch {};
|
||||
}
|
||||
continue;
|
||||
@@ -375,8 +379,7 @@ fn runScript(self: *Self, path: []const u8) bool {
|
||||
return true;
|
||||
}
|
||||
|
||||
fn formatReplacement(self: *Self, arena: std.mem.Allocator, original_span: []const u8, raw_line: []const u8, cmds: []const Command.Command) ?Replacement {
|
||||
_ = self;
|
||||
fn formatReplacement(arena: std.mem.Allocator, original_span: []const u8, raw_line: []const u8, cmds: []const Command.Command) ?Replacement {
|
||||
var aw: std.Io.Writer.Allocating = .init(arena);
|
||||
|
||||
aw.writer.print("# [Auto-healed] Original: {s}\n", .{raw_line}) catch return null;
|
||||
|
||||
@@ -256,24 +256,17 @@ pub const ScriptIterator = struct {
|
||||
|
||||
if (isEvalTripleQuote(trimmed)) |quote_type| {
|
||||
const start_line = self.line_num;
|
||||
const span_end = blk: {
|
||||
const js_or_null: ?[]const u8 = self.collectEvalBlock(quote_type);
|
||||
const end = self.lines.index orelse self.lines.buffer.len;
|
||||
if (js_or_null) |js| {
|
||||
return .{
|
||||
.line_num = start_line,
|
||||
.raw_line = trimmed,
|
||||
.raw_span = self.lines.buffer[line_start..end],
|
||||
.command = .{ .eval_js = js },
|
||||
};
|
||||
}
|
||||
break :blk end;
|
||||
};
|
||||
const js_or_null = self.collectEvalBlock(quote_type);
|
||||
const span_end = self.lines.index orelse self.lines.buffer.len;
|
||||
const cmd: Command = if (js_or_null) |js|
|
||||
.{ .eval_js = js }
|
||||
else
|
||||
.{ .natural_language = "unterminated EVAL block" };
|
||||
return .{
|
||||
.line_num = start_line,
|
||||
.raw_line = trimmed,
|
||||
.raw_span = self.lines.buffer[line_start..span_end],
|
||||
.command = .{ .natural_language = "unterminated EVAL block" },
|
||||
.command = cmd,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -12,89 +12,97 @@ pub const Result = enum {
|
||||
inconclusive,
|
||||
};
|
||||
|
||||
pub const VerifyResult = struct {
|
||||
result: Result,
|
||||
reason: ?[]const u8 = null,
|
||||
};
|
||||
|
||||
pub const PreState = struct {
|
||||
url: []const u8,
|
||||
dom_element_count: ?u32,
|
||||
};
|
||||
|
||||
pub fn capturePreState(self: *Self, arena: std.mem.Allocator) PreState {
|
||||
pub fn capturePreState(self: *Self, arena: std.mem.Allocator, cmd: Command.Command) PreState {
|
||||
return .{
|
||||
.url = self.tool_executor.getCurrentUrl(),
|
||||
.dom_element_count = self.getDomElementCount(arena),
|
||||
.dom_element_count = if (cmd == .click) self.getDomElementCount(arena) else null,
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns the reason verification failed, or null if it passed/was inconclusive.
|
||||
pub fn failureReason(self: *Self, arena: std.mem.Allocator, cmd: Command.Command, pre: PreState, intent: ?[]const u8) ?[]const u8 {
|
||||
return switch (cmd) {
|
||||
.type_cmd => |args| self.fillFailureReason(arena, args.selector, args.value),
|
||||
.check => |args| self.checkFailureReason(arena, args.selector, args.checked),
|
||||
.click => self.clickFailureReason(arena, pre, intent),
|
||||
else => null,
|
||||
};
|
||||
}
|
||||
|
||||
/// Verify that a command achieved its intent after execution.
|
||||
/// Verify that a command achieved its intent after execution and return
|
||||
/// both the verdict and a human-readable failure reason (if applicable).
|
||||
/// Only called when the command did not hard-fail (ExecResult.failed == false).
|
||||
pub fn verify(self: *Self, arena: std.mem.Allocator, cmd: Command.Command, pre: PreState, intent: ?[]const u8) Result {
|
||||
pub fn verify(self: *Self, arena: std.mem.Allocator, cmd: Command.Command, pre: PreState, intent: ?[]const u8) VerifyResult {
|
||||
return switch (cmd) {
|
||||
.type_cmd => |args| self.verifyFill(arena, args.selector, args.value),
|
||||
.check => |args| self.verifyCheck(arena, args.selector, args.checked),
|
||||
.click => self.verifyClick(arena, pre, intent),
|
||||
else => .passed,
|
||||
else => .{ .result = .passed },
|
||||
};
|
||||
}
|
||||
|
||||
fn verifyFill(self: *Self, arena: std.mem.Allocator, selector: []const u8, expected_value: []const u8) Result {
|
||||
fn verifyFill(self: *Self, arena: std.mem.Allocator, selector: []const u8, expected_value: []const u8) VerifyResult {
|
||||
const script = std.fmt.allocPrint(
|
||||
arena,
|
||||
"(function(){{ var el = document.querySelector({s}); return el ? el.value : null; }})()",
|
||||
.{jsonQuote(arena, selector)},
|
||||
) catch return .inconclusive;
|
||||
) catch return .{ .result = .inconclusive };
|
||||
|
||||
const actual = self.tool_executor.callEval(arena, script) orelse return .inconclusive;
|
||||
const actual = self.tool_executor.callEval(arena, script) orelse return .{ .result = .inconclusive };
|
||||
|
||||
// Secret values ($LP_*): just verify non-empty.
|
||||
if (std.mem.indexOf(u8, expected_value, "$LP_") != null) {
|
||||
return if (actual.len == 0 or std.mem.eql(u8, actual, "null")) .failed else .passed;
|
||||
if (actual.len == 0 or std.mem.eql(u8, actual, "null"))
|
||||
return .{
|
||||
.result = .failed,
|
||||
.reason = std.fmt.allocPrint(arena, "element value is empty after fill (expected non-empty for secret)", .{}) catch null,
|
||||
};
|
||||
return .{ .result = .passed };
|
||||
}
|
||||
|
||||
// Plain values: exact comparison.
|
||||
return if (std.mem.eql(u8, actual, expected_value)) .passed else .failed;
|
||||
if (!std.mem.eql(u8, actual, expected_value))
|
||||
return .{
|
||||
.result = .failed,
|
||||
.reason = std.fmt.allocPrint(arena, "element value is \"{s}\" after fill (expected \"{s}\")", .{ actual, expected_value }) catch null,
|
||||
};
|
||||
return .{ .result = .passed };
|
||||
}
|
||||
|
||||
fn verifyCheck(self: *Self, arena: std.mem.Allocator, selector: []const u8, expected: bool) Result {
|
||||
fn verifyCheck(self: *Self, arena: std.mem.Allocator, selector: []const u8, expected: bool) VerifyResult {
|
||||
const script = std.fmt.allocPrint(
|
||||
arena,
|
||||
"(function(){{ var el = document.querySelector({s}); return el ? String(el.checked) : null; }})()",
|
||||
.{jsonQuote(arena, selector)},
|
||||
) catch return .inconclusive;
|
||||
) catch return .{ .result = .inconclusive };
|
||||
|
||||
const actual = self.tool_executor.callEval(arena, script) orelse return .inconclusive;
|
||||
const actual = self.tool_executor.callEval(arena, script) orelse return .{ .result = .inconclusive };
|
||||
const expected_str: []const u8 = if (expected) "true" else "false";
|
||||
return if (std.mem.eql(u8, actual, expected_str)) .passed else .failed;
|
||||
if (!std.mem.eql(u8, actual, expected_str))
|
||||
return .{
|
||||
.result = .failed,
|
||||
.reason = std.fmt.allocPrint(arena, "element checked state is {s} (expected {s})", .{ actual, expected_str }) catch null,
|
||||
};
|
||||
return .{ .result = .passed };
|
||||
}
|
||||
|
||||
fn verifyClick(self: *Self, arena: std.mem.Allocator, pre: PreState, intent: ?[]const u8) Result {
|
||||
// URL changed → click had an effect
|
||||
fn verifyClick(self: *Self, arena: std.mem.Allocator, pre: PreState, intent: ?[]const u8) VerifyResult {
|
||||
const current_url = self.tool_executor.getCurrentUrl();
|
||||
if (!std.mem.eql(u8, pre.url, current_url)) return .passed;
|
||||
if (!std.mem.eql(u8, pre.url, current_url)) return .{ .result = .passed };
|
||||
|
||||
// DOM element count changed → click had a visible effect (modal, accordion, etc.)
|
||||
if (pre.dom_element_count) |before_count| {
|
||||
const after_count = self.getDomElementCount(arena);
|
||||
if (after_count) |ac| {
|
||||
if (ac != before_count) return .passed;
|
||||
if (self.getDomElementCount(arena)) |ac| {
|
||||
if (ac != before_count) return .{ .result = .passed };
|
||||
}
|
||||
}
|
||||
|
||||
// URL unchanged, DOM unchanged — check if intent suggests navigation was expected
|
||||
if (intent) |i| {
|
||||
if (containsNavigationIntent(i)) return .failed;
|
||||
if (containsNavigationIntent(i))
|
||||
return .{
|
||||
.result = .failed,
|
||||
.reason = std.fmt.allocPrint(arena, "click had no effect: URL unchanged (still {s}), DOM unchanged, but intent suggests navigation was expected", .{current_url}) catch null,
|
||||
};
|
||||
}
|
||||
|
||||
// No intent, nothing changed — can't tell if this is wrong
|
||||
return .inconclusive;
|
||||
return .{ .result = .inconclusive };
|
||||
}
|
||||
|
||||
fn getDomElementCount(self: *Self, arena: std.mem.Allocator) ?u32 {
|
||||
@@ -116,57 +124,6 @@ fn containsNavigationIntent(intent: []const u8) bool {
|
||||
return false;
|
||||
}
|
||||
|
||||
fn fillFailureReason(self: *Self, arena: std.mem.Allocator, selector: []const u8, expected_value: []const u8) ?[]const u8 {
|
||||
const script = std.fmt.allocPrint(
|
||||
arena,
|
||||
"(function(){{ var el = document.querySelector({s}); return el ? el.value : null; }})()",
|
||||
.{jsonQuote(arena, selector)},
|
||||
) catch return null;
|
||||
|
||||
const actual = self.tool_executor.callEval(arena, script) orelse return null;
|
||||
|
||||
if (std.mem.indexOf(u8, expected_value, "$LP_") != null) {
|
||||
if (actual.len == 0 or std.mem.eql(u8, actual, "null"))
|
||||
return std.fmt.allocPrint(arena, "element value is empty after fill (expected non-empty for secret)", .{}) catch null;
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!std.mem.eql(u8, actual, expected_value))
|
||||
return std.fmt.allocPrint(arena, "element value is \"{s}\" after fill (expected \"{s}\")", .{ actual, expected_value }) catch null;
|
||||
return null;
|
||||
}
|
||||
|
||||
fn checkFailureReason(self: *Self, arena: std.mem.Allocator, selector: []const u8, expected: bool) ?[]const u8 {
|
||||
const script = std.fmt.allocPrint(
|
||||
arena,
|
||||
"(function(){{ var el = document.querySelector({s}); return el ? String(el.checked) : null; }})()",
|
||||
.{jsonQuote(arena, selector)},
|
||||
) catch return null;
|
||||
|
||||
const actual = self.tool_executor.callEval(arena, script) orelse return null;
|
||||
const expected_str: []const u8 = if (expected) "true" else "false";
|
||||
if (!std.mem.eql(u8, actual, expected_str))
|
||||
return std.fmt.allocPrint(arena, "element checked state is {s} (expected {s})", .{ actual, expected_str }) catch null;
|
||||
return null;
|
||||
}
|
||||
|
||||
fn clickFailureReason(self: *Self, arena: std.mem.Allocator, pre: PreState, intent: ?[]const u8) ?[]const u8 {
|
||||
const current_url = self.tool_executor.getCurrentUrl();
|
||||
if (!std.mem.eql(u8, pre.url, current_url)) return null; // URL changed, passed
|
||||
|
||||
if (pre.dom_element_count) |before_count| {
|
||||
if (self.getDomElementCount(arena)) |ac| {
|
||||
if (ac != before_count) return null; // DOM changed, passed
|
||||
}
|
||||
}
|
||||
|
||||
if (intent) |i| {
|
||||
if (containsNavigationIntent(i))
|
||||
return std.fmt.allocPrint(arena, "click had no effect: URL unchanged (still {s}), DOM unchanged, but intent suggests navigation was expected", .{current_url}) catch null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
fn jsonQuote(arena: std.mem.Allocator, s: []const u8) []const u8 {
|
||||
var aw: std.Io.Writer.Allocating = .init(arena);
|
||||
std.json.Stringify.value(s, .{}, &aw.writer) catch return "\"\"";
|
||||
|
||||
Reference in New Issue
Block a user