From a81a24229b2e27be89e7277e0553e7b079d652f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Fri, 3 Apr 2026 08:26:22 +0200 Subject: [PATCH] Add interactive agent mode with LLM-powered web browsing Introduces `lightpanda agent` command that provides a REPL where users can chat with an AI that uses the browser's tools (goto, markdown, click, fill, etc.) to browse the web. Uses zenai for multi-provider LLM support (Anthropic, OpenAI, Gemini) and linenoise v2 for terminal line editing. --- build.zig | 15 ++ build.zig.zon | 7 + src/Config.zig | 125 +++++++++-- src/agent.zig | 3 + src/agent/Agent.zig | 242 ++++++++++++++++++++ src/agent/Terminal.zig | 64 ++++++ src/agent/ToolExecutor.zig | 442 +++++++++++++++++++++++++++++++++++++ src/lightpanda.zig | 1 + src/main.zig | 20 ++ 9 files changed, 903 insertions(+), 16 deletions(-) create mode 100644 src/agent.zig create mode 100644 src/agent/Agent.zig create mode 100644 src/agent/Terminal.zig create mode 100644 src/agent/ToolExecutor.zig diff --git a/build.zig b/build.zig index d5b06794..81c50c9e 100644 --- a/build.zig +++ b/build.zig @@ -85,6 +85,8 @@ pub fn build(b: *Build) !void { try linkV8(b, mod, enable_asan, enable_tsan, prebuilt_v8_path); try linkCurl(b, mod, enable_tsan); try linkHtml5Ever(b, mod); + linkZenai(b, mod); + linkLinenoise(b, mod); break :blk mod; }; @@ -750,6 +752,19 @@ fn buildCurl( return lib; } +fn linkZenai(b: *Build, mod: *Build.Module) void { + const dep = b.dependency("zenai", .{}); + mod.addImport("zenai", dep.module("zenai")); +} + +fn linkLinenoise(b: *Build, mod: *Build.Module) void { + const dep = b.dependency("linenoise", .{}); + mod.addIncludePath(dep.path("")); + mod.addCSourceFile(.{ + .file = dep.path("linenoise.c"), + }); +} + /// Resolves the semantic version of the build. /// /// The base version is read from `build.zig.zon`. 
This can be overridden diff --git a/build.zig.zon b/build.zig.zon index f6c231bb..3c1ff21e 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -30,6 +30,13 @@ .url = "https://github.com/curl/curl/releases/download/curl-8_18_0/curl-8.18.0.tar.gz", .hash = "N-V-__8AALp9QAGn6CCHZ6fK_FfMyGtG824LSHYHHasM3w-y", }, + .zenai = .{ + .path = "../zenai", + }, + .linenoise = .{ + .url = "https://github.com/antirez/linenoise/archive/refs/tags/2.0.tar.gz", + .hash = "N-V-__8AAJ4HAgCX79UDBfNwhqAqUVoGC44ib6UYa18q6oa_", + }, }, .paths = .{""}, } diff --git a/src/Config.zig b/src/Config.zig index 6788db1d..02906b12 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -32,6 +32,7 @@ pub const RunMode = enum { serve, version, mcp, + agent, }; pub const CDP_MAX_HTTP_REQUEST_SIZE = 4096; @@ -63,56 +64,56 @@ pub fn deinit(self: *const Config, allocator: Allocator) void { pub fn tlsVerifyHost(self: *const Config) bool { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| opts.common.tls_verify_host, + inline .serve, .fetch, .mcp, .agent => |opts| opts.common.tls_verify_host, else => unreachable, }; } pub fn obeyRobots(self: *const Config) bool { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| opts.common.obey_robots, + inline .serve, .fetch, .mcp, .agent => |opts| opts.common.obey_robots, else => unreachable, }; } pub fn httpProxy(self: *const Config) ?[:0]const u8 { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| opts.common.http_proxy, + inline .serve, .fetch, .mcp, .agent => |opts| opts.common.http_proxy, else => unreachable, }; } pub fn proxyBearerToken(self: *const Config) ?[:0]const u8 { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| opts.common.proxy_bearer_token, + inline .serve, .fetch, .mcp, .agent => |opts| opts.common.proxy_bearer_token, .help, .version => null, }; } pub fn httpMaxConcurrent(self: *const Config) u8 { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| 
opts.common.http_max_concurrent orelse 10, + inline .serve, .fetch, .mcp, .agent => |opts| opts.common.http_max_concurrent orelse 10, else => unreachable, }; } pub fn httpMaxHostOpen(self: *const Config) u8 { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| opts.common.http_max_host_open orelse 4, + inline .serve, .fetch, .mcp, .agent => |opts| opts.common.http_max_host_open orelse 4, else => unreachable, }; } pub fn httpConnectTimeout(self: *const Config) u31 { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| opts.common.http_connect_timeout orelse 0, + inline .serve, .fetch, .mcp, .agent => |opts| opts.common.http_connect_timeout orelse 0, else => unreachable, }; } pub fn httpTimeout(self: *const Config) u31 { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| opts.common.http_timeout orelse 5000, + inline .serve, .fetch, .mcp, .agent => |opts| opts.common.http_timeout orelse 5000, else => unreachable, }; } @@ -123,35 +124,35 @@ pub fn httpMaxRedirects(_: *const Config) u8 { pub fn httpMaxResponseSize(self: *const Config) ?usize { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| opts.common.http_max_response_size, + inline .serve, .fetch, .mcp, .agent => |opts| opts.common.http_max_response_size, else => unreachable, }; } pub fn logLevel(self: *const Config) ?log.Level { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| opts.common.log_level, + inline .serve, .fetch, .mcp, .agent => |opts| opts.common.log_level, else => unreachable, }; } pub fn logFormat(self: *const Config) ?log.Format { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| opts.common.log_format, + inline .serve, .fetch, .mcp, .agent => |opts| opts.common.log_format, else => unreachable, }; } pub fn logFilterScopes(self: *const Config) ?[]const log.Scope { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| opts.common.log_filter_scopes, + inline .serve, .fetch, .mcp, .agent 
=> |opts| opts.common.log_filter_scopes, else => unreachable, }; } pub fn userAgentSuffix(self: *const Config) ?[]const u8 { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| opts.common.user_agent_suffix, + inline .serve, .fetch, .mcp, .agent => |opts| opts.common.user_agent_suffix, .help, .version => null, }; } @@ -189,7 +190,7 @@ pub fn advertiseHost(self: *const Config) []const u8 { pub fn webBotAuth(self: *const Config) ?WebBotAuthConfig { return switch (self.mode) { - inline .serve, .fetch, .mcp => |opts| WebBotAuthConfig{ + inline .serve, .fetch, .mcp, .agent => |opts| WebBotAuthConfig{ .key_file = opts.common.web_bot_auth_key_file orelse return null, .keyid = opts.common.web_bot_auth_keyid orelse return null, .domain = opts.common.web_bot_auth_domain orelse return null, @@ -220,6 +221,7 @@ pub const Mode = union(RunMode) { serve: Serve, version: void, mcp: Mcp, + agent: Agent, }; pub const Serve = struct { @@ -238,6 +240,20 @@ pub const Mcp = struct { cdp_port: ?u16 = null, }; +pub const AiProvider = enum { + anthropic, + openai, + gemini, +}; + +pub const Agent = struct { + common: Common = .{}, + provider: AiProvider = .anthropic, + model: ?[:0]const u8 = null, + api_key: ?[:0]const u8 = null, + system_prompt: ?[:0]const u8 = null, +}; + pub const DumpFormat = enum { html, markdown, @@ -411,7 +427,7 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { const usage = \\usage: {s} command [options] [URL] \\ - \\Command can be either 'fetch', 'serve', 'mcp' or 'help' + \\Command can be either 'fetch', 'serve', 'mcp', 'agent' or 'help' \\ \\fetch command \\Fetches the specified URL @@ -493,6 +509,24 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\ Valid: 2024-11-05, 2025-03-26, 2025-06-18, 2025-11-25. \\ Defaults to "2024-11-05". 
\\ + ++ common_options ++ + \\ + \\agent command + \\Starts an interactive AI agent that can browse the web + \\Example: {s} agent --provider anthropic --model claude-sonnet-4-20250514 + \\ + \\Options: + \\--provider The AI provider: anthropic, openai, or gemini. + \\ Defaults to "anthropic". + \\ + \\--model The model name to use. + \\ Defaults to a sensible default per provider. + \\ + \\--api-key The API key. Can also be set via environment variable: + \\ ANTHROPIC_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY. + \\ + \\--system-prompt Override the default system prompt. + \\ ++ common_options ++ \\ \\version command @@ -502,7 +536,7 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\Displays this message \\ ; - std.debug.print(usage, .{ self.exec_name, self.exec_name, self.exec_name, self.exec_name, self.exec_name }); + std.debug.print(usage, .{ self.exec_name, self.exec_name, self.exec_name, self.exec_name, self.exec_name, self.exec_name }); if (success) { return std.process.cleanExit(); } @@ -539,6 +573,8 @@ pub fn parseArgs(allocator: Allocator) !Config { return init(allocator, exec_name, .{ .help = false }) }, .mcp => .{ .mcp = parseMcpArgs(allocator, &args) catch return init(allocator, exec_name, .{ .help = false }) }, + .agent => .{ .agent = parseAgentArgs(allocator, &args) catch + return init(allocator, exec_name, .{ .help = false }) }, .version => .{ .version = {} }, }; return init(allocator, exec_name, mode); @@ -884,6 +920,63 @@ fn parseFetchArgs( }; } +fn parseAgentArgs( + allocator: Allocator, + args: *std.process.ArgIterator, +) !Agent { + var result: Agent = .{}; + + while (args.next()) |opt| { + if (std.mem.eql(u8, "--provider", opt)) { + const str = args.next() orelse { + log.fatal(.app, "missing argument value", .{ .arg = opt }); + return error.InvalidArgument; + }; + result.provider = std.meta.stringToEnum(AiProvider, str) orelse { + log.fatal(.app, "invalid provider", .{ .arg = opt, .val = str }); + return 
error.InvalidArgument; + }; + continue; + } + + if (std.mem.eql(u8, "--model", opt)) { + const str = args.next() orelse { + log.fatal(.app, "missing argument value", .{ .arg = opt }); + return error.InvalidArgument; + }; + result.model = try allocator.dupeZ(u8, str); + continue; + } + + if (std.mem.eql(u8, "--api-key", opt) or std.mem.eql(u8, "--api_key", opt)) { + const str = args.next() orelse { + log.fatal(.app, "missing argument value", .{ .arg = opt }); + return error.InvalidArgument; + }; + result.api_key = try allocator.dupeZ(u8, str); + continue; + } + + if (std.mem.eql(u8, "--system-prompt", opt) or std.mem.eql(u8, "--system_prompt", opt)) { + const str = args.next() orelse { + log.fatal(.app, "missing argument value", .{ .arg = opt }); + return error.InvalidArgument; + }; + result.system_prompt = try allocator.dupeZ(u8, str); + continue; + } + + if (try parseCommonArg(allocator, opt, args, &result.common)) { + continue; + } + + log.fatal(.app, "unknown argument", .{ .mode = "agent", .arg = opt }); + return error.UnkownOption; + } + + return result; +} + fn parseCommonArg( allocator: Allocator, opt: []const u8, diff --git a/src/agent.zig b/src/agent.zig new file mode 100644 index 00000000..ff4d537e --- /dev/null +++ b/src/agent.zig @@ -0,0 +1,3 @@ +pub const Agent = @import("agent/Agent.zig"); +pub const ToolExecutor = @import("agent/ToolExecutor.zig"); +pub const Terminal = @import("agent/Terminal.zig"); diff --git a/src/agent/Agent.zig b/src/agent/Agent.zig new file mode 100644 index 00000000..a43134b0 --- /dev/null +++ b/src/agent/Agent.zig @@ -0,0 +1,242 @@ +const std = @import("std"); +const zenai = @import("zenai"); +const lp = @import("lightpanda"); + +const log = lp.log; +const Config = lp.Config; +const App = @import("../App.zig"); +const ToolExecutor = @import("ToolExecutor.zig"); +const Terminal = @import("Terminal.zig"); + +const Self = @This(); + +const default_system_prompt = + \\You are a web browsing assistant powered by the Lightpanda 
browser. + \\You can navigate to websites, read their content, interact with forms, + \\click links, and extract information. + \\ + \\When helping the user, navigate to relevant pages and extract information. + \\Use the semantic_tree or interactiveElements tools to understand page structure + \\before clicking or filling forms. Be concise in your responses. +; + +allocator: std.mem.Allocator, +ai_client: AiClient, +tool_executor: *ToolExecutor, +terminal: Terminal, +messages: std.ArrayListUnmanaged(zenai.provider.Message), +tools: []const zenai.provider.Tool, +model: []const u8, +system_prompt: []const u8, + +const AiClient = union(Config.AiProvider) { + anthropic: *zenai.anthropic.Client, + openai: *zenai.openai.Client, + gemini: *zenai.gemini.Client, + + fn toProvider(self: AiClient) zenai.provider.Client { + return switch (self) { + .anthropic => |c| .{ .anthropic = c }, + .openai => |c| .{ .openai = c }, + .gemini => |c| .{ .gemini = c }, + }; + } +}; + +pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Self { + const api_key = opts.api_key orelse getEnvApiKey(opts.provider) orelse { + log.fatal(.app, "missing API key", .{ + .hint = "Set the API key via --api-key or environment variable", + }); + return error.MissingApiKey; + }; + + const tool_executor = try ToolExecutor.init(allocator, app); + errdefer tool_executor.deinit(); + + const self = try allocator.create(Self); + errdefer allocator.destroy(self); + + const ai_client: AiClient = switch (opts.provider) { + .anthropic => blk: { + const client = try allocator.create(zenai.anthropic.Client); + client.* = zenai.anthropic.Client.init(allocator, api_key, .{}); + break :blk .{ .anthropic = client }; + }, + .openai => blk: { + const client = try allocator.create(zenai.openai.Client); + client.* = zenai.openai.Client.init(allocator, api_key, .{}); + break :blk .{ .openai = client }; + }, + .gemini => blk: { + const client = try allocator.create(zenai.gemini.Client); + client.* = 
zenai.gemini.Client.init(allocator, api_key, .{}); + break :blk .{ .gemini = client }; + }, + }; + + const tools = tool_executor.getTools() catch { + log.fatal(.app, "failed to initialize tools", .{}); + return error.ToolInitFailed; + }; + + self.* = .{ + .allocator = allocator, + .ai_client = ai_client, + .tool_executor = tool_executor, + .terminal = Terminal.init(null), + .messages = .empty, + .tools = tools, + .model = opts.model orelse defaultModel(opts.provider), + .system_prompt = opts.system_prompt orelse default_system_prompt, + }; + + return self; +} + +pub fn deinit(self: *Self) void { + self.messages.deinit(self.allocator); + self.tool_executor.deinit(); + switch (self.ai_client) { + inline else => |c| { + c.deinit(); + self.allocator.destroy(c); + }, + } + self.allocator.destroy(self); +} + +pub fn run(self: *Self) void { + self.terminal.printInfo("Lightpanda Agent (type 'quit' to exit)"); + self.terminal.printInfo(std.fmt.allocPrint(self.allocator, "Provider: {s}, Model: {s}", .{ + @tagName(std.meta.activeTag(self.ai_client)), + self.model, + }) catch "Ready."); + + while (true) { + const line = self.terminal.readLine("\x1b[1m> \x1b[0m") orelse break; + defer self.terminal.freeLine(line); + + if (line.len == 0) continue; + if (std.mem.eql(u8, line, "quit") or std.mem.eql(u8, line, "exit")) break; + + self.processUserMessage(line) catch |err| { + const msg = std.fmt.allocPrint(self.allocator, "Request failed: {s}", .{@errorName(err)}) catch "Request failed"; + self.terminal.printError(msg); + }; + } + + self.terminal.printInfo("Goodbye!"); +} + +fn processUserMessage(self: *Self, user_input: []const u8) !void { + // Add system prompt as first message if this is the first user message + if (self.messages.items.len == 0) { + try self.messages.append(self.allocator, .{ + .role = .system, + .content = self.system_prompt, + }); + } + + // Add user message + try self.messages.append(self.allocator, .{ + .role = .user, + .content = try self.allocator.dupe(u8, 
user_input), + }); + + // Loop: send to LLM, execute tool calls, repeat until we get text + var max_iterations: u32 = 20; + while (max_iterations > 0) : (max_iterations -= 1) { + const provider_client = self.ai_client.toProvider(); + var result = provider_client.generateContent(self.model, self.messages.items, .{ + .tools = self.tools, + .max_tokens = 4096, + }) catch |err| { + log.err(.app, "AI API error", .{ .err = err }); + return error.ApiError; + }; + defer result.deinit(); + + // Handle tool calls + if (result.finish_reason == .tool_call) { + if (result.tool_calls) |tool_calls| { + // Add the assistant message with tool calls + try self.messages.append(self.allocator, .{ + .role = .assistant, + .content = if (result.text) |t| try self.allocator.dupe(u8, t) else null, + .tool_calls = try self.dupeToolCalls(tool_calls), + }); + + // Execute each tool call and collect results + var tool_results: std.ArrayListUnmanaged(zenai.provider.ToolResult) = .empty; + defer tool_results.deinit(self.allocator); + + for (tool_calls) |tc| { + self.terminal.printToolCall(tc.name, tc.arguments); + + var tool_arena = std.heap.ArenaAllocator.init(self.allocator); + defer tool_arena.deinit(); + + const tool_result = self.tool_executor.call(tool_arena.allocator(), tc.name, tc.arguments) catch "Error: tool execution failed"; + self.terminal.printToolResult(tc.name, tool_result); + + try tool_results.append(self.allocator, .{ + .id = try self.allocator.dupe(u8, tc.id), + .name = try self.allocator.dupe(u8, tc.name), + .content = try self.allocator.dupe(u8, tool_result), + }); + } + + // Add tool results as a message + try self.messages.append(self.allocator, .{ + .role = .tool, + .tool_results = try tool_results.toOwnedSlice(self.allocator), + }); + + continue; + } + } + + // Text response + if (result.text) |text| { + std.debug.print("\n", .{}); + self.terminal.printAssistant(text); + std.debug.print("\n\n", .{}); + + try self.messages.append(self.allocator, .{ + .role = .assistant, 
+ .content = try self.allocator.dupe(u8, text), + }); + } + + break; + } +} + +fn dupeToolCalls(self: *Self, calls: []const zenai.provider.ToolCall) ![]const zenai.provider.ToolCall { + const duped = try self.allocator.alloc(zenai.provider.ToolCall, calls.len); + for (calls, 0..) |tc, i| { + duped[i] = .{ + .id = try self.allocator.dupe(u8, tc.id), + .name = try self.allocator.dupe(u8, tc.name), + .arguments = try self.allocator.dupe(u8, tc.arguments), + }; + } + return duped; +} + +fn getEnvApiKey(provider_type: Config.AiProvider) ?[:0]const u8 { + return switch (provider_type) { + .anthropic => std.posix.getenv("ANTHROPIC_API_KEY"), + .openai => std.posix.getenv("OPENAI_API_KEY"), + .gemini => std.posix.getenv("GOOGLE_API_KEY") orelse std.posix.getenv("GEMINI_API_KEY"), + }; +} + +fn defaultModel(provider_type: Config.AiProvider) []const u8 { + return switch (provider_type) { + .anthropic => "claude-sonnet-4-20250514", + .openai => "gpt-4o", + .gemini => "gemini-2.5-flash", + }; +} diff --git a/src/agent/Terminal.zig b/src/agent/Terminal.zig new file mode 100644 index 00000000..1d0e8beb --- /dev/null +++ b/src/agent/Terminal.zig @@ -0,0 +1,64 @@ +const std = @import("std"); +const c = @cImport({ + @cInclude("linenoise.h"); +}); + +const Self = @This(); + +const ansi_reset = "\x1b[0m"; +const ansi_bold = "\x1b[1m"; +const ansi_dim = "\x1b[2m"; +const ansi_cyan = "\x1b[36m"; +const ansi_green = "\x1b[32m"; +const ansi_yellow = "\x1b[33m"; +const ansi_red = "\x1b[31m"; + +history_path: ?[:0]const u8, + +pub fn init(history_path: ?[:0]const u8) Self { + c.linenoiseSetMultiLine(1); + const self = Self{ .history_path = history_path }; + if (history_path) |path| { + _ = c.linenoiseHistoryLoad(path.ptr); + } + return self; +} + +pub fn readLine(self: *Self, prompt: [*:0]const u8) ?[]const u8 { + const line = c.linenoise(prompt) orelse return null; + const slice = std.mem.sliceTo(line, 0); + if (slice.len > 0) { + _ = c.linenoiseHistoryAdd(line); + if (self.history_path) 
|path| { + _ = c.linenoiseHistorySave(path.ptr); + } + } + return slice; +} + +pub fn freeLine(_: *Self, line: []const u8) void { + c.linenoiseFree(@ptrCast(@constCast(line.ptr))); +} + +pub fn printAssistant(_: *Self, text: []const u8) void { + const fd = std.posix.STDOUT_FILENO; + _ = std.posix.write(fd, text) catch {}; +} + +pub fn printToolCall(_: *Self, name: []const u8, args: []const u8) void { + std.debug.print("\n{s}{s}[tool: {s}]{s} {s}\n", .{ ansi_dim, ansi_cyan, name, ansi_reset, args }); +} + +pub fn printToolResult(_: *Self, name: []const u8, result: []const u8) void { + const truncated = if (result.len > 500) result[0..500] else result; + const ellipsis: []const u8 = if (result.len > 500) "..." else ""; + std.debug.print("{s}{s}[result: {s}]{s} {s}{s}\n", .{ ansi_dim, ansi_green, name, ansi_reset, truncated, ellipsis }); +} + +pub fn printError(_: *Self, msg: []const u8) void { + std.debug.print("{s}{s}Error: {s}{s}\n", .{ ansi_bold, ansi_red, msg, ansi_reset }); +} + +pub fn printInfo(_: *Self, msg: []const u8) void { + std.debug.print("{s}{s}{s}\n", .{ ansi_dim, msg, ansi_reset }); +} diff --git a/src/agent/ToolExecutor.zig b/src/agent/ToolExecutor.zig new file mode 100644 index 00000000..daa42b52 --- /dev/null +++ b/src/agent/ToolExecutor.zig @@ -0,0 +1,442 @@ +const std = @import("std"); +const lp = @import("lightpanda"); +const zenai = @import("zenai"); + +const App = @import("../App.zig"); +const HttpClient = @import("../browser/HttpClient.zig"); +const CDPNode = @import("../cdp/Node.zig"); +const mcp_tools = @import("../mcp/tools.zig"); +const protocol = @import("../mcp/protocol.zig"); + +const Self = @This(); + +allocator: std.mem.Allocator, +app: *App, +http_client: *HttpClient, +notification: *lp.Notification, +browser: lp.Browser, +session: *lp.Session, +node_registry: CDPNode.Registry, + +pub fn init(allocator: std.mem.Allocator, app: *App) !*Self { + const http_client = try HttpClient.init(allocator, &app.network); + errdefer 
http_client.deinit(); + + const notification = try lp.Notification.init(allocator); + errdefer notification.deinit(); + + const self = try allocator.create(Self); + errdefer allocator.destroy(self); + + var browser = try lp.Browser.init(app, .{ .http_client = http_client }); + errdefer browser.deinit(); + + self.* = .{ + .allocator = allocator, + .app = app, + .http_client = http_client, + .notification = notification, + .browser = browser, + .session = undefined, + .node_registry = CDPNode.Registry.init(allocator), + }; + + self.session = try self.browser.newSession(self.notification); + return self; +} + +pub fn deinit(self: *Self) void { + self.node_registry.deinit(); + self.browser.deinit(); + self.notification.deinit(); + self.http_client.deinit(); + self.allocator.destroy(self); +} + +/// Returns the list of tools in zenai provider.Tool format. +pub fn getTools(self: *Self) ![]const zenai.provider.Tool { + const tools = try self.allocator.alloc(zenai.provider.Tool, mcp_tools.tool_list.len); + for (mcp_tools.tool_list, 0..) |t, i| { + const parsed = try std.json.parseFromSlice( + std.json.Value, + self.allocator, + t.inputSchema, + .{}, + ); + tools[i] = .{ + .name = t.name, + .description = t.description orelse "", + .parameters = parsed.value, + }; + } + return tools; +} + +/// Execute a tool by name with JSON arguments, returning the result as a string. 
+pub fn call(self: *Self, arena: std.mem.Allocator, tool_name: []const u8, arguments_json: []const u8) ![]const u8 { + const arguments = if (arguments_json.len > 0) + (std.json.parseFromSlice(std.json.Value, arena, arguments_json, .{}) catch + return "Error: invalid JSON arguments").value + else + null; + + const Action = enum { + goto, + navigate, + markdown, + links, + nodeDetails, + interactiveElements, + structuredData, + detectForms, + evaluate, + eval, + semantic_tree, + click, + fill, + scroll, + waitForSelector, + }; + + const action_map = std.StaticStringMap(Action).initComptime(.{ + .{ "goto", .goto }, + .{ "navigate", .navigate }, + .{ "markdown", .markdown }, + .{ "links", .links }, + .{ "nodeDetails", .nodeDetails }, + .{ "interactiveElements", .interactiveElements }, + .{ "structuredData", .structuredData }, + .{ "detectForms", .detectForms }, + .{ "evaluate", .evaluate }, + .{ "eval", .eval }, + .{ "semantic_tree", .semantic_tree }, + .{ "click", .click }, + .{ "fill", .fill }, + .{ "scroll", .scroll }, + .{ "waitForSelector", .waitForSelector }, + }); + + const action = action_map.get(tool_name) orelse return "Error: unknown tool"; + + return switch (action) { + .goto, .navigate => self.execGoto(arena, arguments), + .markdown => self.execMarkdown(arena, arguments), + .links => self.execLinks(arena, arguments), + .nodeDetails => self.execNodeDetails(arena, arguments), + .interactiveElements => self.execInteractiveElements(arena, arguments), + .structuredData => self.execStructuredData(arena, arguments), + .detectForms => self.execDetectForms(arena, arguments), + .evaluate, .eval => self.execEvaluate(arena, arguments), + .semantic_tree => self.execSemanticTree(arena, arguments), + .click => self.execClick(arena, arguments), + .fill => self.execFill(arena, arguments), + .scroll => self.execScroll(arena, arguments), + .waitForSelector => self.execWaitForSelector(arena, arguments), + }; +} + +fn execGoto(self: *Self, arena: std.mem.Allocator, arguments: 
?std.json.Value) []const u8 { + const GotoParams = struct { + url: [:0]const u8, + timeout: ?u32 = null, + waitUntil: ?lp.Config.WaitUntil = null, + }; + const args = parseArgsOrErr(GotoParams, arena, arguments) orelse return "Error: missing or invalid 'url' argument"; + self.performGoto(args.url, args.timeout, args.waitUntil) catch return "Error: navigation failed"; + return "Navigated successfully."; +} + +fn execMarkdown(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 { + const UrlParams = struct { + url: ?[:0]const u8 = null, + timeout: ?u32 = null, + waitUntil: ?lp.Config.WaitUntil = null, + }; + const args = parseArgsOrDefault(UrlParams, arena, arguments); + const page = self.ensurePage(args.url, args.timeout, args.waitUntil) catch return "Error: page not loaded"; + + var aw: std.Io.Writer.Allocating = .init(arena); + lp.markdown.dump(page.window._document.asNode(), .{}, &aw.writer, page) catch return "Error: failed to generate markdown"; + return aw.written(); +} + +fn execLinks(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 { + const UrlParams = struct { + url: ?[:0]const u8 = null, + timeout: ?u32 = null, + waitUntil: ?lp.Config.WaitUntil = null, + }; + const args = parseArgsOrDefault(UrlParams, arena, arguments); + const page = self.ensurePage(args.url, args.timeout, args.waitUntil) catch return "Error: page not loaded"; + + const links_list = lp.links.collectLinks(arena, page.window._document.asNode(), page) catch + return "Error: failed to collect links"; + + var aw: std.Io.Writer.Allocating = .init(arena); + for (links_list, 0..) 
|href, i| { + if (i > 0) aw.writer.writeByte('\n') catch {}; + aw.writer.writeAll(href) catch {}; + } + return aw.written(); +} + +fn execNodeDetails(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 { + const Params = struct { backendNodeId: CDPNode.Id }; + const args = parseArgsOrErr(Params, arena, arguments) orelse return "Error: missing backendNodeId"; + + _ = self.session.currentPage() orelse return "Error: page not loaded"; + + const node = self.node_registry.lookup_by_id.get(args.backendNodeId) orelse + return "Error: node not found"; + + const page = self.session.currentPage().?; + const details = lp.SemanticTree.getNodeDetails(arena, node.dom, &self.node_registry, page) catch + return "Error: failed to get node details"; + + var aw: std.Io.Writer.Allocating = .init(arena); + std.json.Stringify.value(&details, .{}, &aw.writer) catch return "Error: serialization failed"; + return aw.written(); +} + +fn execInteractiveElements(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 { + const UrlParams = struct { + url: ?[:0]const u8 = null, + timeout: ?u32 = null, + waitUntil: ?lp.Config.WaitUntil = null, + }; + const args = parseArgsOrDefault(UrlParams, arena, arguments); + const page = self.ensurePage(args.url, args.timeout, args.waitUntil) catch return "Error: page not loaded"; + + const elements = lp.interactive.collectInteractiveElements(page.window._document.asNode(), arena, page) catch + return "Error: failed to collect interactive elements"; + lp.interactive.registerNodes(elements, &self.node_registry) catch + return "Error: failed to register nodes"; + + var aw: std.Io.Writer.Allocating = .init(arena); + std.json.Stringify.value(elements, .{}, &aw.writer) catch return "Error: serialization failed"; + return aw.written(); +} + +fn execStructuredData(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 { + const UrlParams = struct { + url: ?[:0]const u8 = null, + timeout: ?u32 = 
null,
+        waitUntil: ?lp.Config.WaitUntil = null,
+    };
+    const args = parseArgsOrDefault(UrlParams, arena, arguments);
+    const page = self.ensurePage(args.url, args.timeout, args.waitUntil) catch return "Error: page not loaded";
+
+    const data = lp.structured_data.collectStructuredData(page.window._document.asNode(), arena, page) catch
+        return "Error: failed to collect structured data";
+    var aw: std.Io.Writer.Allocating = .init(arena);
+    std.json.Stringify.value(data, .{}, &aw.writer) catch return "Error: serialization failed";
+    return aw.written();
+}
+
+fn execDetectForms(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 { // Collects the page's forms, registers their nodes in node_registry and returns the forms as JSON.
+    const UrlParams = struct {
+        url: ?[:0]const u8 = null,
+        timeout: ?u32 = null,
+        waitUntil: ?lp.Config.WaitUntil = null,
+    };
+    const args = parseArgsOrDefault(UrlParams, arena, arguments); // every field is optional: absent or unparsable arguments yield the defaults
+    const page = self.ensurePage(args.url, args.timeout, args.waitUntil) catch return "Error: page not loaded";
+
+    const forms_data = lp.forms.collectForms(arena, page.window._document.asNode(), page) catch
+        return "Error: failed to collect forms";
+    lp.forms.registerNodes(forms_data, &self.node_registry) catch
+        return "Error: failed to register form nodes";
+
+    var aw: std.Io.Writer.Allocating = .init(arena);
+    std.json.Stringify.value(forms_data, .{}, &aw.writer) catch return "Error: serialization failed";
+    return aw.written();
+}
+
+fn execEvaluate(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 { // Runs 'script' on the page and returns its result as a string, or the formatted exception when the script fails.
+    const Params = struct {
+        script: [:0]const u8,
+        url: ?[:0]const u8 = null,
+        timeout: ?u32 = null,
+        waitUntil: ?lp.Config.WaitUntil = null,
+    };
+    const args = parseArgsOrErr(Params, arena, arguments) orelse return "Error: missing 'script' argument";
+    const page = self.ensurePage(args.url, args.timeout, args.waitUntil) catch return "Error: page not loaded";
+
+    var ls: lp.js.Local.Scope = undefined; // filled in by localScope() on the next line
+    page.js.localScope(&ls);
+    defer ls.deinit();
+
+    var try_catch: lp.js.TryCatch = undefined; // filled in by init() on the next line
+    try_catch.init(&ls.local);
+    defer try_catch.deinit();
+
+    const js_result = ls.local.compileAndRun(args.script, null) catch |err| {
+        const caught = try_catch.caughtOrError(arena, err);
+        var aw: std.Io.Writer.Allocating = .init(arena);
+        caught.format(&aw.writer) catch return "Error: failed to format script error"; // never hand back an empty or truncated message
+        return aw.written();
+    };
+
+    return js_result.toStringSliceWithAlloc(arena) catch "undefined"; // a result that cannot be stringified is reported as "undefined"
+}
+
+fn execSemanticTree(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 { // Renders the pruned semantic tree as text, rooted at backendNodeId when given, otherwise at the document.
+    const TreeParams = struct {
+        url: ?[:0]const u8 = null,
+        backendNodeId: ?u32 = null,
+        maxDepth: ?u32 = null,
+        timeout: ?u32 = null,
+        waitUntil: ?lp.Config.WaitUntil = null,
+    };
+    const args = parseArgsOrDefault(TreeParams, arena, arguments);
+    const page = self.ensurePage(args.url, args.timeout, args.waitUntil) catch return "Error: page not loaded";
+
+    var root_node = page.window._document.asNode();
+    if (args.backendNodeId) |node_id| {
+        const n = self.node_registry.lookup_by_id.get(node_id) orelse
+            return "Error: node not found"; // an unknown id is an error, as in execClick/execFill/execScroll, not a silent fallback to the whole document
+        root_node = n.dom;
+    }
+
+    const st = lp.SemanticTree{
+        .dom_node = root_node,
+        .registry = &self.node_registry,
+        .page = page,
+        .arena = arena,
+        .prune = true,
+        .max_depth = args.maxDepth orelse std.math.maxInt(u32) - 1, // no maxDepth supplied: effectively unlimited
+    };
+
+    var aw: std.Io.Writer.Allocating = .init(arena);
+    st.textStringify(&aw.writer) catch return "Error: failed to generate semantic tree";
+    return aw.written();
+}
+
+fn execClick(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 { // Clicks the node registered under backendNodeId, then reports the page url and title.
+    const Params = struct { backendNodeId: CDPNode.Id };
+    const args = parseArgsOrErr(Params, arena, arguments) orelse return "Error: missing backendNodeId";
+
+    const page = self.session.currentPage() orelse return "Error: page not loaded";
+    const node = self.node_registry.lookup_by_id.get(args.backendNodeId) orelse return "Error: node not found";
+
+    lp.actions.click(node.dom, page) catch |err| {
+        if (err == error.InvalidNodeType) return "Error: node is not an HTML element";
+        return "Error: failed to click element";
+    };
+
+    const page_title = page.getTitle() catch null; // best-effort: "(none)" is printed when the title is unavailable
+    return std.fmt.allocPrint(arena, "Clicked element (backendNodeId: {d}). Page url: {s}, title: {s}", .{
+        args.backendNodeId,
+        page.url,
+        page_title orelse "(none)",
+    }) catch "Clicked element.";
+}
+
+fn execFill(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 { // Fills the node registered under backendNodeId with 'text', then reports the page url and title.
+    const Params = struct {
+        backendNodeId: CDPNode.Id,
+        text: []const u8,
+    };
+    const args = parseArgsOrErr(Params, arena, arguments) orelse return "Error: missing backendNodeId or text";
+
+    const page = self.session.currentPage() orelse return "Error: page not loaded";
+    const node = self.node_registry.lookup_by_id.get(args.backendNodeId) orelse return "Error: node not found";
+
+    lp.actions.fill(node.dom, args.text, page) catch |err| {
+        if (err == error.InvalidNodeType) return "Error: node is not an input, textarea or select";
+        return "Error: failed to fill element";
+    };
+
+    const page_title = page.getTitle() catch null; // best-effort: "(none)" is printed when the title is unavailable
+    return std.fmt.allocPrint(arena, "Filled element (backendNodeId: {d}) with \"{s}\". Page url: {s}, title: {s}", .{
+        args.backendNodeId,
+        args.text,
+        page.url,
+        page_title orelse "(none)",
+    }) catch "Filled element.";
+}
+
+fn execScroll(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 { // Scrolls with the optional x/y, on the registered node when backendNodeId is given, then reports the page url and title.
+    const Params = struct {
+        backendNodeId: ?CDPNode.Id = null,
+        x: ?i32 = null,
+        y: ?i32 = null,
+    };
+    const args = parseArgsOrDefault(Params, arena, arguments);
+    const page = self.session.currentPage() orelse return "Error: page not loaded";
+
+    var target_node: ?*@import("../browser/webapi/Node.zig") = null; // stays null when no backendNodeId is supplied
+    if (args.backendNodeId) |node_id| {
+        const node = self.node_registry.lookup_by_id.get(node_id) orelse return "Error: node not found";
+        target_node = node.dom;
+    }
+
+    lp.actions.scroll(target_node, args.x, args.y, page) catch |err| {
+        if (err == error.InvalidNodeType) return "Error: node is not an element";
+        return "Error: failed to scroll";
+    };
+
+    const page_title = page.getTitle() catch null;
+    return std.fmt.allocPrint(arena, "Scrolled to x: {d}, y: {d}. Page url: {s}, title: {s}", .{
+        args.x orelse 0, // the message prints 0 for an omitted coordinate
+        args.y orelse 0,
+        page.url,
+        page_title orelse "(none)",
+    }) catch "Scrolled.";
+}
+
+fn execWaitForSelector(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 { // Waits for 'selector' (timeout defaults to 5000), registers the found node and reports its backendNodeId.
+    const Params = struct {
+        selector: [:0]const u8,
+        timeout: ?u32 = null,
+    };
+    const args = parseArgsOrErr(Params, arena, arguments) orelse return "Error: missing 'selector' argument";
+
+    _ = self.session.currentPage() orelse return "Error: page not loaded"; // existence check only; waitForSelector is given the session
+
+    const timeout_ms = args.timeout orelse 5000;
+
+    const node = lp.actions.waitForSelector(args.selector, timeout_ms, self.session) catch |err| {
+        if (err == error.InvalidSelector) return "Error: invalid selector";
+        if (err == error.Timeout) return "Error: timeout waiting for selector";
+        return "Error: failed waiting for selector";
+    };
+
+    const registered = self.node_registry.register(node) catch return "Element found."; // registration failed: still report success, without an id
+    return std.fmt.allocPrint(arena, "Element found. backendNodeId: {d}", .{registered.id}) catch "Element found.";
+}
+
+fn ensurePage(self: *Self, url: ?[:0]const u8, timeout: ?u32, waitUntil: ?lp.Config.WaitUntil) !*lp.Page { // Navigates first when a url is given, then returns the session's current page or error.PageNotLoaded.
+    if (url) |u| {
+        try self.performGoto(u, timeout, waitUntil);
+    }
+    return self.session.currentPage() orelse error.PageNotLoaded;
+}
+
+fn performGoto(self: *Self, url: [:0]const u8, timeout: ?u32, waitUntil: ?lp.Config.WaitUntil) !void { // Replaces the session's page with a new one, navigates it to 'url' and waits on the session runner.
+    const session = self.session;
+    if (session.page != null) {
+        session.removePage(); // drop the existing page before creating the new one
+    }
+    const page = try session.createPage();
+    _ = try page.navigate(url, .{
+        .reason = .address_bar,
+        .kind = .{ .push = null },
+    });
+
+    var runner = try session.runner(.{});
+    try runner.wait(.{
+        .ms = timeout orelse 10000, // default wait when the caller gives no timeout
+        .until = waitUntil orelse .done,
+    });
+}
+
+fn parseArgsOrDefault(comptime T: type, arena: std.mem.Allocator, arguments: ?std.json.Value) T { // Parses 'arguments' into T; absent or unparsable arguments yield T's field defaults.
+    const args_raw = arguments orelse return .{};
+    return std.json.parseFromValueLeaky(T, arena, args_raw, .{ .ignore_unknown_fields = true }) catch .{};
+}
+
+fn parseArgsOrErr(comptime T: type, arena: std.mem.Allocator, arguments: ?std.json.Value) ?T { // Parses 'arguments' into T; absent or unparsable arguments yield null.
+    const args_raw = arguments orelse return null;
+    return std.json.parseFromValueLeaky(T, arena, args_raw, .{ .ignore_unknown_fields = true }) catch null;
+}
diff --git a/src/lightpanda.zig b/src/lightpanda.zig
index 5859324d..37ec9c44 100644
--- a/src/lightpanda.zig
+++ b/src/lightpanda.zig
@@ -40,6 +40,7 @@ pub const forms = @import("browser/forms.zig");
 pub const actions = @import("browser/actions.zig");
 pub const structured_data = @import("browser/structured_data.zig");
 pub const mcp = @import("mcp.zig");
+pub const agent = @import("agent.zig");
 pub const build_config = @import("build_config");
 pub const crash_handler = @import("crash_handler.zig");
 
diff --git a/src/main.zig b/src/main.zig
index c10c3b4b..c90b81c2 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -165,10 +165,30 @@ fn run(allocator: Allocator, main_arena: Allocator) !void {
 
             app.network.run();
         },
+        .agent => |opts| {
+            log.info(.app, "starting agent", .{});
+
+            var worker_thread = try std.Thread.spawn(.{}, agentThread, .{ allocator, app, opts }); // the agent runs on its own thread
+            defer worker_thread.join();
+
+            app.network.run();
+        },
         else => unreachable,
     }
 }
 
+fn agentThread(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) void { // Thread entry point: builds the agent, runs it, and tears it down.
+    defer app.network.stop(); // runs whenever this thread returns, including the init-failure path below
+
+    var agent_instance = lp.agent.Agent.init(allocator, app, opts) catch |err| {
+        log.fatal(.app, "agent init error", .{ .err = err });
+        return;
+    };
+    defer agent_instance.deinit();
+
+    agent_instance.run();
+}
+
 fn fetchThread(app: *App, url: [:0]const u8, fetch_opts: lp.FetchOpts) void {
     defer app.network.stop();
     lp.fetch(app, url, fetch_opts) catch |err| {