diff --git a/src/browser/tools.zig b/src/browser/tools.zig index c17deb50..26321c71 100644 --- a/src/browser/tools.zig +++ b/src/browser/tools.zig @@ -36,6 +36,7 @@ pub const Tool = enum { goto, search, markdown, + html, links, eval, extract, @@ -65,7 +66,7 @@ pub const Tool = enum { pub fn isRecorded(self: Tool) bool { return switch (self) { .goto, .eval, .extract, .click, .fill, .scroll, .waitForSelector, .waitForScript, .hover, .press, .selectOption, .setChecked => true, - .search, .markdown, .links, .tree, .nodeDetails, .interactiveElements, .structuredData, .detectForms, .findElement, .consoleLogs, .getUrl, .getCookies, .getEnv => false, + .search, .markdown, .html, .links, .tree, .nodeDetails, .interactiveElements, .structuredData, .detectForms, .findElement, .consoleLogs, .getUrl, .getCookies, .getEnv => false, }; } @@ -75,7 +76,7 @@ pub const Tool = enum { pub fn canHeal(self: Tool) bool { return switch (self) { .click, .fill, .scroll, .waitForSelector, .waitForScript, .hover, .press, .selectOption, .setChecked, .extract => true, - .goto, .search, .markdown, .links, .eval, .tree, .nodeDetails, .interactiveElements, .structuredData, .detectForms, .findElement, .consoleLogs, .getUrl, .getCookies, .getEnv => false, + .goto, .search, .markdown, .html, .links, .eval, .tree, .nodeDetails, .interactiveElements, .structuredData, .detectForms, .findElement, .consoleLogs, .getUrl, .getCookies, .getEnv => false, }; } @@ -85,7 +86,7 @@ pub const Tool = enum { pub fn needsLocator(self: Tool) bool { return switch (self) { .click, .fill, .hover, .selectOption, .setChecked => true, - .goto, .search, .markdown, .links, .eval, .extract, .tree, .nodeDetails, .interactiveElements, .structuredData, .detectForms, .scroll, .waitForSelector, .waitForScript, .press, .findElement, .consoleLogs, .getUrl, .getCookies, .getEnv => false, + .goto, .search, .markdown, .html, .links, .eval, .extract, .tree, .nodeDetails, .interactiveElements, .structuredData, .detectForms, .scroll, .waitForSelector, .waitForScript, .press, .findElement, .consoleLogs, .getUrl, .getCookies, .getEnv => false, }; } @@ -93,7 +94,7 @@ pub const Tool = enum { /// markdown, eval return value) rather than a status line on stderr. pub fn producesData(self: Tool) bool { return switch (self) { - .search, .markdown, .links, .eval, .extract, .tree, .nodeDetails, .interactiveElements, .structuredData, .detectForms, .findElement, .consoleLogs, .getUrl, .getCookies, .getEnv => true, + .search, .markdown, .html, .links, .eval, .extract, .tree, .nodeDetails, .interactiveElements, .structuredData, .detectForms, .findElement, .consoleLogs, .getUrl, .getCookies, .getEnv => true, .goto, .click, .fill, .scroll, .waitForSelector, .waitForScript, .hover, .press, .selectOption, .setChecked => false, }; } @@ -103,7 +104,7 @@ pub const Tool = enum { pub fn isRetryable(self: Tool) bool { return switch (self) { .fill, .setChecked, .selectOption => true, - .goto, .search, .markdown, .links, .eval, .extract, .tree, .nodeDetails, .interactiveElements, .structuredData, .detectForms, .click, .scroll, .waitForSelector, .waitForScript, .hover, .press, .findElement, .consoleLogs, .getUrl, .getCookies, .getEnv => false, + .goto, .search, .markdown, .html, .links, .eval, .extract, .tree, .nodeDetails, .interactiveElements, .structuredData, .detectForms, .click, .scroll, .waitForSelector, .waitForScript, .hover, .press, .findElement, .consoleLogs, .getUrl, .getCookies, .getEnv => false, }; } @@ -152,6 +153,10 @@ pub const Tool = enum { .description = "Get the page content in markdown format. If a url is provided, it navigates to that url first.", .input_schema = url_params_schema, }, + .html => .{ + .description = "Dump the full raw HTML of the current page (doctype + document element). If a url is provided, it navigates to that url first. Prefer `markdown` or `tree` for LLM consumption — `html` is verbose and noisy. Use it for debugging or capturing fixtures.", + .input_schema = url_params_schema, + }, .links => .{ .description = "Extract all links in the opened page. If a url is provided, it navigates to that url first.", .input_schema = url_params_schema, @@ -553,6 +558,7 @@ fn dispatch( .goto => .{ .text = try execGoto(arena, session, registry, substituted) }, .search => .{ .text = try execSearch(arena, session, registry, substituted) }, .markdown => .{ .text = try execMarkdown(arena, session, registry, substituted) }, + .html => .{ .text = try execHtml(arena, session, registry, substituted) }, .links => .{ .text = try execLinks(arena, session, registry, substituted) }, .tree => .{ .text = try execTree(arena, session, registry, substituted) }, .nodeDetails => .{ .text = try execNodeDetails(arena, session, registry, substituted) }, @@ -753,6 +759,14 @@ fn execMarkdown(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNo return renderFrameMarkdown(arena, page); } +fn execHtml(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.Registry, arguments: ?std.json.Value) ToolError![]const u8 { + const args = try parseArgsOrDefault(UrlParams, arena, arguments); + const page = try ensurePage(session, registry, args.url, args.timeout, args.waitUntil); + var aw: std.Io.Writer.Allocating = .init(arena); + lp.dump.root(page.document, .{}, &aw.writer, page) catch return ToolError.InternalError; + return aw.written(); +} + fn execLinks(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.Registry, arguments: ?std.json.Value) ToolError![]const u8 { const args = try parseArgsOrDefault(UrlParams, arena, arguments); const page = try ensurePage(session, registry, args.url, args.timeout, args.waitUntil); diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig index ffd14b15..a0d1ec20 100644 --- a/src/mcp/tools.zig +++ b/src/mcp/tools.zig @@ -898,6 +898,21 @@ test "MCP - waitForSelector: timeout" { }, out.written()); } +test "MCP - html dumps doctype + document element" { + defer testing.reset(); + var out: std.io.Writer.Allocating = .init(testing.arena_allocator); + const server = try testLoadPage("http://localhost:9582/src/browser/tests/mcp_press_form.html", &out.writer); + defer server.deinit(); + + const msg = + \\{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"html"}} + ; + try router.handleMessage(server, testing.arena_allocator, msg); + try testing.expect(std.mem.indexOf(u8, out.written(), "") != null); + try testing.expect(std.mem.indexOf(u8, out.written(), "