links: return structured link objects with text and node ID

Updates `collectLinks` to return a `Link` struct containing the href,
visible text, and backend node ID. The links tool now outputs JSON.
This commit is contained in:
Adrià Arrufat
2026-05-30 22:54:27 +02:00
parent ee96d8e813
commit 2eb995e0ee
3 changed files with 91 additions and 18 deletions

View File

@@ -429,7 +429,7 @@ fn getAccessibleName(el: *Element, arena: Allocator) !?[]const u8 {
return try getTextContent(el.asNode(), arena);
}
fn getTextContent(node: *Node, arena: Allocator) !?[]const u8 {
pub fn getTextContent(node: *Node, arena: Allocator) !?[]const u8 {
var tw: TreeWalker.FullExcludeSelf = .init(node, .{});
var arr: std.ArrayList(u8) = .empty;

View File

@@ -22,33 +22,105 @@ const Element = @import("webapi/Element.zig");
const Node = @import("webapi/Node.zig");
const Frame = @import("Frame.zig");
const Selector = @import("webapi/selector/Selector.zig");
const interactive = @import("interactive.zig");
const log = @import("../lightpanda.zig").log;
const Allocator = std.mem.Allocator;
/// Collect all links (href attributes from anchor tags) under `root`.
/// Returns a slice of strings allocated with `arena`.
pub fn collectLinks(arena: Allocator, root: *Node, frame: *Frame) ![]const []const u8 {
var links: std.ArrayList([]const u8) = .empty;
/// One anchor found by `collectLinks`.
pub const Link = struct {
    /// Defaults to null; `registerNodes` assigns it from the registry entry.
    backendNodeId: ?u32 = null,
    /// DOM node of the anchor. Not written to JSON.
    node: *Node,
    /// Text content of the anchor, or null when none was obtained.
    text: ?[]const u8,
    /// Value returned by the anchor's `getHref`.
    href: []const u8,

    /// Write this link as a JSON object through `jw`.
    ///
    /// `backendNodeId` and `text` are emitted only when non-null, in that
    /// order, followed by `href`, which is always emitted.
    pub fn jsonStringify(self: *const Link, jw: anytype) !void {
        try jw.beginObject();

        if (self.backendNodeId) |node_id| {
            try jw.objectField("backendNodeId");
            try jw.write(node_id);
        }

        if (self.text) |anchor_text| {
            try jw.objectField("text");
            try jw.write(anchor_text);
        }

        try jw.objectField("href");
        try jw.write(self.href);

        try jw.endObject();
    }
};
/// Register each link's node with `registry` and store the `id` of the
/// returned entry in that link's `backendNodeId`.
///
/// `registry` must provide `register(node)` returning an error union whose
/// payload exposes an `id` field. The first registration error stops the loop
/// and is returned; links handled before it keep the ids already assigned.
pub fn registerNodes(links: []Link, registry: anytype) !void {
    for (links) |*link| {
        link.backendNodeId = (try registry.register(link.node)).id;
    }
}
/// Collect all links (anchor tags with an href) under `root`.
///
/// Runs the `a[href]` selector on `root` and produces one `Link` per matching
/// anchor. A match is skipped when:
///   - the node is not an HTML anchor element,
///   - resolving its href fails (the error is logged), or
///   - the resolved href is empty.
///
/// `text` is filled on a best-effort basis: if text extraction fails the
/// field is left null and the link is still returned. `backendNodeId` is left
/// at its default (null); call `registerNodes` to populate it.
///
/// The returned slice is allocated with `arena`. A selector query failure is
/// logged and returned to the caller, as is a failure to grow the list.
pub fn collectLinks(arena: Allocator, root: *Node, frame: *Frame) ![]Link {
    var links: std.ArrayList(Link) = .empty;

    if (Selector.querySelectorAll(root, "a[href]", frame)) |list| {
        defer list.deinit(frame._page);

        for (list._nodes) |node| {
            const anchor = node.is(Element.Html.Anchor) orelse continue;
            const href = anchor.getHref(frame) catch |err| {
                log.err(.app, "resolve href failed", .{ .err = err });
                continue;
            };
            if (href.len == 0) continue;

            try links.append(arena, .{
                .node = node,
                // Best effort: a failure here must not drop the link itself.
                .text = interactive.getTextContent(node, arena) catch null,
                .href = href,
            });
        }
    } else |err| {
        log.err(.app, "query links failed", .{ .err = err });
        return err;
    }

    return links.items;
}
const testing = @import("../testing.zig");
// Test helper: parses `html` into a fresh <div> on a new test page and returns
// the links collected beneath that <div>.
//
// On success the page is left open, so the caller must
// `defer testing.test_session.removePage()` — the returned slices live in the
// page's call_arena. On failure the page is removed here.
fn testLinks(html: []const u8) ![]Link {
    const frame = try testing.test_session.createPage();
    errdefer testing.test_session.removePage();

    const container = try frame.window._document.createElement("div", null, frame);
    const root = container.asNode();
    try frame.parseHtmlAsChildren(root, html);

    return collectLinks(frame.call_arena, root, frame);
}
test "links: text and href" {
    // Three anchors go in; the one without an href must not come back.
    const found = try testLinks(
        \\<a href="https://example.com/login">Sign in</a>
        \\<a href="/page/2"> Next page </a>
        \\<a>no href, skipped</a>
    );
    defer testing.test_session.removePage();

    try testing.expectEqual(2, found.len);

    const first = found[0];
    try testing.expectEqual("Sign in", first.text.?);
    try testing.expectEqual("https://example.com/login", first.href);

    // The second anchor's text is expected without its surrounding spaces.
    const second = found[1];
    try testing.expectEqual("Next page", second.text.?);
}
test "links: empty text" {
    // An anchor wrapping only an image is still a link, but has no text.
    const found = try testLinks(
        \\<a href="/icon"><img src="i.png"></a>
    );
    defer testing.test_session.removePage();

    try testing.expectEqual(1, found.len);

    const only = found[0];
    try testing.expectEqual(null, only.text);
}

View File

@@ -187,7 +187,7 @@ pub const Tool = enum {
),
},
.links => .{
.description = "Extract all links in the opened page. If a url is provided, it navigates to that url first.",
.description = "Extract all links in the opened page as JSON objects with `text` (visible anchor text), `href` (resolved URL), and `backendNodeId` (pass to click/nodeDetails). If a url is provided, it navigates to that url first.",
.summary = "List all links on the page",
.input_schema = url_params_schema,
},
@@ -870,8 +870,9 @@ fn execLinks(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.
const links_list = lp.links.collectLinks(arena, page.document.asNode(), page) catch
return ToolError.InternalError;
return std.mem.join(arena, "\n", links_list) catch return ToolError.InternalError;
lp.links.registerNodes(links_list, registry) catch
return ToolError.InternalError;
return renderJson(arena, links_list);
}
fn execTree(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.Registry, arguments: ?std.json.Value) ToolError![]const u8 {