From 2eb995e0eec8a03588b8805aa78b2584895eae94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Sat, 30 May 2026 22:54:27 +0200 Subject: [PATCH] links: return structured link objects with text and node ID Updates `collectLinks` to return a `Link` struct containing the href, visible text, and backend node ID. The links tool now outputs JSON. --- src/browser/interactive.zig | 2 +- src/browser/links.zig | 100 +++++++++++++++++++++++++++++++----- src/browser/tools.zig | 7 +-- 3 files changed, 91 insertions(+), 18 deletions(-) diff --git a/src/browser/interactive.zig b/src/browser/interactive.zig index ff7325e1..cc0e121f 100644 --- a/src/browser/interactive.zig +++ b/src/browser/interactive.zig @@ -429,7 +429,7 @@ fn getAccessibleName(el: *Element, arena: Allocator) !?[]const u8 { return try getTextContent(el.asNode(), arena); } -fn getTextContent(node: *Node, arena: Allocator) !?[]const u8 { +pub fn getTextContent(node: *Node, arena: Allocator) !?[]const u8 { var tw: TreeWalker.FullExcludeSelf = .init(node, .{}); var arr: std.ArrayList(u8) = .empty; diff --git a/src/browser/links.zig b/src/browser/links.zig index 158bca61..7cce35be 100644 --- a/src/browser/links.zig +++ b/src/browser/links.zig @@ -22,33 +22,105 @@ const Element = @import("webapi/Element.zig"); const Node = @import("webapi/Node.zig"); const Frame = @import("Frame.zig"); const Selector = @import("webapi/selector/Selector.zig"); +const interactive = @import("interactive.zig"); +const log = @import("../lightpanda.zig").log; const Allocator = std.mem.Allocator; -/// Collect all links (href attributes from anchor tags) under `root`. -/// Returns a slice of strings allocated with `arena`. 
-pub fn collectLinks(arena: Allocator, root: *Node, frame: *Frame) ![]const []const u8 {
-    var links: std.ArrayList([]const u8) = .empty;
+/// One anchor found by `collectLinks`.
+pub const Link = struct {
+    /// Registry id of `node`; stays null until `registerNodes` fills it in.
+    backendNodeId: ?u32 = null,
+    /// The anchor's DOM node. Not emitted by `jsonStringify`.
+    node: *Node,
+    /// Text content of the anchor, or null when none was produced.
+    text: ?[]const u8,
+    /// Href as returned by `Anchor.getHref`; `collectLinks` never stores an
+    /// empty one.
+    href: []const u8,
+
+    /// Write the link as a JSON object. `backendNodeId` and `text` are
+    /// omitted when null; `href` is always written.
+    pub fn jsonStringify(self: *const Link, jw: anytype) !void {
+        try jw.beginObject();
+        if (self.backendNodeId) |id| {
+            try jw.objectField("backendNodeId");
+            try jw.write(id);
+        }
+        if (self.text) |t| {
+            try jw.objectField("text");
+            try jw.write(t);
+        }
+        try jw.objectField("href");
+        try jw.write(self.href);
+        try jw.endObject();
+    }
+};
+
+/// Populate `backendNodeId` on each link by registering its node in `registry`.
+/// `registry` must provide `register(node)` returning a value with an `id`
+/// field. The first registration error is returned as-is; links visited
+/// before it keep the ids they were given.
+pub fn registerNodes(links: []Link, registry: anytype) !void {
+    for (links) |*l| {
+        const registered = try registry.register(l.node);
+        l.backendNodeId = registered.id;
+    }
+}
+
+/// Collect all links (anchor tags with an href) under `root`.
+///
+/// Anchors whose href cannot be resolved are logged and skipped, and anchors
+/// resolving to an empty href are skipped silently. A failing selector query
+/// is logged and its error returned. The returned slice is allocated with
+/// `arena`; `backendNodeId` is left null on every entry.
+pub fn collectLinks(arena: Allocator, root: *Node, frame: *Frame) ![]Link {
+    var links: std.ArrayList(Link) = .empty;
 
     if (Selector.querySelectorAll(root, "a[href]", frame)) |list| {
         defer list.deinit(frame._page);
 
         for (list._nodes) |node| {
-            if (node.is(Element.Html.Anchor)) |anchor| {
-                const href = anchor.getHref(frame) catch |err| {
-                    @import("../lightpanda.zig").log.err(.app, "resolve href failed", .{ .err = err });
-                    continue;
-                };
+            const anchor = node.is(Element.Html.Anchor) orelse continue;
+            const href = anchor.getHref(frame) catch |err| {
+                log.err(.app, "resolve href failed", .{ .err = err });
+                continue;
+            };
+            if (href.len == 0) continue;
 
-                if (href.len > 0) {
-                    try links.append(arena, href);
-                }
-            }
+            try links.append(arena, .{
+                .node = node,
+                // Propagate extraction errors (as `links.append` does) rather
+                // than reporting a failed extraction as "no text".
+                .text = try interactive.getTextContent(node, arena),
+                .href = href,
+            });
         }
     } else |err| {
-        @import("../lightpanda.zig").log.err(.app, "query links failed", .{ .err = err });
+        log.err(.app, "query links failed", .{ .err = err });
         return err;
     }
 
     return links.items;
 }
+
+const testing = 
@import("../testing.zig");
+
+// Parse `html` as children of a fresh <div> on a new test page and return
+// the links collected under that div.
+// Caller must `defer testing.test_session.removePage()` after a successful
+// call — the returned slices live in the page's call_arena.
+fn testLinks(html: []const u8) ![]Link {
+    const frame = try testing.test_session.createPage();
+    errdefer testing.test_session.removePage();
+
+    const doc = frame.window._document;
+    const div = try doc.createElement("div", null, frame);
+    try frame.parseHtmlAsChildren(div.asNode(), html);
+
+    return collectLinks(frame.call_arena, div.asNode(), frame);
+}
+
+// NOTE(review): the <a> markup in the fixtures below was reconstructed from
+// the assertions; hrefs other than the login URL are placeholders — confirm.
+test "links: text and href" {
+    const links = try testLinks(
+        \\<a href="https://example.com/login">Sign in</a>
+        \\ <a href="https://example.com/next">Next page</a>
+        \\<a>no href, skipped</a>
+    );
+    defer testing.test_session.removePage();
+
+    try testing.expectEqual(2, links.len);
+    try testing.expectEqual("Sign in", links[0].text.?);
+    try testing.expectEqual("https://example.com/login", links[0].href);
+    try testing.expectEqual("Next page", links[1].text.?);
+}
+
+test "links: empty text" {
+    const links = try testLinks(
+        \\<a href="https://example.com/empty"></a>
+    );
+    defer testing.test_session.removePage();
+
+    try testing.expectEqual(1, links.len);
+    try testing.expectEqual(null, links[0].text);
+}
diff --git a/src/browser/tools.zig b/src/browser/tools.zig
index 576b27ce..0474bda2 100644
--- a/src/browser/tools.zig
+++ b/src/browser/tools.zig
@@ -187,7 +187,7 @@ pub const Tool = enum {
                 ),
             },
             .links => .{
-                .description = "Extract all links in the opened page. If a url is provided, it navigates to that url first.",
+                .description = "Extract all links in the opened page as JSON objects with `text` (visible anchor text), `href` (resolved URL), and `backendNodeId` (pass to click/nodeDetails). If a url is provided, it navigates to that url first.",
                 .summary = "List all links on the page",
                 .input_schema = url_params_schema,
             },
@@ -870,8 +870,9 @@ fn execLinks(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.
const links_list = lp.links.collectLinks(arena, page.document.asNode(), page) catch return ToolError.InternalError; - - return std.mem.join(arena, "\n", links_list) catch return ToolError.InternalError; + lp.links.registerNodes(links_list, registry) catch + return ToolError.InternalError; + return renderJson(arena, links_list); } fn execTree(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.Registry, arguments: ?std.json.Value) ToolError![]const u8 {