diff --git a/flake.lock b/flake.lock
index dcf837fa..01cb7067 100644
--- a/flake.lock
+++ b/flake.lock
@@ -8,11 +8,11 @@
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
- "lastModified": 1770708269,
- "narHash": "sha256-OnZW86app7hHJJoB5lC9GNXY5QBBIESJB+sIdwEyld0=",
+ "lastModified": 1778493576,
+ "narHash": "sha256-/vvNyF8C2tNTkxtffGUQbcTJvf72cRw3qo8cyBh33pM=",
"owner": "nix-community",
"repo": "fenix",
- "rev": "6b5325a017a9a9fe7e6252ccac3680cc7181cd63",
+ "rev": "5bf88a04d8678c7334f2f5072975f3b2cb0fe1ba",
"type": "github"
},
"original": {
@@ -55,24 +55,6 @@
"type": "github"
}
},
- "flake-utils_2": {
- "inputs": {
- "systems": "systems_2"
- },
- "locked": {
- "lastModified": 1705309234,
- "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=",
- "owner": "numtide",
- "repo": "flake-utils",
- "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26",
- "type": "github"
- },
- "original": {
- "owner": "numtide",
- "repo": "flake-utils",
- "type": "github"
- }
- },
"gitignore": {
"inputs": {
"nixpkgs": [
@@ -96,16 +78,16 @@
},
"nixpkgs": {
"locked": {
- "lastModified": 1768649915,
- "narHash": "sha256-jc21hKogFnxU7KXSVTRmxC7u5D4RHwm9BAvDf5/Z1Uo=",
+ "lastModified": 1778003029,
+ "narHash": "sha256-q/nkKLDtHIyLjZpKhWk3cSK5IYsFqtMd6UtXF3ddjgA=",
"owner": "nixos",
"repo": "nixpkgs",
- "rev": "3e3f3c7f9977dc123c23ee21e8085ed63daf8c37",
+ "rev": "0c88e1f2bdb93d5999019e99cb0e61e1fe2af4c5",
"type": "github"
},
"original": {
"owner": "nixos",
- "ref": "release-25.05",
+ "ref": "nixos-25.11",
"repo": "nixpkgs",
"type": "github"
}
@@ -122,11 +104,11 @@
"rust-analyzer-src": {
"flake": false,
"locked": {
- "lastModified": 1770668050,
- "narHash": "sha256-Q05yaIZtQrBKHpyWaPmyJmDRj0lojnVf8nUFE0vydcY=",
+ "lastModified": 1778424672,
+ "narHash": "sha256-v/CZ9tJT+ulSe3ZmjuG3lWABwOvITbT7EqF/2NAl3Hs=",
"owner": "rust-lang",
"repo": "rust-analyzer",
- "rev": "9efc1f709f3c8134c3acac5d3592a8e4c184a0c6",
+ "rev": "e266f5cab8f6525d0bc2ddccc0006418c534b5e6",
"type": "github"
},
"original": {
@@ -152,6 +134,7 @@
}
},
"systems_2": {
+ "flake": false,
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
@@ -169,17 +152,17 @@
"zigPkgs": {
"inputs": {
"flake-compat": "flake-compat",
- "flake-utils": "flake-utils_2",
"nixpkgs": [
"nixpkgs"
- ]
+ ],
+ "systems": "systems_2"
},
"locked": {
- "lastModified": 1770598090,
- "narHash": "sha256-k+82IDgTd9o5sxHIqGlvfwseKln3Ejx1edGtDltuPXo=",
+ "lastModified": 1778375309,
+ "narHash": "sha256-3+5C2LDX1lmupM6ktG6i50BRvRnN32WLinpxqa2g+HQ=",
"owner": "mitchellh",
"repo": "zig-overlay",
- "rev": "142495696982c88edddc8e17e4da90d8164acadf",
+ "rev": "057bcab6a8e6a3a85e9293e150d35c63404e8fca",
"type": "github"
},
"original": {
diff --git a/flake.nix b/flake.nix
index d306ae09..ffa96a97 100644
--- a/flake.nix
+++ b/flake.nix
@@ -2,7 +2,7 @@
description = "headless browser designed for AI and automation";
inputs = {
- nixpkgs.url = "github:nixos/nixpkgs/release-25.05";
+ nixpkgs.url = "github:nixos/nixpkgs/nixos-25.11";
zigPkgs.url = "github:mitchellh/zig-overlay";
zigPkgs.inputs.nixpkgs.follows = "nixpkgs";
diff --git a/src/Config.zig b/src/Config.zig
index 2e5bcdb3..a4e9afff 100644
--- a/src/Config.zig
+++ b/src/Config.zig
@@ -143,6 +143,24 @@ fn waitScriptFileValidator(allocator: Allocator, args: *std.process.ArgIterator)
};
}
+fn injectScriptFileValidator(
+ allocator: Allocator,
+ args: *std.process.ArgIterator,
+ list: *std.ArrayList([]const u8),
+) !void {
+ const path = args.next() orelse {
+ log.fatal(.app, "missing argument value", .{ .arg = "--inject-script-file" });
+ return error.InvalidArgument;
+ };
+
+ const bytes = std.fs.cwd().readFileAllocOptions(allocator, path, std.math.maxInt(usize), null, .of(u8), null) catch |err| {
+ log.fatal(.app, "failed to read file", .{ .arg = "--inject-script-file", .path = path, .err = err });
+ return error.InvalidArgument;
+ };
+
+ return list.append(allocator, bytes);
+}
+
/// Definition for all the commands and its arguments. See @cli.zig for further.
const Commands = cli.Builder(.{
.{
@@ -176,6 +194,14 @@ const Commands = cli.Builder(.{
},
},
.{ .name = "wait_selector", .type = ?[:0]const u8 },
+ .{
+ .name = "inject_script",
+ .type = []const u8,
+ .multiple = true,
+ .variants = .{
+ .{ .name = "inject_script_file", .validator = injectScriptFileValidator },
+ },
+ },
.{ .name = "terminate_ms", .type = ?u32 },
},
.shared_options = CommonOptions,
@@ -688,6 +714,15 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
\\--wait-script-file
\\ Like --wait-script, but reads the script from a file.
\\
+ \\--inject-script JavaScript to execute as the document's <head> is
+ \\ parsed, before any other scripts in the page run.
+ \\ Can be passed multiple times; scripts run in order.
+ \\
+ \\--inject-script-file
+ \\ Like --inject-script, but reads the script from a file.
+ \\ Can be passed multiple times; can be mixed with
+ \\ --inject-script and runs in CLI order.
+ \\
\\--terminate-ms Hard deadline in milliseconds. After this time elapses,
\\ JavaScript execution is forcibly terminated (e.g. for
\\ pages with endless scripts). Unlike --wait-ms, which
diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig
index 29c70aaf..78d64a83 100644
--- a/src/browser/Frame.zig
+++ b/src/browser/Frame.zig
@@ -1815,26 +1815,12 @@ pub fn notifyNetworkAlmostIdle(self: *Frame) void {
});
}
-// called from the parser
-pub fn appendNew(self: *Frame, parent: *Node, child: Node.NodeOrText) !void {
- const node = switch (child) {
- .node => |n| n,
- .text => |txt| blk: {
- // If we're appending this adjacently to a text node, we should merge
- if (parent.lastChild()) |sibling| {
- if (sibling.is(CData.Text)) |tn| {
- const cdata = tn._proto;
- const existing = cdata.getData().str();
- cdata._data = try String.concat(self.arena, &.{ existing, txt });
- return;
- }
- }
- break :blk try self.createTextNode(txt);
- },
- };
-
- lp.assert(node._parent == null, "Frame.appendNew", .{});
- try self._insertNodeRelative(true, parent, node, .append, .{
+// called from the parser. Text-node merging is the parser's responsibility
+// (see Parser.appendTextChunk in src/browser/parser/Parser.zig); this is the
+// "insert this fully-formed node as a new last child of parent" entry point.
+pub fn appendNew(self: *Frame, parent: *Node, child: *Node) !void {
+ lp.assert(child._parent == null, "Frame.appendNew", .{});
+ try self._insertNodeRelative(true, parent, child, .append, .{
// this opts has no meaning since we're passing `true` as the first
// parameter, which indicates this comes from the parser, and has its
// own special processing. Still, set it to be clear.
@@ -2139,12 +2125,35 @@ pub fn createElementNS(self: *Frame, namespace: Element.Namespace, name: []const
attribute_iterator,
.{ ._proto = undefined },
),
- asUint("head") => return self.createHtmlElementT(
- Element.Html.Head,
- namespace,
- attribute_iterator,
- .{ ._proto = undefined },
- ),
+ asUint("head") => {
+ // Evaluate user-provided scripts before the <head> element itself is created.
+ const inject_scripts = self._session.inject_scripts;
+ const should_inject_scripts = from_parser and self._parse_mode == .document and inject_scripts.len > 0;
+
+ if (should_inject_scripts) {
+ var ls: JS.Local.Scope = undefined;
+ self.js.localScope(&ls);
+ defer ls.deinit();
+
+ for (inject_scripts) |inject_script| {
+ var try_catch: JS.TryCatch = undefined;
+ try_catch.init(&ls.local);
+ defer try_catch.deinit();
+
+ ls.local.eval(inject_script, "inject_script") catch |err| {
+ const caught = try_catch.caughtOrError(self.call_arena, err);
+ log.err(.app, "inject script error", .{ .err = caught });
+ };
+ }
+ }
+
+ return self.createHtmlElementT(
+ Element.Html.Head,
+ namespace,
+ attribute_iterator,
+ .{ ._proto = undefined },
+ );
+ },
asUint("body") => return self.createHtmlElementT(
Element.Html.Body,
namespace,
@@ -4121,6 +4130,12 @@ test "WebApi: Integration" {
try testing.htmlRunner("integration", .{});
}
+test "WebApi: inject_script" {
+ try testing.htmlRunner("inject_script.html", .{
+ .inject_script = "window.__injected = true; window.__injectValue = 42;",
+ });
+}
+
test "Page: isSameOrigin" {
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
diff --git a/src/browser/Session.zig b/src/browser/Session.zig
index faf86ad2..2d27a396 100644
--- a/src/browser/Session.zig
+++ b/src/browser/Session.zig
@@ -57,6 +57,8 @@ navigation: Navigation,
storage_shed: storage.Shed,
notification: *Notification,
cookie_jar: storage.Cookie.Jar,
+/// User-provided scripts, evaluated in order when the parser creates the document's <head>.
+inject_scripts: []const []const u8 = &.{},
// Shared allocator. Used by Session itself and borrowed by Pages.
arena_pool: *ArenaPool,
diff --git a/src/browser/js/bridge.zig b/src/browser/js/bridge.zig
index 366f83af..9761540b 100644
--- a/src/browser/js/bridge.zig
+++ b/src/browser/js/bridge.zig
@@ -935,6 +935,9 @@ pub const PageJsApis = flattenTypes(&.{
@import("../webapi/CryptoKey.zig"),
@import("../webapi/Selection.zig"),
@import("../webapi/ImageData.zig"),
+ @import("../webapi/XPathResult.zig"),
+ @import("../webapi/XPathExpression.zig"),
+ @import("../webapi/XPathEvaluator.zig"),
});
// APIs available on Worker context globals (constructors like URL, Headers, etc.)
diff --git a/src/browser/parser/Parser.zig b/src/browser/parser/Parser.zig
index 44756aed..fe6e1219 100644
--- a/src/browser/parser/Parser.zig
+++ b/src/browser/parser/Parser.zig
@@ -23,6 +23,7 @@ const h5e = @import("html5ever.zig");
const Frame = @import("../Frame.zig");
const Node = @import("../webapi/Node.zig");
const Element = @import("../webapi/Element.zig");
+const CData = @import("../webapi/CData.zig");
pub const AttributeIterator = h5e.AttributeIterator;
@@ -39,6 +40,18 @@ pub const ParsedNode = struct {
data: ?*anyopaque,
};
+// html5ever's tokenizer flushes the script-data character buffer on every '<'
+// (script-data-less-than-sign-state transition), which produces a separate
+// AppendText callback per chunk. Merging via String.concat in the previous
+// implementation was O(N^2/chunk_size) on the page-lifetime arena, blowing
+// memory on inline JS that contains embedded HTML strings (issue #2397).
+// Instead, we keep a single Parser-level buf and accumulate same-parent
+// chunks into it, committing once on flush.
+const PendingText = struct {
+ parent: *Node,
+ text_node: *CData,
+};
+
const Parser = @This();
frame: *Frame,
@@ -46,6 +59,17 @@ err: ?Error,
container: ParsedNode,
arena: Allocator,
strings: std.StringHashMapUnmanaged(void),
+pending_text: ?PendingText,
+// One buffer reused across every text run in this parser. clearRetainingCapacity
+// on flush keeps the largest capacity ever needed, so total dead memory on the
+// parser arena is bounded to one peak-run-sized allocation regardless of how
+// many text runs the parse contains. Matters for Streaming, whose arena is the
+// page-lifetime frame.arena (individual frees are no-ops there).
+//
+// Single-chunk text runs leave this buf empty: the chunk lives only in
+// CData._data via createTextNode. The buf is seeded from _data.str() on the
+// second chunk of a run, so the common case stays at one copy.
+buf: std.ArrayList(u8),
pub fn init(arena: Allocator, node: *Node, frame: *Frame) Parser {
return .{
@@ -57,6 +81,62 @@ pub fn init(arena: Allocator, node: *Node, frame: *Frame) Parser {
.data = null,
.node = node,
},
+ .pending_text = null,
+ .buf = .empty,
+ };
+}
+
+pub fn flushPendingText(self: *Parser) !void {
+ const pt = self.pending_text orelse return;
+ self.pending_text = null;
+ // Single-chunk run: data already lives on _data via createTextNode.
+ if (self.buf.items.len == 0) return;
+ defer self.buf.clearRetainingCapacity();
+ pt.text_node._data = try lp.String.init(
+ self.frame.arena,
+ self.buf.items,
+ .{ .dupe = true },
+ );
+}
+
+fn appendTextChunk(self: *Parser, parent: *Node, txt: []const u8) !void {
+ if (self.pending_text) |pt| {
+ if (pt.parent == parent and parent.lastChild() == pt.text_node.asNode()) {
+ // Second+ chunk of the same run. If buf is still empty, promote
+ // from the single-chunk fast path by seeding from _data first.
+ if (self.buf.items.len == 0) {
+ const existing = pt.text_node.getData().str();
+ try self.buf.ensureTotalCapacity(self.arena, existing.len + txt.len);
+ self.buf.appendSliceAssumeCapacity(existing);
+ }
+ try self.buf.appendSlice(self.arena, txt);
+ return;
+ }
+ try self.flushPendingText();
+ }
+
+ if (parent.lastChild()) |sibling| {
+ if (sibling.is(CData.Text)) |tn| {
+ // Existing text sibling without a matching pending_text. Seed the
+ // buf from its _data and register pending so subsequent chunks
+ // accumulate cheaply.
+ const cdata = tn._proto;
+ const existing = cdata.getData().str();
+ try self.buf.ensureTotalCapacity(self.arena, existing.len + txt.len);
+ self.buf.appendSliceAssumeCapacity(existing);
+ self.buf.appendSliceAssumeCapacity(txt);
+ self.pending_text = .{ .parent = parent, .text_node = cdata };
+ return;
+ }
+ }
+
+ // Fresh text run: the first chunk lives on _data only. buf stays empty
+ // until (and unless) a second chunk arrives.
+ const new_text = try self.frame.createTextNode(txt);
+ try self.frame.appendNew(parent, new_text);
+ self.pending_text = .{
+ .parent = parent,
+ .text_node = new_text.is(CData.Text).?._proto,
};
}
@@ -101,6 +181,9 @@ pub fn parse(self: *Parser, html: []const u8) void {
appendBeforeSiblingCallback,
appendBasedOnParentNodeCallback,
);
+ self.flushPendingText() catch |err| {
+ if (self.err == null) self.err = .{ .err = err, .source = .append };
+ };
}
/// Parse HTML with encoding conversion. Converts from charset to UTF-8 before parsing.
@@ -127,6 +210,9 @@ pub fn parseWithEncoding(self: *Parser, html: []const u8, charset: []const u8) v
appendBeforeSiblingCallback,
appendBasedOnParentNodeCallback,
);
+ self.flushPendingText() catch |err| {
+ if (self.err == null) self.err = .{ .err = err, .source = .append };
+ };
}
pub fn parseXML(self: *Parser, xml: []const u8) void {
@@ -150,6 +236,9 @@ pub fn parseXML(self: *Parser, xml: []const u8) void {
appendBeforeSiblingCallback,
appendBasedOnParentNodeCallback,
);
+ self.flushPendingText() catch |err| {
+ if (self.err == null) self.err = .{ .err = err, .source = .append };
+ };
}
pub fn parseFragment(self: *Parser, html: []const u8) void {
@@ -173,6 +262,9 @@ pub fn parseFragment(self: *Parser, html: []const u8) void {
appendBeforeSiblingCallback,
appendBasedOnParentNodeCallback,
);
+ self.flushPendingText() catch |err| {
+ if (self.err == null) self.err = .{ .err = err, .source = .append };
+ };
}
pub const Streaming = struct {
@@ -233,8 +325,16 @@ pub const Streaming = struct {
}
}
- pub fn done(self: *Streaming) void {
- h5e.html5ever_streaming_parser_finish(self.handle.?);
+ pub fn done(self: *Streaming) !void {
+ // Null the handle before finish() so a flushPendingText failure can't
+ // leave a finished-but-still-referenced handle behind for deinit to
+ // double-free. flushPendingText doesn't touch the html5ever handle —
+ // it only reads pending_text and writes to a text node's _data — so
+ // running it after finish is safe.
+ const handle = self.handle.?;
+ self.handle = null;
+ h5e.html5ever_streaming_parser_finish(handle);
+ try self.parser.flushPendingText();
}
};
@@ -252,6 +352,9 @@ fn popCallback(ctx: *anyopaque, node_ref: *anyopaque) callconv(.c) void {
}
fn _popCallback(self: *Parser, node: *Node) !void {
+ // Flush before any nodeComplete so Build.complete (and any custom-element
+ // callbacks reachable from it) observe the final text data.
+ try self.flushPendingText();
try self.frame.nodeComplete(node);
}
@@ -340,7 +443,7 @@ fn _appendDoctypeToDocument(self: *Parser, name: []const u8, public_id: []const
});
// Append it to the document
- try frame.appendNew(self.container.node, .{ .node = doctype.asNode() });
+ try frame.appendNew(self.container.node, doctype.asNode());
}
fn addAttrsIfMissingCallback(ctx: *anyopaque, target_ref: *anyopaque, attributes: h5e.AttributeIterator) callconv(.c) void {
@@ -402,6 +505,10 @@ fn _appendCallback(self: *Parser, parent: *Node, node_or_text: h5e.NodeOrText) !
// child node is guaranteed not to belong to another parent
switch (node_or_text.toUnion()) {
.node => |cpn| {
+ // Inserting a non-text child terminates any pending text run; flush
+ // before the insertion so that connectedCallback (etc.) sees the
+ // final data on the preceding text sibling.
+ try self.flushPendingText();
const child = getNode(cpn);
if (child._parent) |previous_parent| {
// html5ever says this can't happen, but we might be screwing up
@@ -414,9 +521,9 @@ fn _appendCallback(self: *Parser, parent: *Node, node_or_text: h5e.NodeOrText) !
}
self.frame.removeNode(previous_parent, child, .{ .will_be_reconnected = parent.isConnected() });
}
- try self.frame.appendNew(parent, .{ .node = child });
+ try self.frame.appendNew(parent, child);
},
- .text => |txt| try self.frame.appendNew(parent, .{ .text = txt }),
+ .text => |txt| try self.appendTextChunk(parent, txt),
}
}
@@ -427,6 +534,11 @@ fn removeFromParentCallback(ctx: *anyopaque, target_ref: *anyopaque) callconv(.c
};
}
fn _removeFromParentCallback(self: *Parser, node: *Node) !void {
+ // Removing a node mid-parse can detach the pending text node or its
+ // parent; either way the pending invariant breaks. Flush first so the
+ // accumulated bytes land on a still-attached text node (and pending_text
+ // is cleared before any subsequent chunk targets a fresh node).
+ try self.flushPendingText();
const parent = node.parentNode() orelse return;
_ = try parent.removeChild(node, self.frame);
}
@@ -438,6 +550,10 @@ fn reparentChildrenCallback(ctx: *anyopaque, node_ref: *anyopaque, new_parent_re
};
}
fn _reparentChildrenCallback(self: *Parser, node: *Node, new_parent: *Node) !void {
+ // Reparenting can move the pending text node out from under us — the
+ // node's _parent changes but pending_text.parent does not. Flush so the
+ // accumulator commits before the tree is rearranged.
+ try self.flushPendingText();
try self.frame.appendAllChildren(node, new_parent);
}
@@ -448,6 +564,10 @@ fn appendBeforeSiblingCallback(ctx: *anyopaque, sibling_ref: *anyopaque, node_or
};
}
fn _appendBeforeSiblingCallback(self: *Parser, sibling: *Node, node_or_text: h5e.NodeOrText) !void {
+ // Foster parenting / before-sibling insertions interrupt any pending text
+ // run (the new node lands at a different position from the pending text's
+ // tail). Flush before reading the parent's structure.
+ try self.flushPendingText();
const parent = sibling.parentNode() orelse return error.NoParent;
const node: *Node = switch (node_or_text.toUnion()) {
.node => |cpn| blk: {
diff --git a/src/browser/tests/cdata/raw_text_chunked.html b/src/browser/tests/cdata/raw_text_chunked.html
new file mode 100644
index 00000000..8a5503a4
--- /dev/null
+++ b/src/browser/tests/cdata/raw_text_chunked.html
@@ -0,0 +1,128 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+A page <with> many <tags> in <the> title for testing
+
+
diff --git a/src/browser/tests/cdp/perform_search_xpath.html b/src/browser/tests/cdp/perform_search_xpath.html
new file mode 100644
index 00000000..e30ca1c1
--- /dev/null
+++ b/src/browser/tests/cdp/perform_search_xpath.html
@@ -0,0 +1,8 @@
+
+
+
+ 3
+
diff --git a/src/browser/tests/document/document.html b/src/browser/tests/document/document.html
index ede2b507..eb69f5d8 100644
--- a/src/browser/tests/document/document.html
+++ b/src/browser/tests/document/document.html
@@ -380,6 +380,53 @@
testing.expectEqual(0, nd.childElementCount);
+
+
diff --git a/src/browser/tests/element/html/link.html b/src/browser/tests/element/html/link.html
index 57d9e28b..9f4dd6a8 100644
--- a/src/browser/tests/element/html/link.html
+++ b/src/browser/tests/element/html/link.html
@@ -1,6 +1,32 @@
+
+
+
+
+
@@ -21,9 +21,9 @@
const container = $('#container');
// Invalid nth patterns
- testing.expectError("Error: InvalidNthPattern", () => container.querySelector(':nth-child(foo)'));
- testing.expectError("Error: InvalidNthPattern", () => container.querySelector(':nth-child(-)'));
- testing.expectError("Error: InvalidNthPattern", () => container.querySelector(':nth-child(+)'));
+ testing.expectError("SyntaxError", () => container.querySelector(':nth-child(foo)'));
+ testing.expectError("SyntaxError", () => container.querySelector(':nth-child(-)'));
+ testing.expectError("SyntaxError", () => container.querySelector(':nth-child(+)'));
}
@@ -32,9 +32,9 @@
const container = $('#container');
// Unknown pseudo-classes
- testing.expectError("Error: UnknownPseudoClass", () => container.querySelector(':unknown'));
- testing.expectError("Error: UnknownPseudoClass", () => container.querySelector(':not-a-real-pseudo'));
- testing.expectError("Error: UnknownPseudoClass", () => container.querySelector(':fake(test)'));
+ testing.expectError("SyntaxError", () => container.querySelector(':unknown'));
+ testing.expectError("SyntaxError", () => container.querySelector(':not-a-real-pseudo'));
+ testing.expectError("SyntaxError", () => container.querySelector(':fake(test)'));
}
@@ -53,8 +53,8 @@
const container = $('#container');
// Combinators with nothing after
- testing.expectError("Error: InvalidSelector", () => container.querySelector('p >'));
- testing.expectError("Error: InvalidSelector", () => container.querySelector('p +'));
- testing.expectError("Error: InvalidSelector", () => container.querySelector('p ~'));
+ testing.expectError("SyntaxError", () => container.querySelector('p >'));
+ testing.expectError("SyntaxError", () => container.querySelector('p +'));
+ testing.expectError("SyntaxError", () => container.querySelector('p ~'));
}
diff --git a/src/browser/tests/inject_script.html b/src/browser/tests/inject_script.html
new file mode 100644
index 00000000..f7917a54
--- /dev/null
+++ b/src/browser/tests/inject_script.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
diff --git a/src/browser/tests/page/meta.html b/src/browser/tests/page/meta.html
index 3c03f403..98fb1688 100644
--- a/src/browser/tests/page/meta.html
+++ b/src/browser/tests/page/meta.html
@@ -30,7 +30,8 @@
testing.expectEqual('undefined', typeof plainDoc.scripts);
testing.expectEqual('undefined', typeof plainDoc.links);
testing.expectEqual('undefined', typeof plainDoc.forms);
- testing.expectEqual('undefined', typeof plainDoc.location);
+ // location lives on Document (returns null for non-HTMLDocument).
+ testing.expectEqual(null, plainDoc.location);
// Both should have common Document properties
testing.expectEqual('string', typeof document.URL);
diff --git a/src/browser/tests/xpath/document_evaluate.html b/src/browser/tests/xpath/document_evaluate.html
new file mode 100644
index 00000000..2c4fdc58
--- /dev/null
+++ b/src/browser/tests/xpath/document_evaluate.html
@@ -0,0 +1,123 @@
+
+
+
+ Hello
+
+
First
+
Second
+
Third
+
+ x
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/browser/tests/xpath/xpath_conformance.html b/src/browser/tests/xpath/xpath_conformance.html
new file mode 100644
index 00000000..7080ecb6
--- /dev/null
+++ b/src/browser/tests/xpath/xpath_conformance.html
@@ -0,0 +1,201 @@
+
+
+
+ XPath conformance
+
+
+
+ Hello World
+ First paragraph with emphasis.
+ Second paragraph.
+
+ - Item 1
+ - Item 2
+ - Item 3
+ - Item 4
+ - Item 5
+
+
+ | Name | Age |
+
+ | Alice | 30 |
+ | Bob | 25 |
+ | Carol | 40 |
+
+
+
+
+
+
+
+ One
+ Two
+ Three
+
+
+
+
+
diff --git a/src/browser/tests/xpath/xpath_evaluator.html b/src/browser/tests/xpath/xpath_evaluator.html
new file mode 100644
index 00000000..6cb6a886
--- /dev/null
+++ b/src/browser/tests/xpath/xpath_evaluator.html
@@ -0,0 +1,103 @@
+
+
+
+ Hello
+ One
+ Two
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/browser/tests/xpath/xpath_perf.html b/src/browser/tests/xpath/xpath_perf.html
new file mode 100644
index 00000000..21cac3fc
--- /dev/null
+++ b/src/browser/tests/xpath/xpath_perf.html
@@ -0,0 +1,171 @@
+
+
+
+ XPath perf benchmark
+
+
+
+
+
+
+
+
+
+
diff --git a/src/browser/tests/xpath/xpath_result.html b/src/browser/tests/xpath/xpath_result.html
new file mode 100644
index 00000000..f7674e7b
--- /dev/null
+++ b/src/browser/tests/xpath/xpath_result.html
@@ -0,0 +1,193 @@
+
+
+
+ Hello
+ One
+ Two
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/browser/webapi/DOMImplementation.zig b/src/browser/webapi/DOMImplementation.zig
index 777a9571..280db6a8 100644
--- a/src/browser/webapi/DOMImplementation.zig
+++ b/src/browser/webapi/DOMImplementation.zig
@@ -78,7 +78,7 @@ pub fn createDocument(_: *const DOMImplementation, namespace_: ?[]const u8, qual
// Create and append root element if qualified_name provided
if (qualified_name) |qname| {
if (qname.len > 0) {
- const namespace = if (namespace_) |ns| Node.Element.Namespace.parse(ns) else .xml;
+ const namespace = Node.Element.Namespace.parse(namespace_);
const root = try frame.createElementNS(namespace, qname, null);
_ = try document.asNode().appendChild(root, frame);
}
diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig
index 411eb678..ca9dc7a7 100644
--- a/src/browser/webapi/Document.zig
+++ b/src/browser/webapi/Document.zig
@@ -35,6 +35,8 @@ const DOMImplementation = @import("DOMImplementation.zig");
const StyleSheetList = @import("css/StyleSheetList.zig");
const FontFaceSet = @import("css/FontFaceSet.zig");
const Selection = @import("Selection.zig");
+const XPathResult = @import("XPathResult.zig");
+const XPathExpression = @import("XPathExpression.zig");
pub const XMLDocument = @import("XMLDocument.zig");
pub const HTMLDocument = @import("HTMLDocument.zig");
@@ -119,7 +121,18 @@ pub fn asEventTarget(self: *Document) *@import("EventTarget.zig") {
}
pub fn getURL(self: *const Document, frame: *const Frame) [:0]const u8 {
- return self._url orelse frame.url;
+ return self._url orelse (self._frame orelse frame).url;
+}
+
+pub fn getLocation(self: *const Document) ?*Location {
+ if (self._type != .html) return null;
+ const doc_frame = self._frame orelse return null;
+ return doc_frame.window._location;
+}
+
+pub fn setLocation(self: *Document, url: [:0]const u8, frame: *Frame) !void {
+ if (self._type != .html) return;
+ return frame.scheduleNavigation(url, .{ .reason = .script, .kind = .{ .push = null } }, .{ .script = self._frame });
}
pub fn getContentType(self: *const Document) []const u8 {
@@ -277,11 +290,11 @@ pub fn getSelection(self: *Document) *Selection {
}
pub fn querySelector(self: *Document, input: String, frame: *Frame) !?*Element {
- return Selector.querySelector(self.asNode(), input.str(), frame);
+ return Selector.querySelector(self.asNode(), input.str(), frame) catch |err| Selector.mapErrorToDOM(err);
}
pub fn querySelectorAll(self: *Document, input: String, frame: *Frame) !*Selector.List {
- return Selector.querySelectorAll(self.asNode(), input.str(), frame);
+ return Selector.querySelectorAll(self.asNode(), input.str(), frame) catch |err| Selector.mapErrorToDOM(err);
}
pub fn getImplementation(self: *Document, frame: *Frame) !*DOMImplementation {
@@ -412,6 +425,44 @@ pub fn createNodeIterator(_: *const Document, root: *Node, what_to_show: ?js.Val
return DOMNodeIterator.init(root, try whatToShow(what_to_show), filter, frame);
}
+pub fn evaluate(
+ self: *Document,
+ expression: []const u8,
+ context_node: ?*Node,
+ resolver: ?js.Function,
+ result_type: ?u16,
+ result: ?*XPathResult,
+ frame: *Frame,
+) !*XPathResult {
+ // resolver/result are no-ops in HTML mode (decision #2).
+ // Null/missing context_node falls back to the document — matches the
+ // polyfill (decision #2). Firefox throws TypeError on a *missing*
+ // arg, but the bridge can't distinguish "missing" from "explicit
+ // null" here, so polyfill parity wins for the ambiguity.
+ _ = resolver;
+ _ = result;
+ return XPathResult.fromExpression(
+ expression,
+ context_node orelse self.asNode(),
+ result_type orelse XPathResult.ANY_TYPE,
+ frame,
+ );
+}
+
+pub fn createExpression(
+ _: *const Document,
+ expression: []const u8,
+ resolver: ?js.Function,
+ frame: *Frame,
+) !*XPathExpression {
+ _ = resolver;
+ return XPathExpression.init(expression, frame);
+}
+
+pub fn createNSResolver(_: *const Document, node: *Node) ?*Node {
+ return node;
+}
+
fn whatToShow(value_: ?js.Value) !u32 {
const value = value_ orelse return 4294967295; // show all when undefined
if (value.isUndefined()) {
@@ -465,15 +516,21 @@ pub fn getFonts(self: *Document, frame: *Frame) !*FontFaceSet {
return fonts;
}
-pub fn adoptNode(_: *const Document, node: *Node, frame: *Frame) !*Node {
+pub fn adoptNode(self: *Document, node: *Node, frame: *Frame) !*Node {
if (node._type == .document) {
return error.NotSupported;
}
+ const old_owner = node.ownerDocument(frame) orelse frame.document;
+
if (node._parent) |parent| {
frame.removeNode(parent, node, .{ .will_be_reconnected = false });
}
+ if (old_owner != self) {
+ try frame.adoptNodeTree(node, old_owner, self);
+ }
+
return node;
}
@@ -666,7 +723,13 @@ fn writeInternal(self: *Document, text: []const []const u8, append_newline: bool
if (self._script_created_parser) |*parser| {
parser.read(html) catch |err| {
log.warn(.dom, "document.write parser error", .{ .err = err });
- // was already closed
+ // html5ever's handle was destroyed inside read(), but the
+ // pending text buffer (if any) still wants to land on its
+ // text node's _data — flushPendingText doesn't depend on
+ // the handle, so attempt a final flush before dropping.
+ parser.parser.flushPendingText() catch |flush_err| {
+ log.warn(.dom, "flush after parser panic", .{ .err = flush_err });
+ };
self._script_created_parser = null;
};
}
@@ -795,12 +858,12 @@ pub fn close(self: *Document, frame: *Frame) !void {
return;
}
- // done() calls html5ever_streaming_parser_finish which frees the parser
- // We must NOT call deinit() after done() as that would be a double-free
- self._script_created_parser.?.done();
- // Just null out the handle since done() already freed it
- self._script_created_parser.?.handle = null;
- self._script_created_parser = null;
+ // done() finishes html5ever's handle and runs the final flushPendingText.
+ // Even if flushPendingText errors, the handle is already finished and we
+ // must not retain the Streaming — defer so the error path also drops it.
+ // (Streaming.done nulls its own handle, so dropping the struct is safe.)
+ defer self._script_created_parser = null;
+ try self._script_created_parser.?.done();
frame.documentIsComplete();
}
@@ -1027,6 +1090,7 @@ pub const JsApi = struct {
pub const onselectionchange = bridge.accessor(Document.getOnSelectionChange, Document.setOnSelectionChange, .{});
pub const URL = bridge.accessor(Document.getURL, null, .{});
+ pub const location = bridge.accessor(Document.getLocation, Document.setLocation, .{});
pub const documentURI = bridge.accessor(Document.getURL, null, .{});
pub const documentElement = bridge.accessor(Document.getDocumentElement, null, .{});
pub const scrollingElement = bridge.accessor(Document.getDocumentElement, null, .{});
@@ -1051,6 +1115,9 @@ pub const JsApi = struct {
pub const createEvent = bridge.function(Document.createEvent, .{ .dom_exception = true });
pub const createTreeWalker = bridge.function(Document.createTreeWalker, .{});
pub const createNodeIterator = bridge.function(Document.createNodeIterator, .{});
+ pub const evaluate = bridge.function(Document.evaluate, .{ .dom_exception = true });
+ pub const createExpression = bridge.function(Document.createExpression, .{ .dom_exception = true });
+ pub const createNSResolver = bridge.function(Document.createNSResolver, .{});
pub const getElementById = bridge.function(_getElementById, .{});
fn _getElementById(self: *Document, value_: ?js.Value, frame: *Frame) !?*Element {
const value = value_ orelse return null;
@@ -1111,3 +1178,7 @@ const testing = @import("../../testing.zig");
test "WebApi: Document" {
try testing.htmlRunner("document", .{});
}
+
+test "WebApi: Document.evaluate" {
+ try testing.htmlRunner("xpath/document_evaluate.html", .{});
+}
diff --git a/src/browser/webapi/DocumentFragment.zig b/src/browser/webapi/DocumentFragment.zig
index 186bc68a..b55050f2 100644
--- a/src/browser/webapi/DocumentFragment.zig
+++ b/src/browser/webapi/DocumentFragment.zig
@@ -84,11 +84,11 @@ pub fn getElementById(self: *DocumentFragment, id: []const u8) ?*Element {
}
pub fn querySelector(self: *DocumentFragment, selector: []const u8, frame: *Frame) !?*Element {
- return Selector.querySelector(self.asNode(), selector, frame);
+ return Selector.querySelector(self.asNode(), selector, frame) catch |err| Selector.mapErrorToDOM(err);
}
pub fn querySelectorAll(self: *DocumentFragment, input: []const u8, frame: *Frame) !*Selector.List {
- return Selector.querySelectorAll(self.asNode(), input, frame);
+ return Selector.querySelectorAll(self.asNode(), input, frame) catch |err| Selector.mapErrorToDOM(err);
}
pub fn getChildren(self: *DocumentFragment, frame: *Frame) !collections.NodeLive(.child_elements) {
diff --git a/src/browser/webapi/Element.zig b/src/browser/webapi/Element.zig
index 4de1a732..058875a6 100644
--- a/src/browser/webapi/Element.zig
+++ b/src/browser/webapi/Element.zig
@@ -1071,15 +1071,15 @@ pub fn getChildElementCount(self: *Element) usize {
}
pub fn matches(self: *Element, selector: []const u8, frame: *Frame) !bool {
- return Selector.matches(self, selector, frame);
+ return Selector.matches(self, selector, frame) catch |err| Selector.mapErrorToDOM(err);
}
pub fn querySelector(self: *Element, selector: []const u8, frame: *Frame) !?*Element {
- return Selector.querySelector(self.asNode(), selector, frame);
+ return Selector.querySelector(self.asNode(), selector, frame) catch |err| Selector.mapErrorToDOM(err);
}
pub fn querySelectorAll(self: *Element, input: []const u8, frame: *Frame) !*Selector.List {
- return Selector.querySelectorAll(self.asNode(), input, frame);
+ return Selector.querySelectorAll(self.asNode(), input, frame) catch |err| Selector.mapErrorToDOM(err);
}
pub fn getAnimations(_: *const Element) []*Animation {
diff --git a/src/browser/webapi/HTMLDocument.zig b/src/browser/webapi/HTMLDocument.zig
index 41782cc8..19e462a1 100644
--- a/src/browser/webapi/HTMLDocument.zig
+++ b/src/browser/webapi/HTMLDocument.zig
@@ -196,15 +196,6 @@ pub fn getCurrentScript(self: *const HTMLDocument) ?*Element.Html.Script {
return self._proto._current_script;
}
-pub fn getLocation(self: *const HTMLDocument) ?*@import("Location.zig") {
- const frame = self._proto._frame orelse return null;
- return frame.window._location;
-}
-
-pub fn setLocation(self: *HTMLDocument, url: [:0]const u8, frame: *Frame) !void {
- return frame.scheduleNavigation(url, .{ .reason = .script, .kind = .{ .push = null } }, .{ .script = self._proto._frame });
-}
-
pub fn getDir(self: *HTMLDocument) []const u8 {
const el = self._proto.getDocumentElement() orelse return "";
const html = el.is(Element.Html) orelse return "";
@@ -311,7 +302,6 @@ pub const JsApi = struct {
pub const applets = bridge.accessor(HTMLDocument.getApplets, null, .{});
pub const plugins = bridge.accessor(HTMLDocument.getEmbeds, null, .{});
pub const currentScript = bridge.accessor(HTMLDocument.getCurrentScript, null, .{});
- pub const location = bridge.accessor(HTMLDocument.getLocation, HTMLDocument.setLocation, .{});
pub const all = bridge.accessor(HTMLDocument.getAll, null, .{});
pub const cookie = bridge.accessor(HTMLDocument.getCookie, HTMLDocument.setCookie, .{});
pub const doctype = bridge.accessor(HTMLDocument.getDocType, null, .{});
diff --git a/src/browser/webapi/Node.zig b/src/browser/webapi/Node.zig
index 7df1fd6a..c26411ed 100644
--- a/src/browser/webapi/Node.zig
+++ b/src/browser/webapi/Node.zig
@@ -166,7 +166,7 @@ pub fn findAdjacentNodes(self: *Node, position: []const u8) !struct { *Node, ?*N
// Returned if:
// * position is not one of the four listed values.
// * The input is XML that is not well-formed.
- return error.Syntax;
+ return error.SyntaxError;
}
pub fn firstChild(self: *const Node) ?*Node {
diff --git a/src/browser/webapi/Performance.zig b/src/browser/webapi/Performance.zig
index 22d37dd0..516d02cb 100644
--- a/src/browser/webapi/Performance.zig
+++ b/src/browser/webapi/Performance.zig
@@ -184,6 +184,11 @@ pub fn clearMeasures(self: *Performance, measure_name: ?[]const u8) void {
}
}
+/// Stand-in for `Performance.setResourceTimingBufferSize`: the receiver and
+/// the requested buffer size are both accepted and discarded.
+pub fn setResourceTimingBufferSize(_: *Performance, _: u32) void {}
+
pub fn getEntries(self: *const Performance) []*Entry {
return self._entries.items;
}
@@ -281,6 +286,7 @@ pub const JsApi = struct {
pub const measure = bridge.function(Performance.measure, .{ .dom_exception = true });
pub const clearMarks = bridge.function(Performance.clearMarks, .{});
pub const clearMeasures = bridge.function(Performance.clearMeasures, .{});
+ pub const setResourceTimingBufferSize = bridge.function(Performance.setResourceTimingBufferSize, .{ .noop = true });
pub const getEntries = bridge.function(Performance.getEntries, .{});
pub const getEntriesByType = bridge.function(Performance.getEntriesByType, .{});
pub const getEntriesByName = bridge.function(Performance.getEntriesByName, .{});
diff --git a/src/browser/webapi/Worker.zig b/src/browser/webapi/Worker.zig
index 67d24526..5d571fa6 100644
--- a/src/browser/webapi/Worker.zig
+++ b/src/browser/webapi/Worker.zig
@@ -416,5 +416,8 @@ pub const JsApi = struct {
const testing = @import("../../testing.zig");
test "WebApi: Worker" {
- try testing.htmlRunner("worker", .{});
+ // Worker tests chain a worker-script fetch with a dynamic-import fetch
+ // and a cross-context postMessage. The default 2 s assertion budget can
+ // blow up on TSAN CI; give it more room.
+ try testing.htmlRunner("worker", .{ .timeout_ms = 8000 });
}
diff --git a/src/browser/webapi/XPathEvaluator.zig b/src/browser/webapi/XPathEvaluator.zig
new file mode 100644
index 00000000..7cae18b9
--- /dev/null
+++ b/src/browser/webapi/XPathEvaluator.zig
@@ -0,0 +1,97 @@
+// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+//! WHATWG `XPathEvaluator` — a stateless factory for XPath evaluation.
+//! Mirrors `Document.evaluate` / `Document.createExpression` /
+//! `Document.createNSResolver` so an explicit
+//! `new XPathEvaluator()` instance can be used in place of the
+//! document.
+
+const std = @import("std");
+
+const js = @import("../js/js.zig");
+const Frame = @import("../Frame.zig");
+
+const Node = @import("Node.zig");
+const XPathResult = @import("XPathResult.zig");
+const XPathExpression = @import("XPathExpression.zig");
+
+const XPathEvaluator = @This();
+
+// Padding to avoid zero-size struct identity_map collisions (matches
+// the convention in ResizeObserver.zig).
+_pad: bool = false,
+
+/// Builds an `XPathEvaluator`; every field keeps its declared default.
+pub fn init() XPathEvaluator {
+    const evaluator: XPathEvaluator = .{};
+    return evaluator;
+}
+
+/// One-shot evaluation of `expression` against `context_node`.
+///
+/// `resolver` and `result` are accepted and ignored. A missing
+/// `requested_type` is treated as `XPathResult.ANY_TYPE`. The work itself is
+/// delegated to `XPathResult.fromExpression`, whose errors propagate.
+pub fn evaluate(
+    _: *const XPathEvaluator,
+    expression: []const u8,
+    context_node: *Node,
+    resolver: ?js.Function,
+    requested_type: ?u16,
+    result: ?*XPathResult,
+    frame: *Frame,
+) !*XPathResult {
+    const wanted_type = requested_type orelse XPathResult.ANY_TYPE;
+
+    // No namespace resolution is performed (HTML mode — decision #2), and a
+    // caller-supplied XPathResult is never repopulated: fromExpression
+    // always hands back a newly allocated instance.
+    _ = result;
+    _ = resolver;
+
+    return XPathResult.fromExpression(expression, context_node, wanted_type, frame);
+}
+
+/// Parses `expression` into a reusable `XPathExpression` by calling
+/// `XPathExpression.init`. `resolver` is accepted but never consulted.
+pub fn createExpression(
+    _: *const XPathEvaluator,
+    expression: []const u8,
+    resolver: ?js.Function,
+    frame: *Frame,
+) !*XPathExpression {
+    _ = resolver;
+    const compiled = try XPathExpression.init(expression, frame);
+    return compiled;
+}
+
+/// Returns `node` unchanged.
+///
+/// HTML-mode passthrough: the WHATWG IDL takes a Node and returns an
+/// `XPathNSResolver`, and here the input node itself plays that role.
+pub fn createNSResolver(_: *const XPathEvaluator, node: *Node) ?*Node {
+    const resolver: ?*Node = node;
+    return resolver;
+}
+
+/// JS bridge declarations for `XPathEvaluator`: class metadata, the
+/// constructor, and the three methods defined in this file.
+pub const JsApi = struct {
+ pub const bridge = js.Bridge(XPathEvaluator);
+
+ pub const Meta = struct {
+ pub const name = "XPathEvaluator";
+ pub const prototype_chain = bridge.prototypeChain();
+ pub var class_id: bridge.ClassId = undefined;
+ pub const empty_with_no_proto = true;
+ };
+
+ // The constructor is backed by `XPathEvaluator.init`.
+ pub const constructor = bridge.constructor(XPathEvaluator.init, .{});
+ // `evaluate` and `createExpression` are registered with
+ // `.dom_exception = true`; `createNSResolver` is not.
+ // NOTE(review): presumably this makes Zig errors surface as DOMExceptions — confirm against the bridge.
+ pub const evaluate = bridge.function(XPathEvaluator.evaluate, .{ .dom_exception = true });
+ pub const createExpression = bridge.function(XPathEvaluator.createExpression, .{ .dom_exception = true });
+ pub const createNSResolver = bridge.function(XPathEvaluator.createNSResolver, .{});
+};
+
+const testing = @import("../../testing.zig");
+
+// Runs the `xpath/xpath_evaluator.html` fixture through `testing.htmlRunner`
+// with default options.
+test "WebApi: XPathEvaluator + XPathExpression" {
+ try testing.htmlRunner("xpath/xpath_evaluator.html", .{});
+}
diff --git a/src/browser/webapi/XPathExpression.zig b/src/browser/webapi/XPathExpression.zig
new file mode 100644
index 00000000..d801ac5a
--- /dev/null
+++ b/src/browser/webapi/XPathExpression.zig
@@ -0,0 +1,105 @@
+// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+//! WHATWG `XPathExpression` — a parsed XPath expression cached for
+//! repeated evaluation. The parsed AST lives in this object's per-
+//! instance arena (long-lived); each `evaluate()` call gets a fresh
+//! arena for its own result data so multiple evaluations don't grow
+//! the AST arena.
+
+const std = @import("std");
+const lp = @import("lightpanda");
+
+const js = @import("../js/js.zig");
+const Page = @import("../Page.zig");
+const Frame = @import("../Frame.zig");
+
+const Node = @import("Node.zig");
+const XPathResult = @import("XPathResult.zig");
+
+// XPath engine modules from `../xpath/`, grouped under one local namespace.
+const xpath = struct {
+ const Ast = @import("../xpath/ast.zig");
+ const Parser = @import("../xpath/Parser.zig");
+ const Evaluator = @import("../xpath/Evaluator.zig");
+};
+
+const Allocator = std.mem.Allocator;
+
+const XPathExpression = @This();
+
+/// Reference count driven by `acquireRef` / `releaseRef`.
+_rc: lp.RC(u8) = .{},
+/// Arena obtained in `init`; `deinit` hands it back via `page.releaseArena`.
+_arena: Allocator,
+/// Parsed AST, passed to `xpath.Evaluator.evaluate` on every `evaluate()` call.
+_expr: *const xpath.Ast.Expr,
+
+/// Parses `expression` and returns a new `XPathExpression`.
+///
+/// A `.tiny` arena is obtained from `frame`. It holds a private copy of the
+/// expression text, the parsed AST and the struct itself. If any step fails
+/// the arena is released again and the error is propagated.
+pub fn init(expression: []const u8, frame: *Frame) !*XPathExpression {
+    const ast_arena = try frame.getArena(.tiny, "XPathExpression");
+    errdefer frame.releaseArena(ast_arena);
+
+    // Parse a private copy of the text: the AST keeps slices into its input
+    // (literals, names, var refs, function names), while `expression` lives
+    // in the JS call_arena and is reclaimed once the top-level call returns.
+    const source_copy = try ast_arena.dupe(u8, expression);
+    const parsed = try xpath.Parser.parse(ast_arena, source_copy);
+
+    const instance = try ast_arena.create(XPathExpression);
+    instance.* = .{ ._expr = parsed, ._arena = ast_arena };
+    return instance;
+}
+
+/// Evaluates the cached AST against `context_node` and wraps the outcome in
+/// a new `XPathResult`.
+///
+/// Each call obtains its own `.medium` arena from `frame` for the result
+/// data. That arena is released if evaluation or wrapping fails, otherwise
+/// it is handed to `XPathResult.fromResult`. A missing `requested_type` is
+/// treated as `XPathResult.ANY_TYPE`.
+pub fn evaluate(
+    self: *XPathExpression,
+    context_node: *Node,
+    requested_type: ?u16,
+    result: ?*XPathResult,
+    frame: *Frame,
+) !*XPathResult {
+    // WHATWG lets callers pass an existing XPathResult to repopulate; that
+    // argument is ignored here and a new result is allocated on every call.
+    _ = result;
+
+    const wanted_type = requested_type orelse XPathResult.ANY_TYPE;
+
+    const result_arena = try frame.getArena(.medium, "XPathResult");
+    errdefer frame.releaseArena(result_arena);
+
+    const evaluated = try xpath.Evaluator.evaluate(result_arena, self._expr, context_node, frame);
+    return XPathResult.fromResult(result_arena, wanted_type, evaluated);
+}
+
+/// Hands the expression's arena back to `page`.
+pub fn deinit(self: *XPathExpression, page: *Page) void {
+    const owned_arena = self._arena;
+    page.releaseArena(owned_arena);
+}
+
+/// Takes one reference on the expression's `_rc` counter.
+pub fn acquireRef(self: *XPathExpression) void {
+    const counter = &self._rc;
+    counter.acquire();
+}
+
+/// Drops one reference by forwarding `self` and `page` to `_rc.release`.
+pub fn releaseRef(self: *XPathExpression, page: *Page) void {
+    const counter = &self._rc;
+    counter.release(self, page);
+}
+
+/// JS bridge declarations for `XPathExpression`: class metadata plus the
+/// single `evaluate` method. No constructor is registered in this struct.
+pub const JsApi = struct {
+ pub const bridge = js.Bridge(XPathExpression);
+
+ pub const Meta = struct {
+ pub const name = "XPathExpression";
+ pub const prototype_chain = bridge.prototypeChain();
+ pub var class_id: bridge.ClassId = undefined;
+ };
+
+ // Registered with `.dom_exception = true`.
+ // NOTE(review): presumably this makes Zig errors surface as DOMExceptions — confirm against the bridge.
+ pub const evaluate = bridge.function(XPathExpression.evaluate, .{ .dom_exception = true });
+};
diff --git a/src/browser/webapi/XPathResult.zig b/src/browser/webapi/XPathResult.zig
new file mode 100644
index 00000000..c029b48e
--- /dev/null
+++ b/src/browser/webapi/XPathResult.zig
@@ -0,0 +1,288 @@
+// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+//! WHATWG `XPathResult` (full surface, all 10 type constants — decision
+//! #4). Wraps the evaluator's `result.Result` for JS consumption:
+//! coerces to the requested result type at construction, exposes the
+//! type-tagged accessors, and serves the iterator/snapshot APIs.
+//!
+//! Lifetime model: each `XPathResult` owns a per-instance arena
+//! (`getArena(.medium, ...)`) that holds both the struct and the result
+//! data (node-set slice, formatted strings). The arena is released in
+//! `deinit` once the JS wrapper's refcount hits zero.
+//!
+//! Type-mismatch accessor calls return `error.InvalidStateError` —
+//! translated to a `DOMException` by `bridge.function(.., .{
+//! .dom_exception = true })`. The WHATWG IDL technically specifies
+//! `TypeError` for type mismatches, but `InvalidStateError` is what
+//! decision #4 captures and what most legacy XPath consumers expect.
+
+const std = @import("std");
+const lp = @import("lightpanda");
+
+const js = @import("../js/js.zig");
+const Page = @import("../Page.zig");
+const Frame = @import("../Frame.zig");
+
+const Node = @import("Node.zig");
+
+// XPath runtime helpers. Aliased to keep the cross-directory imports
+// readable when both modules expose a `Result` type.
+const xpath = struct {
+ const result = @import("../xpath/result.zig");
+ const Parser = @import("../xpath/Parser.zig");
+ const Evaluator = @import("../xpath/Evaluator.zig");
+};
+
+const Allocator = std.mem.Allocator;
+
+const XPathResult = @This();
+
+// WHATWG type constants. ANY_TYPE is a request flag — at construction
+// it resolves to one of the four concrete categories (NUMBER, STRING,
+// BOOLEAN, UNORDERED_NODE_ITERATOR) depending on what the expression
+// produced.
+pub const ANY_TYPE: u16 = 0;
+pub const NUMBER_TYPE: u16 = 1;
+pub const STRING_TYPE: u16 = 2;
+pub const BOOLEAN_TYPE: u16 = 3;
+pub const UNORDERED_NODE_ITERATOR_TYPE: u16 = 4;
+pub const ORDERED_NODE_ITERATOR_TYPE: u16 = 5;
+pub const UNORDERED_NODE_SNAPSHOT_TYPE: u16 = 6;
+pub const ORDERED_NODE_SNAPSHOT_TYPE: u16 = 7;
+pub const ANY_UNORDERED_NODE_TYPE: u16 = 8;
+pub const FIRST_ORDERED_NODE_TYPE: u16 = 9;
+
+// Payload stored in an `XPathResult` once `fromResult` has applied the
+// requested coercion. `nodes` backs every iterator, snapshot and
+// single-node result type.
+const Value = union(enum) {
+ number: f64,
+ string: []const u8,
+ boolean: bool,
+ nodes: []const *Node,
+};
+
+/// Reference count driven by `acquireRef` / `releaseRef`.
+_rc: lp.RC(u8) = .{},
+/// Per-instance arena; handed back to the page in `deinit`.
+_arena: Allocator,
+/// Resolved result type: one of the `*_TYPE` constants, never `ANY_TYPE`
+/// (`fromResult` replaces it with a concrete type).
+_type: u16,
+/// Payload matching `_type`.
+_value: Value,
+/// Index of the next node `iterateNext` will return.
+_iter_pos: usize = 0,
+
+// ----- constructors -----
+
+/// Parses `expression`, evaluates it against `context_node` and wraps the
+/// outcome in a new `XPathResult`. Called by `XPathEvaluator.evaluate`.
+///
+/// One `.medium` arena obtained from `frame` holds the copied expression
+/// text, the AST, the result data and the struct itself. The arena is
+/// released if any step fails.
+pub fn fromExpression(
+    expression: []const u8,
+    context_node: *Node,
+    requested_type: u16,
+    frame: *Frame,
+) !*XPathResult {
+    const instance_arena = try frame.getArena(.medium, "XPathResult");
+    errdefer frame.releaseArena(instance_arena);
+
+    // Parse a private copy of the text: the AST keeps slices into its input
+    // (literals, names, var refs, function names), while `expression` lives
+    // in the JS call_arena and is reclaimed once the top-level call returns.
+    const source_copy = try instance_arena.dupe(u8, expression);
+    const parsed = try xpath.Parser.parse(instance_arena, source_copy);
+
+    const evaluated = try xpath.Evaluator.evaluate(instance_arena, parsed, context_node, frame);
+    return fromResult(instance_arena, requested_type, evaluated);
+}
+
+/// Wraps an already-evaluated `result.Result` in an `XPathResult`.
+///
+/// `arena` becomes the result's per-instance arena: the struct is created in
+/// it and `deinit` releases it. This function does not release `arena` when
+/// it fails; both callers in this change (`fromExpression` and
+/// `XPathExpression.evaluate`) guard it with `errdefer`.
+///
+/// `requested_type` selects the coercion:
+/// * `ANY_TYPE` keeps the evaluator's own category and resolves the stored
+///   type to NUMBER, STRING, BOOLEAN or UNORDERED_NODE_ITERATOR.
+/// * `NUMBER_TYPE` / `STRING_TYPE` / `BOOLEAN_TYPE` convert through
+///   `xpath.result.toNumber` / `toString` / `toBoolean`.
+/// * The six node types require a node-set result, otherwise
+///   `error.InvalidStateError` is returned.
+/// * Any other value returns `error.InvalidStateError`.
+///
+/// String payloads are always copied into `arena`, so the returned object
+/// never points at text owned by something else.
+pub fn fromResult(
+    arena: Allocator,
+    requested_type: u16,
+    result: xpath.result.Result,
+) !*XPathResult {
+    const value: Value = switch (requested_type) {
+        ANY_TYPE => switch (result) {
+            .number => |n| .{ .number = n },
+            // NOTE(review): a string result may be a slice borrowed from the
+            // AST (e.g. a literal). `XPathExpression.evaluate` keeps the AST
+            // in a different arena than the result, so copy the text here
+            // to keep it valid for the result's whole lifetime.
+            .string => |s| .{ .string = try arena.dupe(u8, s) },
+            .boolean => |b| .{ .boolean = b },
+            .node_set => |ns| .{ .nodes = ns },
+        },
+        NUMBER_TYPE => .{ .number = try xpath.result.toNumber(arena, result) },
+        // Copied for the same reason as above: `toString` is not visible
+        // here and may hand back a slice it did not allocate in `arena`.
+        STRING_TYPE => .{ .string = try arena.dupe(u8, try xpath.result.toString(arena, result)) },
+        BOOLEAN_TYPE => .{ .boolean = xpath.result.toBoolean(result) },
+        UNORDERED_NODE_ITERATOR_TYPE,
+        ORDERED_NODE_ITERATOR_TYPE,
+        UNORDERED_NODE_SNAPSHOT_TYPE,
+        ORDERED_NODE_SNAPSHOT_TYPE,
+        ANY_UNORDERED_NODE_TYPE,
+        FIRST_ORDERED_NODE_TYPE,
+        => switch (result) {
+            .node_set => |ns| .{ .nodes = ns },
+            // Requesting a node-set type for a non-node-set expression.
+            // WHATWG specifies TypeError, but DOMException.fromError has
+            // no TypeError mapping (would surface as a plain JS Error);
+            // unify on InvalidStateError per the project plan.
+            else => return error.InvalidStateError,
+        },
+        // Not one of the ten defined type constants.
+        else => return error.InvalidStateError,
+    };
+
+    // ANY_TYPE is only a request flag; store the concrete type it resolved to.
+    const final_type: u16 = if (requested_type == ANY_TYPE) switch (value) {
+        .number => NUMBER_TYPE,
+        .string => STRING_TYPE,
+        .boolean => BOOLEAN_TYPE,
+        .nodes => UNORDERED_NODE_ITERATOR_TYPE,
+    } else requested_type;
+
+    const xr = try arena.create(XPathResult);
+    xr.* = .{
+        ._arena = arena,
+        ._type = final_type,
+        ._value = value,
+    };
+    return xr;
+}
+
+// ----- lifecycle -----
+
+/// Hands the result's per-instance arena back to `page`.
+pub fn deinit(self: *XPathResult, page: *Page) void {
+    const owned_arena = self._arena;
+    page.releaseArena(owned_arena);
+}
+
+/// Takes one reference on the result's `_rc` counter.
+pub fn acquireRef(self: *XPathResult) void {
+    const counter = &self._rc;
+    counter.acquire();
+}
+
+/// Drops one reference by forwarding `self` and `page` to `_rc.release`.
+pub fn releaseRef(self: *XPathResult, page: *Page) void {
+    const counter = &self._rc;
+    counter.release(self, page);
+}
+
+// ----- accessors -----
+
+/// `resultType` getter: the concrete type stored at construction.
+fn getResultType(self: *const XPathResult) u16 {
+    const resolved_type = self._type;
+    return resolved_type;
+}
+
+/// `numberValue` getter: the number payload, or `error.InvalidStateError`
+/// when the result type is not `NUMBER_TYPE`.
+fn getNumberValue(self: *const XPathResult) !f64 {
+    return switch (self._type) {
+        NUMBER_TYPE => self._value.number,
+        else => error.InvalidStateError,
+    };
+}
+
+/// `stringValue` getter: the string payload, or `error.InvalidStateError`
+/// when the result type is not `STRING_TYPE`.
+fn getStringValue(self: *const XPathResult) ![]const u8 {
+    return switch (self._type) {
+        STRING_TYPE => self._value.string,
+        else => error.InvalidStateError,
+    };
+}
+
+/// `booleanValue` getter: the boolean payload, or `error.InvalidStateError`
+/// when the result type is not `BOOLEAN_TYPE`.
+fn getBooleanValue(self: *const XPathResult) !bool {
+    return switch (self._type) {
+        BOOLEAN_TYPE => self._value.boolean,
+        else => error.InvalidStateError,
+    };
+}
+
+/// `singleNodeValue` getter: the first node of the node list, or `null` when
+/// the list is empty. Returns `error.InvalidStateError` unless the result
+/// type is `ANY_UNORDERED_NODE_TYPE` or `FIRST_ORDERED_NODE_TYPE`.
+fn getSingleNodeValue(self: *const XPathResult) !?*Node {
+    switch (self._type) {
+        ANY_UNORDERED_NODE_TYPE, FIRST_ORDERED_NODE_TYPE => {},
+        else => return error.InvalidStateError,
+    }
+    const matched = self._value.nodes;
+    return if (matched.len > 0) matched[0] else null;
+}
+
+/// `snapshotLength` getter: the number of nodes in the snapshot. Returns
+/// `error.InvalidStateError` unless the result type is one of the two
+/// snapshot types.
+fn getSnapshotLength(self: *const XPathResult) !u32 {
+    switch (self._type) {
+        UNORDERED_NODE_SNAPSHOT_TYPE, ORDERED_NODE_SNAPSHOT_TYPE => {},
+        else => return error.InvalidStateError,
+    }
+    const snapshot = self._value.nodes;
+    return @intCast(snapshot.len);
+}
+
+/// `invalidIteratorState` getter; always `false`.
+///
+/// The result holds a frozen slice of node pointers and does not track DOM
+/// mutations, so an iterator is never reported as invalidated. This matches
+/// the polyfill, which is snapshot-only.
+fn getInvalidIteratorState(_: *const XPathResult) bool {
+    const invalidated = false;
+    return invalidated;
+}
+
+// ----- methods -----
+
+/// Returns the next node of an iterator result and advances the cursor, or
+/// `null` once every node has been returned. Returns
+/// `error.InvalidStateError` unless the result type is one of the two
+/// iterator types.
+pub fn iterateNext(self: *XPathResult) !?*Node {
+    switch (self._type) {
+        UNORDERED_NODE_ITERATOR_TYPE, ORDERED_NODE_ITERATOR_TYPE => {},
+        else => return error.InvalidStateError,
+    }
+    const nodes = self._value.nodes;
+    const cursor = self._iter_pos;
+    if (cursor < nodes.len) {
+        self._iter_pos = cursor + 1;
+        return nodes[cursor];
+    }
+    return null;
+}
+
+/// Returns the snapshot node at `index`, or `null` when `index` is past the
+/// end. Returns `error.InvalidStateError` unless the result type is one of
+/// the two snapshot types.
+pub fn snapshotItem(self: *const XPathResult, index: u32) !?*Node {
+    switch (self._type) {
+        UNORDERED_NODE_SNAPSHOT_TYPE, ORDERED_NODE_SNAPSHOT_TYPE => {},
+        else => return error.InvalidStateError,
+    }
+    const snapshot = self._value.nodes;
+    return if (index < snapshot.len) snapshot[index] else null;
+}
+
+// ----- JS bridge -----
+
+/// JS bridge declarations for `XPathResult`: class metadata, the ten type
+/// constants, the read-only accessors and the two traversal methods. No
+/// constructor is registered in this struct.
+pub const JsApi = struct {
+ pub const bridge = js.Bridge(XPathResult);
+
+ pub const Meta = struct {
+ pub const name = "XPathResult";
+ pub const prototype_chain = bridge.prototypeChain();
+ pub var class_id: bridge.ClassId = undefined;
+ };
+
+ // Type constants — both static (on the constructor) and instance
+ // properties per the WHATWG IDL. `template = true` makes them
+ // class-level so `XPathResult.ORDERED_NODE_SNAPSHOT_TYPE` works.
+ pub const ANY_TYPE = bridge.property(XPathResult.ANY_TYPE, .{ .template = true });
+ pub const NUMBER_TYPE = bridge.property(XPathResult.NUMBER_TYPE, .{ .template = true });
+ pub const STRING_TYPE = bridge.property(XPathResult.STRING_TYPE, .{ .template = true });
+ pub const BOOLEAN_TYPE = bridge.property(XPathResult.BOOLEAN_TYPE, .{ .template = true });
+ pub const UNORDERED_NODE_ITERATOR_TYPE = bridge.property(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, .{ .template = true });
+ pub const ORDERED_NODE_ITERATOR_TYPE = bridge.property(XPathResult.ORDERED_NODE_ITERATOR_TYPE, .{ .template = true });
+ pub const UNORDERED_NODE_SNAPSHOT_TYPE = bridge.property(XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, .{ .template = true });
+ pub const ORDERED_NODE_SNAPSHOT_TYPE = bridge.property(XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, .{ .template = true });
+ pub const ANY_UNORDERED_NODE_TYPE = bridge.property(XPathResult.ANY_UNORDERED_NODE_TYPE, .{ .template = true });
+ pub const FIRST_ORDERED_NODE_TYPE = bridge.property(XPathResult.FIRST_ORDERED_NODE_TYPE, .{ .template = true });
+
+ // Read-only accessors (setter is `null`). Getters that can return
+ // `error.InvalidStateError` are registered with `.dom_exception = true`;
+ // `resultType` and `invalidIteratorState` cannot fail and are not.
+ pub const resultType = bridge.accessor(XPathResult.getResultType, null, .{});
+ pub const numberValue = bridge.accessor(XPathResult.getNumberValue, null, .{ .dom_exception = true });
+ pub const stringValue = bridge.accessor(XPathResult.getStringValue, null, .{ .dom_exception = true });
+ pub const booleanValue = bridge.accessor(XPathResult.getBooleanValue, null, .{ .dom_exception = true });
+ pub const singleNodeValue = bridge.accessor(XPathResult.getSingleNodeValue, null, .{ .dom_exception = true });
+ pub const snapshotLength = bridge.accessor(XPathResult.getSnapshotLength, null, .{ .dom_exception = true });
+ pub const invalidIteratorState = bridge.accessor(XPathResult.getInvalidIteratorState, null, .{});
+
+ // Traversal methods; both can return `error.InvalidStateError`.
+ pub const iterateNext = bridge.function(XPathResult.iterateNext, .{ .dom_exception = true });
+ pub const snapshotItem = bridge.function(XPathResult.snapshotItem, .{ .dom_exception = true });
+};
+
+const testing = @import("../../testing.zig");
+
+// Runs the `xpath/xpath_result.html` fixture through `testing.htmlRunner`
+// with default options.
+test "WebApi: XPathResult" {
+ try testing.htmlRunner("xpath/xpath_result.html", .{});
+}
+
+// Runs the `xpath/xpath_conformance.html` fixture through
+// `testing.htmlRunner` with default options.
+test "WebApi: XPath conformance" {
+ try testing.htmlRunner("xpath/xpath_conformance.html", .{});
+}
+
+// This uses console.warn, uncomment if you want to run it
+// test "WebApi: XPath perf" {
+// try testing.htmlRunner("xpath/xpath_perf.html", .{});
+// }
diff --git a/src/browser/webapi/element/html/Link.zig b/src/browser/webapi/element/html/Link.zig
index 8f561355..1b83dacf 100644
--- a/src/browser/webapi/element/html/Link.zig
+++ b/src/browser/webapi/element/html/Link.zig
@@ -71,8 +71,16 @@ pub fn setAs(self: *Link, value: []const u8, frame: *Frame) !void {
return self.asElement().setAttributeSafe(comptime .wrap("as"), .wrap(value), frame);
}
+/// `media` getter: the value of the element's `media` attribute, or the
+/// empty string when the attribute is absent. Takes a const receiver, in
+/// line with `getCrossOrigin`, since reading an attribute needs no mutation.
+pub fn getMedia(self: *const Link) []const u8 {
+    return self.asConstElement().getAttributeSafe(comptime .wrap("media")) orelse "";
+}
+
+/// `media` setter: writes `value` to the element's `media` attribute through
+/// `setAttributeSafe`, propagating any error it returns.
+pub fn setMedia(self: *Link, value: []const u8, frame: *Frame) !void {
+    const element = self.asElement();
+    return element.setAttributeSafe(comptime .wrap("media"), .wrap(value), frame);
+}
+
pub fn getCrossOrigin(self: *const Link) ?[]const u8 {
- return self.asConstElement().getAttributeSafe(comptime .wrap("crossOrigin"));
+ return self.asConstElement().getAttributeSafe(comptime .wrap("crossorigin"));
}
pub fn setCrossOrigin(self: *Link, value: []const u8, frame: *Frame) !void {
@@ -80,7 +88,7 @@ pub fn setCrossOrigin(self: *Link, value: []const u8, frame: *Frame) !void {
if (std.ascii.eqlIgnoreCase(value, "use-credentials")) {
normalized = "use-credentials";
}
- return self.asElement().setAttributeSafe(comptime .wrap("crossOrigin"), .wrap(normalized), frame);
+ return self.asElement().setAttributeSafe(comptime .wrap("crossorigin"), .wrap(normalized), frame);
}
pub fn linkAddedCallback(self: *Link, frame: *Frame) !void {
@@ -120,6 +128,7 @@ pub const JsApi = struct {
pub const as = bridge.accessor(Link.getAs, Link.setAs, .{});
pub const rel = bridge.accessor(Link.getRel, Link.setRel, .{});
+ pub const media = bridge.accessor(Link.getMedia, Link.setMedia, .{});
pub const href = bridge.accessor(Link.getHref, Link.setHref, .{});
pub const crossOrigin = bridge.accessor(Link.getCrossOrigin, Link.setCrossOrigin, .{});
pub const relList = bridge.accessor(_getRelList, null, .{ .null_as_undefined = true });
diff --git a/src/browser/webapi/selector/Selector.zig b/src/browser/webapi/selector/Selector.zig
index 2591ce6c..7322e02e 100644
--- a/src/browser/webapi/selector/Selector.zig
+++ b/src/browser/webapi/selector/Selector.zig
@@ -28,6 +28,22 @@ pub const List = @import("List.zig");
const String = lp.String;
const Allocator = std.mem.Allocator;
+/// Translates a selector-parsing error into a type DOMException knows:
+/// each of the errors listed below becomes `error.SyntaxError`, and every
+/// other error is returned unchanged.
+pub fn mapErrorToDOM(err: anyerror) anyerror {
+    const parse_failures = [_]anyerror{
+        error.InvalidSelector,
+        error.InvalidAttributeSelector,
+        error.InvalidIDSelector,
+        error.InvalidClassSelector,
+        error.UnknownPseudoClass,
+        error.InvalidTagSelector,
+        error.InvalidPseudoClass,
+        error.InvalidNthPattern,
+    };
+    for (parse_failures) |failure| {
+        if (err == failure) return error.SyntaxError;
+    }
+    return err;
+}
+
pub fn parseLeaky(arena: Allocator, input: []const u8) !Parsed {
if (input.len == 0) {
return error.SyntaxError;
diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig
new file mode 100644
index 00000000..00788023
--- /dev/null
+++ b/src/browser/xpath/Evaluator.zig
@@ -0,0 +1,987 @@
+// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+//! XPath 1.0 evaluator — runs an `ast.Expr` against a context node and
+//! produces a `Result`. The evaluator allocates intermediate values
+//! (node-set slices, formatted numbers, materialized attribute nodes)
+//! into the caller's arena. The context `Frame` is needed for
+//! `getElementById` and to materialize attributes (the attribute axis
+//! returns full `Attribute` nodes so the result is `*Node`-uniform).
+//!
+//! Document-order sort happens once at the public boundary
+//! (`evaluate()`); intermediate step results stay in axis order so
+//! reverse-axis positional predicates evaluate against proximity.
+
+const std = @import("std");
+const lp = @import("lightpanda");
+
+const Node = @import("../webapi/Node.zig");
+
+const ast = @import("ast.zig");
+const Parser = @import("Parser.zig");
+const result = @import("result.zig");
+const functions = @import("functions.zig");
+
+const Frame = lp.Frame;
+const Element = Node.Element;
+const Document = Node.Document;
+const Allocator = std.mem.Allocator;
+
+const Evaluator = @This();
+
+/// Error set returned by `evaluate` and by every internal `eval*` /
+/// `append*` helper in this file.
+pub const Error = error{
+ OutOfMemory,
+ WriteFailed,
+ // Surfaces from Attribute materialization (`Entry.toAttribute` →
+ // `String.dupe` enforces a length limit). The polyfill never hits
+ // this since JS strings are unbounded, but Lightpanda's `String`
+ // type caps at u32::MAX bytes — propagate so callers can surface
+ // a DOM exception.
+ StringTooLarge,
+ // NOTE(review): presumably raised by `functions.call` for an
+ // unrecognized function name — confirm in functions.zig.
+ UnknownFunction,
+ // Returned by the `|` operator when either operand is not a node-set.
+ UnionRequiresNodeSets,
+};
+
+arena: Allocator,
+frame: *Frame,
+
+/// Public entry point: evaluates `expr` with `context_node` as the
+/// initial context (position 1 of 1) and returns the resulting value.
+/// A node-set result is sorted into document order before it is returned
+/// (XPath spec §3.3); other result kinds are returned as computed.
+pub fn evaluate(arena: Allocator, expr: *const ast.Expr, context_node: *Node, frame: *Frame) Error!result.Result {
+    var evaluator: Evaluator = .{ .arena = arena, .frame = frame };
+    const value = try evaluator.evalExpr(expr, context_node, 1, 1);
+    switch (value) {
+        .node_set => |nodes| sortDocOrder(@constCast(nodes)),
+        else => {},
+    }
+    return value;
+}
+
+pub const SearchError = Error || Parser.Error;
+
+/// Convenience wrapper for `DOM.performSearch`: parses `expression`,
+/// evaluates it against `root`, and returns the resulting node-set.
+/// An expression whose top-level value is a scalar (number, string,
+/// boolean) produces an empty slice — decision #3: these APIs exist to
+/// find nodes, not to perform arbitrary computation.
+pub fn searchAll(arena: Allocator, root: *Node, expression: []const u8, frame: *Frame) SearchError![]const *Node {
+    const parsed = try Parser.parse(arena, expression);
+    const value = try evaluate(arena, parsed, root, frame);
+    if (value == .node_set) return value.node_set;
+    return &.{};
+}
+
+// ----- AST evaluation -----
+
+// Dispatches on the AST node kind. `pos` / `size` are the 1-based context
+// position and context size that `position()` / `last()` report; they are
+// forwarded to every sub-expression except location paths, which
+// establish their own per-step context.
+fn evalExpr(self: *Evaluator, expr: *const ast.Expr, ctx: *Node, pos: usize, size: usize) Error!result.Result {
+    switch (expr.*) {
+        .number => |value| return .{ .number = value },
+        .literal => |text| return .{ .string = text },
+        // Variable references are stubbed to the empty string (decision #3).
+        .var_ref => return .{ .string = "" },
+        .neg => |operand| {
+            const operand_value = try self.evalExpr(operand, ctx, pos, size);
+            const as_number = try result.toNumber(self.arena, operand_value);
+            return .{ .number = -as_number };
+        },
+        .binop => |bo| return self.evalBinop(bo, ctx, pos, size),
+        .path => |p| return self.evalPath(p, ctx),
+        .filter_path => |fp| return self.evalFilterPath(fp, ctx, pos, size),
+        .filter => |f| return self.evalFilter(f, ctx, pos, size),
+        .fn_call => |fc| return self.evalFnCall(fc, ctx, pos, size),
+    }
+}
+
+// Evaluates a location path. The two fast paths are tried first; when
+// neither applies, the path is evaluated step by step, each step
+// consuming the node-set produced by the previous one.
+fn evalPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!result.Result {
+    if (try self.tryIdLookupFastPath(path, ctx)) |fast| return fast;
+    if (try self.tryFusedDescendantFastPath(path, ctx)) |fast| return fast;
+
+    // Absolute paths start at the owning document; a context node with no
+    // owner document (or one that already is a document) starts at itself.
+    var start: *Node = ctx;
+    if (path.absolute and ctx._type != .document) {
+        if (ctx.ownerDocument(self.frame)) |owner| start = owner.asNode();
+    }
+
+    const seed = try self.arena.alloc(*Node, 1);
+    seed[0] = start;
+
+    var nodes: []const *Node = seed;
+    for (path.steps) |step| {
+        nodes = (try self.evalStep(nodes, step)).node_set;
+    }
+    return .{ .node_set = nodes };
+}
+
+// Recognize the very common `//tag[@id='x']` and `.//tag[@id='x']`
+// shapes (and their wildcard `//*[@id='x']` variants) and serve them
+// directly from `frame.getElementByIdFromNode`. Accepts the literal on
+// either side of `=`.
+//
+// Mirrors the same tradeoff `webapi/selector/List.zig:optimizeSelector`
+// already makes for `querySelector(All)`: the id-map only stores the
+// first element per ID in document order, so duplicate IDs (invalid
+// HTML, but possible) yield one match here where a strict tree walk
+// would find all. Acceptable because Capybara/Selenium hot paths
+// assume unique IDs and CSS has shipped this compromise for years.
+//
+// Returns null (caller falls through to the next strategy) for any
+// deviation: extra steps, extra predicates, non-eq predicate, non-literal
+// RHS, the inability to resolve a search root, or an ID lookup that
+// resolves to the search root itself.
+fn tryIdLookupFastPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!?result.Result {
+    // Two acceptable AST shapes:
+    //   //tag[@id='x']   parses to: ds::node() / child::tag[pred]
+    //   .//tag[@id='x']  parses to: self::node() / ds::node() / child::tag[pred]
+    const target = matchDescendantPathShape(path) orelse return null;
+
+    if (target.axis != .child) return null;
+    if (target.predicates.len != 1) return null;
+
+    // Tag name (null = wildcard "*"). type_test (e.g. `node()`,
+    // `text()`) doesn't qualify because getElementByIdFromNode only
+    // returns elements.
+    const tag_name: ?[]const u8 = switch (target.node_test) {
+        .name => |n| if (std.mem.eql(u8, n, "*")) null else n,
+        .type_test => return null,
+    };
+
+    const id_value = matchAttrEqLiteral(target.predicates[0], "id") orelse return null;
+
+    // Resolve search root the same way the general path does.
+    const search_root: *Node = if (path.absolute) blk: {
+        if (ctx._type == .document) break :blk ctx;
+        const owner = ctx.ownerDocument(self.frame) orelse return null;
+        break :blk owner.asNode();
+    } else ctx;
+
+    const id_element = self.frame.getElementByIdFromNode(search_root, id_value) orelse {
+        return .{ .node_set = &.{} };
+    };
+    const id_node = id_element.asNode();
+
+    // The pattern ends in a `child::` step, so only strict descendants of
+    // the search root can match — never the root itself. When the lookup
+    // lands on the root (e.g. `.//div[@id='x']` evaluated on div#x), hand
+    // the query to the tree-walking strategies, which exclude the root.
+    if (id_node == search_root) return null;
+
+    // Relative paths must filter to descendants of the context.
+    // getElementByIdFromNode is doc-wide.
+    if (!search_root.contains(id_node)) {
+        return .{ .node_set = &.{} };
+    }
+
+    // Tag check (case-insensitive per decision #2). Element tag names
+    // are stored lowercase via `getTagNameLower`; lowercase the AST
+    // name once and compare.
+    if (tag_name) |tag| {
+        const lowered = try std.ascii.allocLowerString(self.arena, tag);
+        if (!std.mem.eql(u8, lowered, id_element.getTagNameLower())) {
+            return .{ .node_set = &.{} };
+        }
+    }
+
+    const out = try self.arena.alloc(*Node, 1);
+    out[0] = id_node;
+    return .{ .node_set = out };
+}
+
+// Generalizes `tryIdLookupFastPath` to predicates that are not ID
+// lookups. It accepts the same AST shape (`//test[preds]` /
+// `.//test[preds]`) but, rather than consulting the id-map, walks the
+// descendants of the search root once in document order and applies the
+// node test plus any "safe" non-positional predicates inline. This avoids
+// the general path's per-step axis materialization, its per-step
+// `filtered`/`current` lists, and the dedup hash map (a forward walk from
+// a single context already yields document order).
+//
+// Typical hits: `//div`, `//*`, `//*[@class='x']`, `//div[@class='x']`,
+// `//div[contains(@class,'x')]`.
+//
+// A predicate is "safe" when it is not numeric at the top level (number,
+// neg, arithmetic binop, or a call to a number-returning function) and
+// contains no `position()`/`last()` anywhere in its AST. Numeric
+// predicates need a real context position, which this walk does not
+// track; a `position()`/`last()` inside a nested sub-path is rejected
+// too, conservatively, even though it is local to that sub-axis.
+//
+// Returns null when the path does not qualify.
+fn tryFusedDescendantFastPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!?result.Result {
+    const target = matchDescendantPathShape(path) orelse return null;
+    if (target.axis != .child) return null;
+
+    for (target.predicates) |predicate| {
+        if (!isSafeNonPositionalPredicate(predicate)) return null;
+    }
+
+    // Lowercase the name test once; wildcard and type tests carry no name.
+    var lowered_name: ?[]const u8 = null;
+    switch (target.node_test) {
+        .name => |name| {
+            if (!std.mem.eql(u8, name, "*")) {
+                lowered_name = try std.ascii.allocLowerString(self.arena, name);
+            }
+        },
+        .type_test => {},
+    }
+
+    var search_root: *Node = ctx;
+    if (path.absolute and ctx._type != .document) {
+        const owner = ctx.ownerDocument(self.frame) orelse return null;
+        search_root = owner.asNode();
+    }
+
+    var matches: std.ArrayList(*Node) = .empty;
+    try self.fusedDescend(search_root, target, lowered_name, &matches);
+    return .{ .node_set = matches.items };
+}
+
+// Recursive pre-order walk over the strict descendants of `parent`.
+// Every descendant that passes `target`'s node test and all of its
+// predicates is appended to `out`.
+fn fusedDescend(
+    self: *Evaluator,
+    parent: *Node,
+    target: ast.Step,
+    lowered_name: ?[]const u8,
+    out: *std.ArrayList(*Node),
+) Error!void {
+    var children = parent.childrenIterator();
+    while (children.next()) |child| {
+        const accepted = blk: {
+            if (!matchTest(child, target.node_test, target.axis, lowered_name)) break :blk false;
+            for (target.predicates) |predicate| {
+                // Context position / size are placeholders: the safety
+                // gate in the caller already rejected every predicate
+                // that could observe them.
+                const value = try self.evalExpr(predicate, child, 1, 1);
+                if (!result.toBoolean(value)) break :blk false;
+            }
+            break :blk true;
+        };
+        if (accepted) try out.append(self.arena, child);
+        try self.fusedDescend(child, target, lowered_name, out);
+    }
+}
+
+// Returns the final step of a path shaped like
+// `descendant-or-self::node()/STEP` (two steps) or
+// `self::node()/descendant-or-self::node()/STEP` (three steps); null for
+// every other shape.
+fn matchDescendantPathShape(path: ast.Path) ?ast.Step {
+    const steps = path.steps;
+    if (steps.len == 2) {
+        return if (isDescendantOrSelfNode(steps[0])) steps[1] else null;
+    }
+    if (steps.len == 3) {
+        if (isSelfNode(steps[0]) and isDescendantOrSelfNode(steps[1])) return steps[2];
+        return null;
+    }
+    return null;
+}
+
+// True when the predicate can be evaluated without a real context
+// position: it is not numeric at the top level and never references
+// `position()` or `last()`.
+fn isSafeNonPositionalPredicate(expr: *const ast.Expr) bool {
+    return !isNumericTopLevel(expr) and !containsPositionOrLast(expr);
+}
+
+// True when the expression's top-level value is (or may be) a number, in
+// which case a predicate holding it selects by position rather than by
+// truthiness.
+//
+// Filter expressions are inspected through to their base: `evalFilter`
+// and `evalFilterPath` hand back a non-node-set base unchanged, so
+// `1[true()]` evaluates to the number 1 and must be treated as numeric
+// just like a bare `1`.
+fn isNumericTopLevel(expr: *const ast.Expr) bool {
+    return switch (expr.*) {
+        .number, .neg => true,
+        .binop => |bo| switch (bo.op) {
+            .add, .sub, .mul, .div, .mod => true,
+            else => false,
+        },
+        .fn_call => |fc| isNumericFnName(fc.name),
+        .filter => |f| isNumericTopLevel(f.expr),
+        .filter_path => |fp| isNumericTopLevel(fp.filter),
+        .literal, .var_ref, .path => false,
+    };
+}
+
+// True when `name` is one of the XPath core functions that return a
+// number.
+fn isNumericFnName(name: []const u8) bool {
+    const numeric_names = [_][]const u8{
+        "position", "last",    "count", "sum",
+        "floor",    "ceiling", "round", "number",
+        "string-length",
+    };
+    return for (numeric_names) |candidate| {
+        if (std.mem.eql(u8, name, candidate)) break true;
+    } else false;
+}
+
+// Recursively searches the expression tree — including predicates on
+// nested path steps and function arguments — for a call to `position()`
+// or `last()`.
+fn containsPositionOrLast(expr: *const ast.Expr) bool {
+    switch (expr.*) {
+        .number, .literal, .var_ref => return false,
+        .neg => |operand| return containsPositionOrLast(operand),
+        .binop => |bo| {
+            if (containsPositionOrLast(bo.left)) return true;
+            return containsPositionOrLast(bo.right);
+        },
+        .filter => |f| {
+            if (containsPositionOrLast(f.expr)) return true;
+            return containsPositionOrLast(f.predicate);
+        },
+        .filter_path => |fp| {
+            if (containsPositionOrLast(fp.filter)) return true;
+            return stepsContainPositionOrLast(fp.steps);
+        },
+        .path => |p| return stepsContainPositionOrLast(p.steps),
+        .fn_call => |fc| {
+            const is_positional = std.mem.eql(u8, fc.name, "position") or
+                std.mem.eql(u8, fc.name, "last");
+            return is_positional or argsContainPositionOrLast(fc.args);
+        },
+    }
+}
+
+// True when any predicate attached to any of `steps` references
+// `position()` or `last()`.
+fn stepsContainPositionOrLast(steps: []const ast.Step) bool {
+    for (steps) |step| {
+        const found = for (step.predicates) |predicate| {
+            if (containsPositionOrLast(predicate)) break true;
+        } else false;
+        if (found) return true;
+    }
+    return false;
+}
+
+// True when any function argument references `position()` or `last()`.
+fn argsContainPositionOrLast(args: []const *ast.Expr) bool {
+    return for (args) |arg| {
+        if (containsPositionOrLast(arg)) break true;
+    } else false;
+}
+
+// True for exactly `descendant-or-self::node()` with no predicates.
+fn isDescendantOrSelfNode(s: ast.Step) bool {
+    const is_bare_axis = s.axis == .descendant_or_self and s.predicates.len == 0;
+    if (!is_bare_axis) return false;
+    switch (s.node_test) {
+        .type_test => |kind| return kind == .node,
+        .name => return false,
+    }
+}
+
+// True for exactly `self::node()` with no predicates.
+fn isSelfNode(s: ast.Step) bool {
+    const is_bare_axis = s.axis == .self and s.predicates.len == 0;
+    if (!is_bare_axis) return false;
+    switch (s.node_test) {
+        .type_test => |kind| return kind == .node,
+        .name => return false,
+    }
+}
+
+// Matches `@attr_name = 'literal'` with the operands in either order and
+// returns the literal; null for any other expression.
+fn matchAttrEqLiteral(expr: *const ast.Expr, attr_name: []const u8) ?[]const u8 {
+    const comparison = switch (expr.*) {
+        .binop => |bo| bo,
+        else => return null,
+    };
+    if (comparison.op != .eq) return null;
+
+    const lhs = comparison.left;
+    const rhs = comparison.right;
+    if (isAttrPath(lhs, attr_name) and rhs.* == .literal) return rhs.literal;
+    if (isAttrPath(rhs, attr_name) and lhs.* == .literal) return lhs.literal;
+    return null;
+}
+
+// True when `expr` is the relative, single-step, predicate-free path
+// `@attr_name` (byte-exact name comparison).
+fn isAttrPath(expr: *const ast.Expr, attr_name: []const u8) bool {
+    const path = switch (expr.*) {
+        .path => |p| p,
+        else => return false,
+    };
+    if (path.absolute or path.steps.len != 1) return false;
+
+    const step = path.steps[0];
+    if (step.axis != .attribute or step.predicates.len != 0) return false;
+
+    switch (step.node_test) {
+        .name => |name| return std.mem.eql(u8, name, attr_name),
+        .type_test => return false,
+    }
+}
+
+// Evaluates `filter-expr/step/step…`. When the filter expression does
+// not yield a node-set, its value is returned as-is and the steps are
+// not applied.
+fn evalFilterPath(self: *Evaluator, fp: ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!result.Result {
+    const base = try self.evalExpr(fp.filter, ctx, pos, size);
+    var nodes: []const *Node = switch (base) {
+        .node_set => |set| set,
+        else => return base,
+    };
+
+    for (fp.steps) |step| {
+        nodes = (try self.evalStep(nodes, step)).node_set;
+    }
+    return .{ .node_set = nodes };
+}
+
+// Evaluates `expr[predicate]`. When `expr` does not yield a node-set its
+// value is returned untouched; otherwise each node is tested against the
+// predicate using its 1-based index in the set as context position and
+// the set length as context size.
+fn evalFilter(self: *Evaluator, f: ast.Filter, ctx: *Node, pos: usize, size: usize) Error!result.Result {
+    const base = try self.evalExpr(f.expr, ctx, pos, size);
+    const candidates: []const *Node = switch (base) {
+        .node_set => |set| set,
+        else => return base,
+    };
+
+    var kept: std.ArrayList(*Node) = .empty;
+    for (candidates, 1..) |node, position| {
+        const value = try self.evalExpr(f.predicate, node, position, candidates.len);
+        if (predicateMatches(value, position)) try kept.append(self.arena, node);
+    }
+    return .{ .node_set = kept.items };
+}
+
+// ----- step + axis -----
+
+// Applies one location step to every node in `ctx_nodes`: materialize the
+// axis, keep the nodes passing the node test, run each predicate in turn
+// (positions are relative to the survivors of the previous predicate, in
+// axis order), then merge the per-context results, dropping duplicates
+// while keeping first-seen order.
+fn evalStep(self: *Evaluator, ctx_nodes: []const *Node, step: ast.Step) Error!result.Result {
+    // Lowercase the name test once per step. Name matching is
+    // case-insensitive (decision #2); doing it here keeps the per-node
+    // comparison a plain byte compare.
+    var lowered_name: ?[]const u8 = null;
+    switch (step.node_test) {
+        .name => |name| {
+            if (!std.mem.eql(u8, name, "*")) {
+                lowered_name = try std.ascii.allocLowerString(self.arena, name);
+            }
+        },
+        .type_test => {},
+    }
+
+    var unique: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;
+
+    for (ctx_nodes) |ctx| {
+        var candidates: std.ArrayList(*Node) = .empty;
+        for (try self.axisNodes(ctx, step.axis)) |node| {
+            if (matchTest(node, step.node_test, step.axis, lowered_name)) {
+                try candidates.append(self.arena, node);
+            }
+        }
+
+        var survivors: []const *Node = candidates.items;
+        for (step.predicates) |predicate| {
+            var passed: std.ArrayList(*Node) = .empty;
+            for (survivors, 1..) |node, position| {
+                const value = try self.evalExpr(predicate, node, position, survivors.len);
+                if (predicateMatches(value, position)) try passed.append(self.arena, node);
+            }
+            survivors = passed.items;
+        }
+
+        for (survivors) |node| try unique.put(self.arena, node, {});
+    }
+
+    return .{ .node_set = unique.keys() };
+}
+
+// Collects every node on `axis` relative to `node`, before any node test
+// or predicate is applied. Reverse axes (ancestor*, preceding*) are
+// produced in proximity order, nearest first; the final node-set is put
+// into document order at the public boundary. The `namespace` and
+// `unknown` axes yield nothing (decision #3 stubs).
+fn axisNodes(self: *Evaluator, node: *Node, axis: ast.Axis) Error![]const *Node {
+    var nodes: std.ArrayList(*Node) = .empty;
+    switch (axis) {
+        .self => try nodes.append(self.arena, node),
+        .child => {
+            var children = node.childrenIterator();
+            while (children.next()) |child| try nodes.append(self.arena, child);
+        },
+        .parent => if (node.parentNode()) |parent| try nodes.append(self.arena, parent),
+        .descendant, .descendant_or_self => {
+            if (axis == .descendant_or_self) try nodes.append(self.arena, node);
+            try self.appendDescendants(node, &nodes);
+        },
+        .ancestor, .ancestor_or_self => {
+            if (axis == .ancestor_or_self) try nodes.append(self.arena, node);
+            var ancestor = node.parentNode();
+            while (ancestor) |current| : (ancestor = current.parentNode()) {
+                try nodes.append(self.arena, current);
+            }
+        },
+        .following_sibling => {
+            var sibling = node.nextSibling();
+            while (sibling) |current| : (sibling = current.nextSibling()) {
+                try nodes.append(self.arena, current);
+            }
+        },
+        .preceding_sibling => {
+            var sibling = node.previousSibling();
+            while (sibling) |current| : (sibling = current.previousSibling()) {
+                try nodes.append(self.arena, current);
+            }
+        },
+        .following => try self.appendFollowing(node, &nodes),
+        .preceding => try self.appendPreceding(node, &nodes),
+        .attribute => try self.appendAttributes(node, &nodes),
+        .namespace, .unknown => {},
+    }
+    return nodes.items;
+}
+
+// Appends every strict descendant of `node` to `out` in pre-order: each
+// child is appended before its own subtree.
+fn appendDescendants(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) Error!void {
+    var children = node.childrenIterator();
+    while (children.next()) |child| {
+        try out.append(self.arena, child);
+        try self.appendDescendants(child, out);
+    }
+}
+
+// `following` axis: for `start` and then each of its ancestors in turn,
+// appends every later sibling together with that sibling's subtree.
+fn appendFollowing(self: *Evaluator, start: *Node, out: *std.ArrayList(*Node)) Error!void {
+    var anchor: ?*Node = start;
+    while (anchor) |current| {
+        var sibling = current.nextSibling();
+        while (sibling) |node| {
+            try out.append(self.arena, node);
+            try self.appendDescendants(node, out);
+            sibling = node.nextSibling();
+        }
+        anchor = current.parentNode();
+    }
+}
+
+// Appends the subtree rooted at `n` in reverse document order: children
+// are visited last-to-first (each one recursively), and `n` itself is
+// appended after all of them.
+fn appendPrecedingSubtree(self: *Evaluator, n: *Node, out: *std.ArrayList(*Node)) Error!void {
+    var child = n.lastChild();
+    while (child) |current| {
+        try self.appendPrecedingSubtree(current, out);
+        child = current.previousSibling();
+    }
+    try out.append(self.arena, n);
+}
+
+// `preceding` axis: climbing from `start` towards the root, appends the
+// subtree of every earlier sibling at each level (nearest sibling first).
+// The ancestors themselves are never appended, and the climb stops at the
+// first node without a parent.
+fn appendPreceding(self: *Evaluator, start: *Node, out: *std.ArrayList(*Node)) Error!void {
+    var current = start;
+    while (current.parentNode()) |parent| : (current = parent) {
+        var sibling = current.previousSibling();
+        while (sibling) |node| : (sibling = node.previousSibling()) {
+            try self.appendPrecedingSubtree(node, out);
+        }
+    }
+}
+
+// `attribute` axis: appends one node per attribute of `node`. Does
+// nothing when `node` is not an Element. Attribute nodes are created on
+// demand and cached in `frame._attribute_lookup`, keyed by the address of
+// the attribute entry.
+fn appendAttributes(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) Error!void {
+ const el = node.is(Element) orelse return;
+ var it = el.attributeIterator();
+ while (it.next()) |entry| {
+ // Memoize via frame._attribute_lookup so repeated XPath queries
+ // (Capybara/Selenium polling) reuse the same *Attribute instead
+ // of leaking fresh ones into page-lifetime storage on every call.
+ // Same pattern as Attribute.List.getAttribute / NamedNodeMap.getAtIndex.
+ const gop = try self.frame._attribute_lookup.getOrPut(self.frame.arena, @intFromPtr(entry));
+ if (!gop.found_existing) {
+ // NOTE(review): if `toAttribute` fails here, the slot reserved by
+ // `getOrPut` appears to stay in the map with an unset value, and a
+ // later lookup would treat it as an existing entry — confirm, and
+ // consider removing the slot on the error path.
+ gop.value_ptr.* = try entry.toAttribute(el, self.frame);
+ }
+ try out.append(self.arena, gop.value_ptr.*._proto);
+ }
+}
+
+// ----- node test matching -----
+
+// Applies a step's node test to `node`. Name tests are delegated to
+// `matchNameTest`; type tests compare the DOM node type.
+fn matchTest(node: *Node, test_: ast.NodeTest, axis: ast.Axis, lowered_name: ?[]const u8) bool {
+    const kind = switch (test_) {
+        .name => |name| return matchNameTest(node, name, axis, lowered_name),
+        .type_test => |k| k,
+    };
+    switch (kind) {
+        .node => return true,
+        .text => {
+            // XPath 1.0 §5.7: the data model has no CDATASection node —
+            // CDATA content counts as text, so accept both Text (3) and
+            // CDATASection (4) DOM node types.
+            const node_type = node.getNodeType();
+            return node_type == 3 or node_type == 4;
+        },
+        .comment => return node.getNodeType() == 8,
+        .processing_instruction => return node.getNodeType() == 7,
+    }
+}
+
+// Name test. On the attribute axis only attribute nodes can match; on
+// every other axis only elements can. `*` matches any node of the right
+// kind.
+//
+// `lowered_name` is non-null iff `name != "*"`. Element tag names
+// (`getTagNameLower`) and html5ever-stored attribute names are already
+// lowercase, so comparing them byte-for-byte with the pre-lowered test
+// name gives a case-insensitive match without per-call case folding.
+fn matchNameTest(node: *Node, name: []const u8, axis: ast.Axis, lowered_name: ?[]const u8) bool {
+    const is_wildcard = std.mem.eql(u8, name, "*");
+
+    if (axis == .attribute) {
+        const attr = switch (node._type) {
+            .attribute => |a| a,
+            else => return false,
+        };
+        if (is_wildcard) return true;
+        return std.mem.eql(u8, attr._name.str(), lowered_name.?);
+    }
+
+    if (node.is(Element)) |el| {
+        return is_wildcard or std.mem.eql(u8, el.getTagNameLower(), lowered_name.?);
+    }
+    return false;
+}
+
+// ----- binop -----
+
+// Evaluates a binary operator. The left operand is always evaluated
+// first; `or` / `and` skip the right operand when the left one already
+// decides the result. Comparisons go through `xCmp`, arithmetic converts
+// both sides to numbers, and `|` requires two node-sets and returns their
+// de-duplicated union in document order.
+fn evalBinop(self: *Evaluator, bo: ast.BinOp, ctx: *Node, pos: usize, size: usize) Error!result.Result {
+    const lhs = try self.evalExpr(bo.left, ctx, pos, size);
+
+    // Short-circuit the logical operators before touching the right side.
+    switch (bo.op) {
+        .or_ => if (result.toBoolean(lhs)) return .{ .boolean = true },
+        .and_ => if (!result.toBoolean(lhs)) return .{ .boolean = false },
+        else => {},
+    }
+
+    const rhs = try self.evalExpr(bo.right, ctx, pos, size);
+
+    switch (bo.op) {
+        // The left side did not decide, so the right side does.
+        .or_, .and_ => return .{ .boolean = result.toBoolean(rhs) },
+        .eq, .neq, .lt, .gt, .lte, .gte => {
+            return .{ .boolean = try self.xCmp(lhs, rhs, bo.op) };
+        },
+        .add, .sub, .mul, .div, .mod => {
+            const a = try result.toNumber(self.arena, lhs);
+            const b = try result.toNumber(self.arena, rhs);
+            const value: f64 = switch (bo.op) {
+                .add => a + b,
+                .sub => a - b,
+                .mul => a * b,
+                .div => a / b,
+                // JS `%` and Zig `@rem` agree on sign for finite values
+                // and propagate NaN (XPath §3.5).
+                .mod => @rem(a, b),
+                else => unreachable,
+            };
+            return .{ .number = value };
+        },
+        .union_ => {
+            if (lhs != .node_set or rhs != .node_set) return error.UnionRequiresNodeSets;
+            var merged: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;
+            for (lhs.node_set) |node| try merged.put(self.arena, node, {});
+            for (rhs.node_set) |node| try merged.put(self.arena, node, {});
+            const nodes = merged.keys();
+            sortDocOrder(@constCast(nodes));
+            return .{ .node_set = nodes };
+        },
+    }
+}
+
+// ----- comparison (XPath spec §3.4) -----
+
+// Implements the XPath 1.0 comparison rules (spec §3.4) for `=`, `!=`,
+// `<`, `>`, `<=` and `>=`.
+//
+//   * node-set vs node-set: true if any pair of nodes compares true on
+//     string-values (`=`/`!=`) or on their numeric conversions (relational).
+//   * node-set vs boolean: the node-set is converted to a boolean
+//     (non-empty = true) and the two booleans are compared; relational
+//     operators compare them as numbers (true = 1, false = 0).
+//   * node-set vs number/string: true if any node compares true.
+//   * no node-set: `=`/`!=` compare as boolean, else number, else string
+//     (first kind present wins); relational operators compare as numbers.
+//
+// Operand order is preserved throughout so `a < b` is never evaluated as
+// `b < a`.
+fn xCmp(self: *Evaluator, left: result.Result, right: result.Result, op: ast.BinOpKind) Error!bool {
+    const is_eq = (op == .eq or op == .neq);
+    const l_is_set = (left == .node_set);
+    const r_is_set = (right == .node_set);
+
+    if (l_is_set and r_is_set) {
+        // Cache right-side string-values once. Without this, each left node
+        // would pay |right| allocations — O(N×M) for a set×set comparison
+        // (e.g. `//foo = //bar` on a large page).
+        const right_strings = try self.arena.alloc([]const u8, right.node_set.len);
+        for (right.node_set, 0..) |r, i| {
+            right_strings[i] = try result.stringValueOf(self.arena, r);
+        }
+        for (left.node_set) |l| {
+            const lv = try result.stringValueOf(self.arena, l);
+            for (right_strings) |rv| {
+                const matched = if (is_eq)
+                    cmpString(lv, rv, op)
+                else
+                    cmpNumber(result.stringToNumber(lv), result.stringToNumber(rv), op);
+                if (matched) return true;
+            }
+        }
+        return false;
+    }
+
+    if (l_is_set or r_is_set) {
+        const ns = if (l_is_set) left.node_set else right.node_set;
+        const other = if (l_is_set) right else left;
+        const ns_left = l_is_set;
+
+        if (other == .boolean) {
+            const ns_b = ns.len > 0;
+            const a, const b = if (ns_left) .{ ns_b, other.boolean } else .{ other.boolean, ns_b };
+            if (is_eq) return cmpBool(a, b, op);
+            // Relational operators always compare numbers. `cmpBool` only
+            // understands `=` / `!=` and would hit `unreachable` here
+            // (e.g. for `//a < true()`), so convert true → 1, false → 0.
+            const an: f64 = if (a) 1 else 0;
+            const bn: f64 = if (b) 1 else 0;
+            return cmpNumber(an, bn, op);
+        }
+
+        for (ns) |n| {
+            const sv = try result.stringValueOf(self.arena, n);
+            const matched = switch (other) {
+                .number => |num| blk: {
+                    const sv_num = result.stringToNumber(sv);
+                    const a, const b = if (ns_left) .{ sv_num, num } else .{ num, sv_num };
+                    break :blk cmpNumber(a, b, op);
+                },
+                .string => |s| blk: {
+                    if (is_eq) {
+                        const a, const b = if (ns_left) .{ sv, s } else .{ s, sv };
+                        break :blk cmpString(a, b, op);
+                    }
+                    const sv_num = result.stringToNumber(sv);
+                    const s_num = result.stringToNumber(s);
+                    const a, const b = if (ns_left) .{ sv_num, s_num } else .{ s_num, sv_num };
+                    break :blk cmpNumber(a, b, op);
+                },
+                .boolean, .node_set => unreachable, // handled above
+            };
+            if (matched) return true;
+        }
+        return false;
+    }
+
+    // Neither is a node-set.
+    if (is_eq) {
+        if (left == .boolean or right == .boolean) {
+            return cmpBool(result.toBoolean(left), result.toBoolean(right), op);
+        }
+        if (left == .number or right == .number) {
+            const ln = try result.toNumber(self.arena, left);
+            const rn = try result.toNumber(self.arena, right);
+            return cmpNumber(ln, rn, op);
+        }
+        const ls = try result.toString(self.arena, left);
+        const rs = try result.toString(self.arena, right);
+        return cmpString(ls, rs, op);
+    }
+    // Non-eq with no node-set: both → number.
+    const ln = try result.toNumber(self.arena, left);
+    const rn = try result.toNumber(self.arena, right);
+    return cmpNumber(ln, rn, op);
+}
+
+// Byte-wise string (in)equality. Only `.eq` and `.neq` are valid here:
+// relational operators always coerce to numbers before comparing.
+fn cmpString(a: []const u8, b: []const u8, op: ast.BinOpKind) bool {
+    const same = std.mem.eql(u8, a, b);
+    switch (op) {
+        .eq => return same,
+        .neq => return !same,
+        else => unreachable,
+    }
+}
+
+// Numeric comparison for the six comparison operators; any other operator
+// is a caller bug. Native f64 comparison already has the required NaN
+// behaviour: `NaN == x` is false, `NaN != x` is true, and every ordering
+// test involving NaN is false.
+fn cmpNumber(a: f64, b: f64, op: ast.BinOpKind) bool {
+    switch (op) {
+        .eq => return a == b,
+        .neq => return a != b,
+        .lt => return a < b,
+        .gt => return a > b,
+        .lte => return a <= b,
+        .gte => return a >= b,
+        else => unreachable,
+    }
+}
+
+// Boolean (in)equality. Only `.eq` and `.neq` are valid; any other
+// operator reaching this function is a caller bug.
+fn cmpBool(a: bool, b: bool, op: ast.BinOpKind) bool {
+    const same = a == b;
+    switch (op) {
+        .eq => return same,
+        .neq => return !same,
+        else => unreachable,
+    }
+}
+
+// ----- function calls -----
+
+// Evaluates a function call. `position()` and `last()` are answered here
+// because they need the (pos, size) context that `functions.call` never
+// sees; handling them inline keeps per-call context out of that
+// function's signature. Every other function is dispatched to
+// `functions.call`.
+fn evalFnCall(self: *Evaluator, fc: ast.FnCall, ctx: *Node, pos: usize, size: usize) Error!result.Result {
+    const positional: ?usize = if (std.mem.eql(u8, fc.name, "position"))
+        pos
+    else if (std.mem.eql(u8, fc.name, "last"))
+        size
+    else
+        null;
+    if (positional) |value| return .{ .number = @floatFromInt(value) };
+
+    // Arguments are evaluated eagerly, left to right (this mirrors the
+    // polyfill). No laziness is needed: `or` / `and` are binary operators
+    // handled in evalBinop, not function calls.
+    const args = try self.arena.alloc(result.Result, fc.args.len);
+    for (args, fc.args) |*slot, arg_expr| {
+        slot.* = try self.evalExpr(arg_expr, ctx, pos, size);
+    }
+
+    return functions.call(self.arena, fc.name, args, ctx, self.frame);
+}
+
+// ----- helpers -----
+
+// Decides whether a predicate value keeps the node at 1-based `position`.
+// A numeric value keeps only the node whose position equals it, so a
+// non-integer number never matches; any other value is converted to a
+// boolean.
+fn predicateMatches(val: result.Result, position: usize) bool {
+    if (val == .number) {
+        const wanted: f64 = @floatFromInt(position);
+        return val.number == wanted;
+    }
+    return result.toBoolean(val);
+}
+
+/// Sorts `nodes` in place into document order. Slices with fewer than two
+/// nodes are left untouched.
+pub fn sortDocOrder(nodes: []*Node) void {
+    if (nodes.len > 1) std.mem.sort(*Node, nodes, {}, lessThanDocOrder);
+}
+
+// Sort predicate: true when `a` comes before `b` in document order, i.e.
+// when `a.compareDocumentPosition(b)` reports FOLLOWING (0x04). A node is
+// never less than itself.
+fn lessThanDocOrder(_: void, a: *Node, b: *Node) bool {
+    const following_bit = 0x04;
+    return a != b and (a.compareDocumentPosition(b) & following_bit) != 0;
+}
+
+// ---------------------------------------------------------------------
+// Tests — pure-logic only. DOM-dependent evaluation lands as HTML
+// fixtures in Phase 9 (tests/xpath/*.html); Lightpanda has no in-Zig
+// way to construct a Frame + Document tree without the JS runtime.
+// ---------------------------------------------------------------------
+
+const testing = std.testing;
+const Tokenizer = @import("Tokenizer.zig");
+
+test "Evaluator: cmpNumber NaN semantics" {
+    const nan = std.math.nan(f64);
+
+    // NaN is unordered: of the six operators only `!=` holds.
+    try testing.expect(cmpNumber(nan, nan, .neq));
+    try testing.expect(!cmpNumber(nan, nan, .eq));
+    for ([_]ast.BinOpKind{ .lt, .gt, .lte, .gte }) |op| {
+        try testing.expect(!cmpNumber(nan, 0, op));
+    }
+
+    // Ordinary finite operands.
+    try testing.expect(cmpNumber(0, 0, .eq));
+    try testing.expect(cmpNumber(1, 2, .lt));
+    try testing.expect(cmpNumber(2, 1, .gt));
+    try testing.expect(cmpNumber(1, 1, .lte));
+    try testing.expect(cmpNumber(1, 1, .gte));
+}
+
+test "Evaluator: cmpString" {
+    // Equality.
+    try testing.expectEqual(true, cmpString("a", "a", .eq));
+    try testing.expectEqual(false, cmpString("a", "b", .eq));
+    // Inequality.
+    try testing.expectEqual(true, cmpString("a", "b", .neq));
+    try testing.expectEqual(false, cmpString("a", "a", .neq));
+}
+
+test "Evaluator: cmpBool" {
+    try testing.expectEqual(true, cmpBool(true, true, .eq));
+    try testing.expectEqual(false, cmpBool(true, false, .eq));
+    try testing.expectEqual(true, cmpBool(true, false, .neq));
+}
+
+test "Evaluator: predicateMatches numeric vs boolean" {
+    const Case = struct { value: result.Result, position: usize, want: bool };
+    const cases = [_]Case{
+        // Numbers select by position; a non-integer never matches.
+        .{ .value = .{ .number = 1 }, .position = 1, .want = true },
+        .{ .value = .{ .number = 2 }, .position = 1, .want = false },
+        .{ .value = .{ .number = 1.5 }, .position = 1, .want = false },
+        // Booleans ignore the position entirely.
+        .{ .value = .{ .boolean = true }, .position = 7, .want = true },
+        .{ .value = .{ .boolean = false }, .position = 1, .want = false },
+        // Strings: non-empty is truthy.
+        .{ .value = .{ .string = "x" }, .position = 99, .want = true },
+        .{ .value = .{ .string = "" }, .position = 1, .want = false },
+        // An empty node-set is falsy.
+        .{ .value = .{ .node_set = &.{} }, .position = 1, .want = false },
+    };
+    for (cases) |case| {
+        try testing.expectEqual(case.want, predicateMatches(case.value, case.position));
+    }
+}
+
+test "Evaluator: scalar arithmetic via parsed expressions" {
+    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena_state.deinit();
+    const allocator = arena_state.allocator();
+
+    const Case = struct { source: []const u8, want: f64 };
+    const cases = [_]Case{
+        .{ .source = "1 + 2", .want = 3 },
+        .{ .source = "5 - 3", .want = 2 },
+        .{ .source = "4 * 2", .want = 8 },
+        .{ .source = "10 div 4", .want = 2.5 },
+        .{ .source = "10 mod 3", .want = 1 },
+        .{ .source = "-5", .want = -5 },
+        .{ .source = "1 + 2 * 3", .want = 7 },
+    };
+
+    // Pure arithmetic never touches the Frame or the context node, so
+    // fabricated pointers are enough to drive the binop / number paths
+    // without a real DOM. Any expression that did dereference them would
+    // crash; none of the cases above does.
+    const fake_node: *Node = @ptrFromInt(0x2000);
+    for (cases) |case| {
+        const expr = try Parser.parse(allocator, case.source);
+        var ev: Evaluator = .{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
+        const value = try ev.evalExpr(expr, fake_node, 1, 1);
+        try testing.expect(value == .number);
+        try testing.expectEqual(case.want, value.number);
+    }
+}
+
+test "Evaluator: scalar comparison via parsed expressions" {
+    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena_state.deinit();
+    const allocator = arena_state.allocator();
+
+    const Case = struct { source: []const u8, want: bool };
+    const cases = [_]Case{
+        .{ .source = "1 = 1", .want = true },
+        .{ .source = "1 = 2", .want = false },
+        .{ .source = "1 != 2", .want = true },
+        .{ .source = "1 < 2", .want = true },
+        .{ .source = "2 < 1", .want = false },
+        .{ .source = "1 <= 1", .want = true },
+        .{ .source = "2 >= 2", .want = true },
+        .{ .source = "'abc' = 'abc'", .want = true },
+        .{ .source = "'abc' != 'abd'", .want = true },
+    };
+
+    // Scalar comparisons never dereference the frame or the context node.
+    const fake_node: *Node = @ptrFromInt(0x2000);
+    for (cases) |case| {
+        const expr = try Parser.parse(allocator, case.source);
+        var ev: Evaluator = .{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
+        const value = try ev.evalExpr(expr, fake_node, 1, 1);
+        try testing.expect(value == .boolean);
+        try testing.expectEqual(case.want, value.boolean);
+    }
+}
+
+test "Evaluator: position() and last() reflect context" {
+    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena_state.deinit();
+    const allocator = arena_state.allocator();
+
+    // Neither pointer is dereferenced by the expressions below.
+    const fake_node: *Node = @ptrFromInt(0x2000);
+    var ev: Evaluator = .{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
+
+    // position() reports the context position that was passed in.
+    const position_expr = try Parser.parse(allocator, "position()");
+    const position_value = try ev.evalExpr(position_expr, fake_node, 3, 5);
+    try testing.expectEqual(@as(f64, 3), position_value.number);
+
+    // last() reports the context size that was passed in.
+    const last_expr = try Parser.parse(allocator, "last()");
+    const last_value = try ev.evalExpr(last_expr, fake_node, 3, 5);
+    try testing.expectEqual(@as(f64, 5), last_value.number);
+
+    // `or` whose first operand is already true: the right-hand last()
+    // comparison is not needed to decide the result.
+    const or_expr = try Parser.parse(allocator, "1 = 1 or last() > 0");
+    const or_value = try ev.evalExpr(or_expr, fake_node, 1, 1);
+    try testing.expect(or_value.boolean);
+}
+
+test "Evaluator: short-circuit and/or" {
+    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena_state.deinit();
+    const allocator = arena_state.allocator();
+
+    const Case = struct { source: []const u8, want: bool };
+    const cases = [_]Case{
+        .{ .source = "1 = 2 or 1 = 1", .want = true },
+        .{ .source = "1 = 1 and 1 = 2", .want = false },
+        .{ .source = "1 = 1 and 2 = 2", .want = true },
+        .{ .source = "1 = 2 and 1 = 1", .want = false },
+        .{ .source = "1 = 2 or 2 = 1", .want = false },
+    };
+
+    const fake_node: *Node = @ptrFromInt(0x2000);
+    for (cases) |case| {
+        const expr = try Parser.parse(allocator, case.source);
+        var ev: Evaluator = .{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
+        const value = try ev.evalExpr(expr, fake_node, 1, 1);
+        try testing.expect(value == .boolean);
+        try testing.expectEqual(case.want, value.boolean);
+    }
+}
+
+test "Evaluator: unary minus" {
+    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena_state.deinit();
+    const allocator = arena_state.allocator();
+
+    // Negation applies to the whole parenthesized sum.
+    const fake_node: *Node = @ptrFromInt(0x2000);
+    var ev: Evaluator = .{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
+    const parsed = try Parser.parse(allocator, "-(3 + 2)");
+    const value = try ev.evalExpr(parsed, fake_node, 1, 1);
+    try testing.expectEqual(@as(f64, -5), value.number);
+}
+
+test "Evaluator: division by zero produces infinity / NaN per IEEE" {
+    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena_state.deinit();
+    const allocator = arena_state.allocator();
+
+    const fake_node: *Node = @ptrFromInt(0x2000);
+    var ev: Evaluator = .{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
+
+    // Non-zero / zero → +infinity.
+    const infinite = try ev.evalExpr(try Parser.parse(allocator, "1 div 0"), fake_node, 1, 1);
+    try testing.expect(std.math.isPositiveInf(infinite.number));
+
+    // Zero / zero → NaN.
+    const undefined_quotient = try ev.evalExpr(try Parser.parse(allocator, "0 div 0"), fake_node, 1, 1);
+    try testing.expect(std.math.isNan(undefined_quotient.number));
+}
+
+test "Evaluator: searchAll on scalar expression returns empty (decision #3)" {
+    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena_state.deinit();
+    const allocator = arena_state.allocator();
+
+    // Fabricated frame/root pointers are fine here: pure-scalar
+    // expressions (binop, literal, true(), comparison) never reach into
+    // the Frame or the context node. A DOM-touching expression such as
+    // `id('x')` would crash on dereference if it were added to this list.
+    const sources = [_][]const u8{ "1 + 2", "'hello'", "true()", "1 = 1" };
+    for (sources) |source| {
+        const nodes = try searchAll(allocator, @ptrFromInt(0x2000), source, @ptrFromInt(0x1000));
+        try testing.expectEqual(@as(usize, 0), nodes.len);
+    }
+}
diff --git a/src/browser/xpath/Parser.zig b/src/browser/xpath/Parser.zig
new file mode 100644
index 00000000..60c82df5
--- /dev/null
+++ b/src/browser/xpath/Parser.zig
@@ -0,0 +1,957 @@
+// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+//! XPath 1.0 expression parser.
+//!
+//! Recursive descent over a fully-tokenized stream, producing an
+//! `ast.Expr` tree allocated on the caller's arena. The AST borrows
+//! string/name slices from `input` and is valid for as long as the
+//! arena and input outlive it.
+
+const std = @import("std");
+
+const Tokenizer = @import("Tokenizer.zig");
+const ast = @import("ast.zig");
+
+const Token = Tokenizer.Token;
+const Allocator = std.mem.Allocator;
+
+const Parser = @This();
+
+pub const Error = error{
+ OutOfMemory,
+ UnexpectedToken,
+ ExpectedNodeTest,
+ ExpectedPrimaryExpr,
+ MaxDepthExceeded,
+};
+
+/// Cap on nested `parseExpr` / unary-minus levels, so adversarial input
+/// (e.g. `(((((...)))))`, `------5`) cannot blow the stack. Real XPath
+/// expressions never come close; browsers typically allow several hundred.
+const max_depth: u16 = 64;
+
+arena: Allocator,
+tokens: []const Token,
+pos: usize = 0,
+depth: u16 = 0,
+
+/// Tokenizes all of `input` up front, then parses it as a single XPath
+/// expression. Nodes are created in `arena`; leftover tokens are an error.
+pub fn parse(arena: Allocator, input: []const u8) Error!*ast.Expr {
+    // Initial capacity estimate: one slot per four input bytes, never fewer
+    // than eight. `append` below still grows the list if that falls short.
+    var token_list: std.ArrayList(Token) = .empty;
+    try token_list.ensureTotalCapacity(arena, @max(8, input.len / 4));
+
+    var tokenizer = Tokenizer{ .input = input };
+    while (true) {
+        const tok = tokenizer.next();
+        try token_list.append(arena, tok);
+        if (tok == .eof) break;
+    }
+
+    var parser = Parser{ .arena = arena, .tokens = token_list.items };
+    const root = try parser.parseExpr();
+    if (parser.peek() != .eof) return error.UnexpectedToken;
+    return root;
+}
+
+// --- token cursor helpers ---
+
+/// Current token. Indexes `tokens` directly, so `pos` must be in range.
+fn peek(self: *const Parser) Token {
+    return self.tokens[self.pos];
+}
+
+/// Token `offset` places ahead of the cursor; `.eof` once past the end.
+fn lookahead(self: *const Parser, offset: usize) Token {
+    return if (self.pos + offset < self.tokens.len) self.tokens[self.pos + offset] else .eof;
+}
+
+/// Returns the current token and moves the cursor one past it.
+fn advance(self: *Parser) Token {
+    defer self.pos += 1;
+    return self.peek();
+}
+
+/// Whether the current token carries `tag` (payload, if any, is ignored).
+fn at(self: *const Parser, tag: std.meta.Tag(Token)) bool {
+    return self.peek() == tag;
+}
+
+/// Consumes the current token if it carries `tag`; reports whether it did.
+fn match(self: *Parser, tag: std.meta.Tag(Token)) bool {
+    const hit = self.at(tag);
+    if (hit) _ = self.advance();
+    return hit;
+}
+
+/// Consumes and returns the current token; `UnexpectedToken` unless it carries `tag`.
+fn expect(self: *Parser, tag: std.meta.Tag(Token)) Error!Token {
+    return if (self.at(tag)) self.advance() else error.UnexpectedToken;
+}
+
+/// Consumes the current token if it is a `.name` spelled exactly `keyword`
+/// (the callers pass `or` / `and`); reports whether it did.
+fn matchKeyword(self: *Parser, keyword: []const u8) bool {
+    const tok = self.peek();
+    const hit = tok == .name and std.mem.eql(u8, tok.name, keyword);
+    if (hit) _ = self.advance();
+    return hit;
+}
+/// Allocates an `ast.Expr` in the parser's arena and initializes it to `value`.
+fn makeExpr(self: *Parser, value: ast.Expr) Error!*ast.Expr {
+    const expr = try self.arena.create(ast.Expr);
+    expr.* = value;
+    return expr;
+}
+/// Shorthand for `makeExpr` with a `.binop` payload built from `op`, `left`, `right`.
+fn makeBinop(self: *Parser, op: ast.BinOpKind, left: *ast.Expr, right: *ast.Expr) Error!*ast.Expr {
+    return try self.makeExpr(.{ .binop = .{ .op = op, .left = left, .right = right } });
+}
+
+// --- operator-precedence chain ---
+//
+// Or → And → Equality → Relational → Additive → Mult → Unary → Union → Path
+/// Parses a full expression; each nested call counts one level toward `max_depth`.
+fn parseExpr(self: *Parser) Error!*ast.Expr {
+    if (self.depth >= max_depth) return error.MaxDepthExceeded;
+    self.depth += 1;
+    defer self.depth -= 1;
+    return self.parseOrExpr();
+}
+
+/// OrExpr: AndExpr operands joined by `or`, folded left-associatively.
+fn parseOrExpr(self: *Parser) Error!*ast.Expr {
+    var acc = try self.parseAndExpr();
+    while (self.matchKeyword("or")) {
+        acc = try self.makeBinop(.or_, acc, try self.parseAndExpr());
+    }
+    return acc;
+}
+
+/// AndExpr: EqualityExpr operands joined by `and`, folded left-associatively.
+fn parseAndExpr(self: *Parser) Error!*ast.Expr {
+    var acc = try self.parseEqualityExpr();
+    while (self.matchKeyword("and")) {
+        acc = try self.makeBinop(.and_, acc, try self.parseEqualityExpr());
+    }
+    return acc;
+}
+
+/// EqualityExpr: RelationalExpr operands joined by `=` / `!=`, left-associative.
+fn parseEqualityExpr(self: *Parser) Error!*ast.Expr {
+    var acc = try self.parseRelationalExpr();
+    while (equalityOp(self.peek())) |op| {
+        _ = self.advance();
+        acc = try self.makeBinop(op, acc, try self.parseRelationalExpr());
+    }
+    return acc;
+}
+
+/// RelationalExpr: AdditiveExpr operands joined by `<` `>` `<=` `>=`, left-associative.
+fn parseRelationalExpr(self: *Parser) Error!*ast.Expr {
+    var acc = try self.parseAdditiveExpr();
+    while (relationalOp(self.peek())) |op| {
+        _ = self.advance();
+        acc = try self.makeBinop(op, acc, try self.parseAdditiveExpr());
+    }
+    return acc;
+}
+
+/// AdditiveExpr: MultiplicativeExpr operands joined by `+` / `-`, left-associative.
+fn parseAdditiveExpr(self: *Parser) Error!*ast.Expr {
+    var acc = try self.parseMultExpr();
+    while (additiveOp(self.peek())) |op| {
+        _ = self.advance();
+        acc = try self.makeBinop(op, acc, try self.parseMultExpr());
+    }
+    return acc;
+}
+
+/// MultiplicativeExpr: UnaryExpr operands joined by `*`, `div`, or `mod`.
+/// After a complete unary expression, `*` is multiply; `div`/`mod` are
+/// operator-position keywords (tokenized as Name).
+fn parseMultExpr(self: *Parser) Error!*ast.Expr {
+    var acc = try self.parseUnaryExpr();
+    while (multOp(self.peek())) |op| {
+        _ = self.advance();
+        acc = try self.makeBinop(op, acc, try self.parseUnaryExpr());
+    }
+    return acc;
+}
+
+/// UnaryExpr: `-` prefixes nest as `.neg`; each one counts toward `max_depth`.
+fn parseUnaryExpr(self: *Parser) Error!*ast.Expr {
+    if (!self.match(.minus)) return self.parseUnionExpr();
+
+    if (self.depth >= max_depth) return error.MaxDepthExceeded;
+    self.depth += 1;
+    defer self.depth -= 1;
+    const operand = try self.parseUnaryExpr();
+    return try self.makeExpr(.{ .neg = operand });
+}
+
+/// UnionExpr: PathExpr operands joined by `|`, folded left-associatively.
+fn parseUnionExpr(self: *Parser) Error!*ast.Expr {
+    var acc = try self.parsePathExpr();
+    while (self.match(.pipe)) {
+        acc = try self.makeBinop(.union_, acc, try self.parsePathExpr());
+    }
+    return acc;
+}
+
+// --- path expressions ---
+
+/// PathExpr: an absolute path, a relative path, or a filter expression
+/// (primary + predicates) optionally continued by `/` or `//` steps. The
+/// continued form becomes a `.filter_path` node.
+fn parsePathExpr(self: *Parser) Error!*ast.Expr {
+    const first = self.peek();
+    if (first == .slash or first == .double_slash) return self.parseAbsPath();
+
+    // Filter-vs-relative-path disambiguation: a primary expression
+    // starts with `(`, string, number, `$`, or a `name(` where the
+    // name is *not* a node-type test (`node`/`text`/`comment`/`processing-instruction`).
+    const starts_primary = switch (first) {
+        .lparen, .string, .number, .dollar => true,
+        .name => |name| self.lookahead(1) == .lparen and !isNodeTypeName(name),
+        else => false,
+    };
+    if (!starts_primary) return self.parseRelPath();
+
+    var filtered = try self.parsePrimaryExpr();
+    while (self.match(.lbracket)) {
+        const pred = try self.parseExpr();
+        _ = try self.expect(.rbracket);
+        filtered = try self.makeExpr(.{ .filter = .{ .expr = filtered, .predicate = pred } });
+    }
+
+    // No `/` or `//` after the filter: the filter is the whole expression.
+    const tail_is_double = switch (self.peek()) {
+        .slash => false,
+        .double_slash => true,
+        else => return filtered,
+    };
+    _ = self.advance();
+
+    // A `//` separator contributes its own descendant-or-self step first.
+    var steps: std.ArrayList(ast.Step) = .empty;
+    if (tail_is_double) try steps.append(self.arena, descendantOrSelfStep());
+    try self.parseRelStepsInto(&steps);
+    return try self.makeExpr(.{ .filter_path = .{ .filter = filtered, .steps = steps.items } });
+}
+
+/// Absolute location path: a bare `/`, `/` + steps, or `//` + steps.
+fn parseAbsPath(self: *Parser) Error!*ast.Expr {
+    var steps: std.ArrayList(ast.Step) = .empty;
+    if (self.match(.double_slash)) {
+        // `//` contributes a descendant-or-self::node() step and needs a step after it.
+        try steps.append(self.arena, descendantOrSelfStep());
+        try self.parseRelStepsInto(&steps);
+    } else {
+        _ = try self.expect(.slash);
+        // `/` alone is the document root — no step required.
+        if (self.canStartStep()) try self.parseRelStepsInto(&steps);
+    }
+    const path: ast.Expr = .{ .path = .{ .absolute = true, .steps = steps.items } };
+    return try self.makeExpr(path);
+}
+
+/// Relative location path: one or more steps with no leading slash,
+/// wrapped in a non-absolute `.path` node.
+fn parseRelPath(self: *Parser) Error!*ast.Expr {
+    var steps: std.ArrayList(ast.Step) = .empty;
+    try self.parseRelStepsInto(&steps);
+    const path: ast.Expr = .{ .path = .{ .absolute = false, .steps = steps.items } };
+    return try self.makeExpr(path);
+}
+
+/// Appends `Step (('/' | '//') Step)*` to `steps`; at least one step is required.
+fn parseRelStepsInto(self: *Parser, steps: *std.ArrayList(ast.Step)) Error!void {
+    try steps.append(self.arena, try self.parseStep());
+    while (self.at(.slash) or self.at(.double_slash)) {
+        const separator = self.advance();
+        if (separator == .double_slash) try steps.append(self.arena, descendantOrSelfStep());
+        try steps.append(self.arena, try self.parseStep());
+    }
+}
+
+/// Whether the current token can begin a step: a name, `*`, `.`, `..`,
+/// or `@`.
+fn canStartStep(self: *const Parser) bool {
+    const tok = self.peek();
+    return tok == .name or tok == .star or tok == .dot or tok == .double_dot or tok == .at;
+}
+
+/// Step: `.` / `..`, or an optional axis (`name::` or `@`), a node test, and predicates.
+fn parseStep(self: *Parser) Error!ast.Step {
+    // Abbreviated steps `.` and `..` carry no axis, node-test, or
+    // predicates — predicates after `.` are a parse error per polyfill.
+    if (self.match(.dot)) return abbreviatedStep(.self);
+    if (self.match(.double_dot)) return abbreviatedStep(.parent);
+
+    // `@` selects the attribute axis, `name::` a named axis; default is child.
+    const axis: ast.Axis = blk: {
+        if (self.match(.at)) break :blk .attribute;
+        if (self.peek() != .name or self.lookahead(1) != .double_colon) break :blk .child;
+        const axis_name = self.advance().name;
+        _ = self.advance(); // `::`
+        break :blk parseAxisName(axis_name);
+    };
+    const node_test = try self.parseNodeTest();
+
+    var predicates: std.ArrayList(*ast.Expr) = .empty;
+    while (self.match(.lbracket)) {
+        const pred = try self.parseExpr();
+        _ = try self.expect(.rbracket);
+        try predicates.append(self.arena, pred);
+    }
+
+    return .{ .axis = axis, .node_test = node_test, .predicates = predicates.items };
+}
+
+/// NodeTest: `*`, a name, or a node-type test such as `text()`. A node-type
+/// name that is not followed by `(` is treated as an ordinary name test.
+fn parseNodeTest(self: *Parser) Error!ast.NodeTest {
+    if (self.match(.star)) return .{ .name = "*" };
+    const name = switch (self.peek()) {
+        .name => |text| text,
+        else => return error.ExpectedNodeTest,
+    };
+    _ = self.advance(); // the name itself
+
+    // Only `node` / `text` / `comment` / `processing-instruction` followed
+    // by `(` form a type test; every other case is a plain name test.
+    const type_test = typeTestKind(name) orelse return .{ .name = name };
+    if (!self.match(.lparen)) return .{ .name = name };
+
+    // `processing-instruction("target")` consumes the literal but ignores it (decision #3 stub).
+    if (type_test == .processing_instruction) _ = self.match(.string);
+    _ = try self.expect(.rparen);
+    return .{ .type_test = type_test };
+}
+
+/// PrimaryExpr: a string or number literal, `$name`, a parenthesized
+/// expression (returned unwrapped), or a function call `name(args...)`.
+fn parsePrimaryExpr(self: *Parser) Error!*ast.Expr {
+    switch (self.peek()) {
+        .string => |text| {
+            _ = self.advance();
+            return try self.makeExpr(.{ .literal = text });
+        },
+        .number => |value| {
+            _ = self.advance();
+            return try self.makeExpr(.{ .number = value });
+        },
+        .dollar => {
+            _ = self.advance();
+            const var_name = (try self.expect(.name)).name;
+            return try self.makeExpr(.{ .var_ref = var_name });
+        },
+        .lparen => {
+            _ = self.advance();
+            const inner = try self.parseExpr();
+            _ = try self.expect(.rparen);
+            return inner;
+        },
+        .name => |fn_name| {
+            _ = self.advance();
+            _ = try self.expect(.lparen);
+            var args: std.ArrayList(*ast.Expr) = .empty;
+            if (!self.at(.rparen)) {
+                try args.append(self.arena, try self.parseExpr());
+                while (self.match(.comma)) try args.append(self.arena, try self.parseExpr());
+            }
+            _ = try self.expect(.rparen);
+            return try self.makeExpr(.{ .fn_call = .{ .name = fn_name, .args = args.items } });
+        },
+        else => return error.ExpectedPrimaryExpr,
+    }
+}
+
+// --- pure helpers ---
+
+/// Maps an `=` or `!=` token to its binary-operator kind; null for any
+/// other token.
+fn equalityOp(t: Token) ?ast.BinOpKind {
+    if (t == .eq) return .eq;
+    if (t == .neq) return .neq;
+    return null;
+}
+
+/// Maps a `<`, `>`, `<=`, or `>=` token to its binary-operator kind; null
+/// for any other token.
+fn relationalOp(t: Token) ?ast.BinOpKind {
+    if (t == .lt) return .lt;
+    if (t == .gt) return .gt;
+    if (t == .lte) return .lte;
+    if (t == .gte) return .gte;
+    return null;
+}
+
+/// Maps a `+` or `-` token to `.add` / `.sub`; null for any other
+/// token.
+fn additiveOp(t: Token) ?ast.BinOpKind {
+    if (t == .plus) return .add;
+    if (t == .minus) return .sub;
+    return null;
+}
+
+/// Maps `*`, or a name spelled `div` / `mod`, to its operator kind; else null.
+fn multOp(t: Token) ?ast.BinOpKind {
+    const word = switch (t) {
+        .star => return .mul,
+        .name => |name| name,
+        else => return null,
+    };
+    if (std.mem.eql(u8, word, "div")) return .div;
+    if (std.mem.eql(u8, word, "mod")) return .mod;
+    return null;
+}
+
+/// The step that `//` stands for: `descendant-or-self::node()` with no
+/// predicates.
+fn descendantOrSelfStep() ast.Step {
+    return abbreviatedStep(.descendant_or_self);
+}
+
+/// A predicate-free `axis::node()` step; `.` and `..` use it with the
+/// `self` and `parent` axes.
+fn abbreviatedStep(axis: ast.Axis) ast.Step {
+    return .{
+        .axis = axis,
+        .node_test = .{ .type_test = .node },
+        .predicates = &.{},
+    };
+}
+/// True when `name` is one of the node-type test names in `type_test_lookup`.
+fn isNodeTypeName(name: []const u8) bool {
+    return typeTestKind(name) != null;
+}
+
+const type_test_lookup = std.StaticStringMap(ast.TypeTest).initComptime(.{
+    .{ "node", .node },
+    .{ "text", .text },
+    .{ "comment", .comment },
+    .{ "processing-instruction", .processing_instruction },
+});
+/// Returns the node-type test spelled `name`, or null for any other name.
+fn typeTestKind(name: []const u8) ?ast.TypeTest {
+    return type_test_lookup.get(name);
+}
+
+const axis_lookup = std.StaticStringMap(ast.Axis).initComptime(.{
+ .{ "child", .child },
+ .{ "descendant", .descendant },
+ .{ "descendant-or-self", .descendant_or_self },
+ .{ "self", .self },
+ .{ "parent", .parent },
+ .{ "ancestor", .ancestor },
+ .{ "ancestor-or-self", .ancestor_or_self },
+ .{ "following-sibling", .following_sibling },
+ .{ "preceding-sibling", .preceding_sibling },
+ .{ "following", .following },
+ .{ "preceding", .preceding },
+ .{ "attribute", .attribute },
+ .{ "namespace", .namespace },
+});
+/// Looks `name` up in `axis_lookup`; unrecognized names yield `.unknown`, not an error.
+fn parseAxisName(name: []const u8) ast.Axis {
+    return axis_lookup.get(name) orelse .unknown;
+}
+
+// ---------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------
+
+const testing = std.testing;
+/// Parses `input` on a fresh arena over `testing.allocator`; on success the caller must `deinit` the returned arena.
+fn parseFixture(input: []const u8) !struct { arena: std.heap.ArenaAllocator, expr: *ast.Expr } {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    errdefer arena.deinit();
+    const expr = try parse(arena.allocator(), input);
+    return .{ .arena = arena, .expr = expr };
+}
+
+test "XPath.Parser: number literal" {
+ var fx = try parseFixture("42");
+ defer fx.arena.deinit();
+ try testing.expectEqual(@as(f64, 42), fx.expr.number);
+}
+
+test "XPath.Parser: string literal" {
+ var fx = try parseFixture("'hello'");
+ defer fx.arena.deinit();
+ try testing.expectEqualStrings("hello", fx.expr.literal);
+}
+
+test "XPath.Parser: variable reference strips $" {
+ var fx = try parseFixture("$x");
+ defer fx.arena.deinit();
+ try testing.expectEqualStrings("x", fx.expr.var_ref);
+}
+
+test "XPath.Parser: parenthesized expression unwraps" {
+ var fx = try parseFixture("(42)");
+ defer fx.arena.deinit();
+ try testing.expectEqual(@as(f64, 42), fx.expr.number);
+}
+
+test "XPath.Parser: function call with no args" {
+ var fx = try parseFixture("position()");
+ defer fx.arena.deinit();
+ try testing.expectEqualStrings("position", fx.expr.fn_call.name);
+ try testing.expectEqual(@as(usize, 0), fx.expr.fn_call.args.len);
+}
+
+test "XPath.Parser: function call with args" {
+ var fx = try parseFixture("substring('abc', 2, 1)");
+ defer fx.arena.deinit();
+ const fc = fx.expr.fn_call;
+ try testing.expectEqualStrings("substring", fc.name);
+ try testing.expectEqual(@as(usize, 3), fc.args.len);
+ try testing.expectEqualStrings("abc", fc.args[0].literal);
+ try testing.expectEqual(@as(f64, 2), fc.args[1].number);
+ try testing.expectEqual(@as(f64, 1), fc.args[2].number);
+}
+
+test "XPath.Parser: arithmetic precedence — mul binds tighter than add" {
+ var fx = try parseFixture("1 + 2 * 3");
+ defer fx.arena.deinit();
+ // Expected AST: add(1, mul(2, 3))
+ const top = fx.expr.binop;
+ try testing.expectEqual(ast.BinOpKind.add, top.op);
+ try testing.expectEqual(@as(f64, 1), top.left.number);
+ const mul = top.right.binop;
+ try testing.expectEqual(ast.BinOpKind.mul, mul.op);
+ try testing.expectEqual(@as(f64, 2), mul.left.number);
+ try testing.expectEqual(@as(f64, 3), mul.right.number);
+}
+
+test "XPath.Parser: arithmetic left-associativity" {
+ var fx = try parseFixture("1 - 2 - 3");
+ defer fx.arena.deinit();
+ // Expected AST: sub(sub(1, 2), 3)
+ const top = fx.expr.binop;
+ try testing.expectEqual(ast.BinOpKind.sub, top.op);
+ try testing.expectEqual(@as(f64, 3), top.right.number);
+ const inner = top.left.binop;
+ try testing.expectEqual(ast.BinOpKind.sub, inner.op);
+ try testing.expectEqual(@as(f64, 1), inner.left.number);
+ try testing.expectEqual(@as(f64, 2), inner.right.number);
+}
+
+test "XPath.Parser: div and mod are operator-position keywords" {
+    // Each keyword sits between two operands, so it must parse as a binop.
+    const cases = .{ .{ "7 div 2", ast.BinOpKind.div }, .{ "7 mod 2", ast.BinOpKind.mod } };
+    inline for (cases) |case| {
+        var fx = try parseFixture(case[0]);
+        defer fx.arena.deinit();
+        try testing.expectEqual(case[1], fx.expr.binop.op);
+    }
+}
+
+test "XPath.Parser: comparison operators" {
+ inline for (.{
+ .{ "1 = 2", ast.BinOpKind.eq },
+ .{ "1 != 2", ast.BinOpKind.neq },
+ .{ "1 < 2", ast.BinOpKind.lt },
+ .{ "1 <= 2", ast.BinOpKind.lte },
+ .{ "1 > 2", ast.BinOpKind.gt },
+ .{ "1 >= 2", ast.BinOpKind.gte },
+ }) |case| {
+ var fx = try parseFixture(case[0]);
+ defer fx.arena.deinit();
+ try testing.expectEqual(case[1], fx.expr.binop.op);
+ }
+}
+
+test "XPath.Parser: logical or/and short-circuit chain" {
+ var fx = try parseFixture("a or b and c");
+ defer fx.arena.deinit();
+ // Expected AST: or(path(a), and(path(b), path(c))) — and binds tighter
+ const top = fx.expr.binop;
+ try testing.expectEqual(ast.BinOpKind.or_, top.op);
+ try testing.expectEqual(ast.BinOpKind.and_, top.right.binop.op);
+}
+
+test "XPath.Parser: unary minus" {
+ var fx = try parseFixture("-1");
+ defer fx.arena.deinit();
+ try testing.expectEqual(@as(f64, 1), fx.expr.neg.number);
+}
+
+test "XPath.Parser: union" {
+ var fx = try parseFixture("a | b");
+ defer fx.arena.deinit();
+ try testing.expectEqual(ast.BinOpKind.union_, fx.expr.binop.op);
+}
+
+test "XPath.Parser: absolute path / alone is document root" {
+ var fx = try parseFixture("/");
+ defer fx.arena.deinit();
+ const path = fx.expr.path;
+ try testing.expect(path.absolute);
+ try testing.expectEqual(@as(usize, 0), path.steps.len);
+}
+
+test "XPath.Parser: absolute path /foo" {
+ var fx = try parseFixture("/foo");
+ defer fx.arena.deinit();
+ const path = fx.expr.path;
+ try testing.expect(path.absolute);
+ try testing.expectEqual(@as(usize, 1), path.steps.len);
+ try testing.expectEqualStrings("foo", path.steps[0].node_test.name);
+}
+
+test "XPath.Parser: //foo expands to descendant-or-self::node()/foo" {
+ var fx = try parseFixture("//foo");
+ defer fx.arena.deinit();
+ const path = fx.expr.path;
+ try testing.expect(path.absolute);
+ try testing.expectEqual(@as(usize, 2), path.steps.len);
+ try testing.expectEqual(ast.Axis.descendant_or_self, path.steps[0].axis);
+ try testing.expectEqual(ast.TypeTest.node, path.steps[0].node_test.type_test);
+ try testing.expectEqualStrings("foo", path.steps[1].node_test.name);
+}
+
+test "XPath.Parser: relative path child::foo/bar" {
+ var fx = try parseFixture("foo/bar");
+ defer fx.arena.deinit();
+ const path = fx.expr.path;
+ try testing.expect(!path.absolute);
+ try testing.expectEqual(@as(usize, 2), path.steps.len);
+ try testing.expectEqual(ast.Axis.child, path.steps[0].axis);
+ try testing.expectEqualStrings("foo", path.steps[0].node_test.name);
+ try testing.expectEqualStrings("bar", path.steps[1].node_test.name);
+}
+
+test "XPath.Parser: abbreviated steps . and .." {
+ var fx = try parseFixture("./..");
+ defer fx.arena.deinit();
+ const path = fx.expr.path;
+ try testing.expectEqual(@as(usize, 2), path.steps.len);
+ try testing.expectEqual(ast.Axis.self, path.steps[0].axis);
+ try testing.expectEqual(ast.Axis.parent, path.steps[1].axis);
+}
+
+test "XPath.Parser: attribute axis @class" {
+ var fx = try parseFixture("@class");
+ defer fx.arena.deinit();
+ const step = fx.expr.path.steps[0];
+ try testing.expectEqual(ast.Axis.attribute, step.axis);
+ try testing.expectEqualStrings("class", step.node_test.name);
+}
+
+test "XPath.Parser: all 12 named axes parse correctly" {
+ inline for (.{
+ .{ "child::a", ast.Axis.child },
+ .{ "descendant::a", ast.Axis.descendant },
+ .{ "descendant-or-self::a", ast.Axis.descendant_or_self },
+ .{ "self::a", ast.Axis.self },
+ .{ "parent::a", ast.Axis.parent },
+ .{ "ancestor::a", ast.Axis.ancestor },
+ .{ "ancestor-or-self::a", ast.Axis.ancestor_or_self },
+ .{ "following-sibling::a", ast.Axis.following_sibling },
+ .{ "preceding-sibling::a", ast.Axis.preceding_sibling },
+ .{ "following::a", ast.Axis.following },
+ .{ "preceding::a", ast.Axis.preceding },
+ .{ "namespace::a", ast.Axis.namespace },
+ }) |case| {
+ var fx = try parseFixture(case[0]);
+ defer fx.arena.deinit();
+ try testing.expectEqual(case[1], fx.expr.path.steps[0].axis);
+ }
+}
+
+test "XPath.Parser: unknown axis name maps to .unknown — polyfill parity" {
+ var fx = try parseFixture("wibble::a");
+ defer fx.arena.deinit();
+ try testing.expectEqual(ast.Axis.unknown, fx.expr.path.steps[0].axis);
+}
+
+test "XPath.Parser: wildcard *" {
+ var fx = try parseFixture("*");
+ defer fx.arena.deinit();
+ try testing.expectEqualStrings("*", fx.expr.path.steps[0].node_test.name);
+}
+
+test "XPath.Parser: namespace-prefixed name and wildcard" {
+ var fx = try parseFixture("svg:rect");
+ defer fx.arena.deinit();
+ try testing.expectEqualStrings("svg:rect", fx.expr.path.steps[0].node_test.name);
+
+ var fx2 = try parseFixture("svg:*");
+ defer fx2.arena.deinit();
+ try testing.expectEqualStrings("svg:*", fx2.expr.path.steps[0].node_test.name);
+}
+
+test "XPath.Parser: node-type tests" {
+ inline for (.{
+ .{ "node()", ast.TypeTest.node },
+ .{ "text()", ast.TypeTest.text },
+ .{ "comment()", ast.TypeTest.comment },
+ .{ "processing-instruction()", ast.TypeTest.processing_instruction },
+ }) |case| {
+ var fx = try parseFixture(case[0]);
+ defer fx.arena.deinit();
+ try testing.expectEqual(case[1], fx.expr.path.steps[0].node_test.type_test);
+ }
+}
+
+test "XPath.Parser: processing-instruction with literal target — consumed but ignored" {
+ var fx = try parseFixture("processing-instruction('xml-stylesheet')");
+ defer fx.arena.deinit();
+ try testing.expectEqual(ast.TypeTest.processing_instruction, fx.expr.path.steps[0].node_test.type_test);
+}
+
+test "XPath.Parser: predicate on step" {
+ var fx = try parseFixture("p[1]");
+ defer fx.arena.deinit();
+ const step = fx.expr.path.steps[0];
+ try testing.expectEqual(@as(usize, 1), step.predicates.len);
+ try testing.expectEqual(@as(f64, 1), step.predicates[0].number);
+}
+
+test "XPath.Parser: multi-predicate step" {
+ var fx = try parseFixture("p[1][@x]");
+ defer fx.arena.deinit();
+ const step = fx.expr.path.steps[0];
+ try testing.expectEqual(@as(usize, 2), step.predicates.len);
+}
+
+test "XPath.Parser: filter expression with predicate parses as Filter, not Step" {
+ var fx = try parseFixture("(//a)[1]");
+ defer fx.arena.deinit();
+ // Top level is Filter wrapping a parenthesized path with one predicate.
+ const filt = fx.expr.filter;
+ try testing.expectEqual(@as(f64, 1), filt.predicate.number);
+ try testing.expect(filt.expr.path.absolute);
+}
+
+test "XPath.Parser: filter with multi-predicate nests" {
+ var fx = try parseFixture("(//a)[1][2]");
+ defer fx.arena.deinit();
+ const outer = fx.expr.filter;
+ try testing.expectEqual(@as(f64, 2), outer.predicate.number);
+ const inner = outer.expr.filter;
+ try testing.expectEqual(@as(f64, 1), inner.predicate.number);
+}
+
+test "XPath.Parser: filter with location-path tail (filter_path)" {
+ var fx = try parseFixture("(//a)/b");
+ defer fx.arena.deinit();
+ const fp = fx.expr.filter_path;
+ try testing.expect(fp.filter.path.absolute);
+ try testing.expectEqual(@as(usize, 1), fp.steps.len);
+ try testing.expectEqualStrings("b", fp.steps[0].node_test.name);
+}
+
+test "XPath.Parser: filter with // tail prepends descendant-or-self" {
+ var fx = try parseFixture("(//a)//b");
+ defer fx.arena.deinit();
+ const fp = fx.expr.filter_path;
+ try testing.expectEqual(@as(usize, 2), fp.steps.len);
+ try testing.expectEqual(ast.Axis.descendant_or_self, fp.steps[0].axis);
+ try testing.expectEqualStrings("b", fp.steps[1].node_test.name);
+}
+
+test "XPath.Parser: function call followed by predicate" {
+ var fx = try parseFixture("id('x')[1]");
+ defer fx.arena.deinit();
+ const filt = fx.expr.filter;
+ try testing.expectEqual(@as(f64, 1), filt.predicate.number);
+ try testing.expectEqualStrings("id", filt.expr.fn_call.name);
+}
+
+test "XPath.Parser: complex representative expression" {
+ var fx = try parseFixture("//div[@class='active']/p[position()<=last()-1]");
+ defer fx.arena.deinit();
+ const path = fx.expr.path;
+ try testing.expect(path.absolute);
+ try testing.expectEqual(@as(usize, 3), path.steps.len);
+ try testing.expectEqual(ast.Axis.descendant_or_self, path.steps[0].axis);
+ try testing.expectEqualStrings("div", path.steps[1].node_test.name);
+ try testing.expectEqual(@as(usize, 1), path.steps[1].predicates.len);
+ try testing.expectEqualStrings("p", path.steps[2].node_test.name);
+ try testing.expectEqual(@as(usize, 1), path.steps[2].predicates.len);
+}
+/// Asserts that parsing `input` on a throwaway arena fails with exactly `expected`.
+fn expectParseError(input: []const u8, expected: anyerror) !void {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    try testing.expectError(expected, parse(arena.allocator(), input));
+}
+
+test "XPath.Parser: error on unbalanced paren" {
+ try expectParseError("(1", error.UnexpectedToken);
+}
+
+test "XPath.Parser: error on unbalanced bracket" {
+ try expectParseError("p[1", error.UnexpectedToken);
+}
+
+test "XPath.Parser: error on missing node test" {
+ try expectParseError("child::", error.ExpectedNodeTest);
+}
+
+test "XPath.Parser: bare `+` falls through to step and reports missing node test" {
+ // Matches polyfill: + isn't a path/primary start, so the parser
+ // ends up in parseStep with no name to use as node test.
+ try expectParseError("+", error.ExpectedNodeTest);
+}
+
+test "XPath.Parser: error on trailing tokens" {
+ try expectParseError("1 2", error.UnexpectedToken);
+}
+
+test "XPath.Parser: empty string falls through to step and reports missing node test" {
+ try expectParseError("", error.ExpectedNodeTest);
+}
+
+test "XPath.Parser: 91-case battery — every expression parses" {
+ // 91-case XPath 1.0 conformance battery covering every expression
+ // shape the public API surface accepts. Each entry must parse
+ // without error.
+ const battery = [_][]const u8{
+ "/html",
+ "/html/body",
+ "/",
+ "//h1",
+ "//ul/li",
+ "//ul//li",
+ ".",
+ ".//li",
+ "//section/*",
+ "//*[@id='heading']",
+ "//li[1]/following-sibling::li",
+ "//li[5]/preceding-sibling::li",
+ "//li/parent::ul",
+ "//li/ancestor::body",
+ "//li/ancestor-or-self::body",
+ "//li[3]/preceding::li",
+ "//li[1]/following::li",
+ "//ul/descendant::li",
+ "//ul/descendant-or-self::li",
+ "//section[1]/child::span",
+ "//*[@id='heading']/self::h1",
+ "//a[1]/attribute::href",
+ "//a[1]/@*",
+ "//li[1]",
+ "//li[last()]",
+ "//li[last() - 1]",
+ "//li[position() = 1]",
+ "//li[position() > 2]",
+ "//li[position() mod 2 = 1]",
+ "(//li)[1]",
+ "(//section)[2]",
+ "//li[3]/preceding-sibling::li[1]",
+ "//li[5]/ancestor::*[1]",
+ "//li[contains(concat(' ', @class, ' '), ' even ')][2]",
+ "//*[@id='heading' and @class='primary']",
+ "//*[@id='heading' or @id='p1']",
+ "//section[a]",
+ "//section[count(span) = 2]",
+ "//ul[count(li) = 5]",
+ "//tr[td[1]]",
+ "//tr[td/text() = 'Bob']",
+ "//*[starts-with(@id, 'link')]",
+ "//*[normalize-space() = 'Hello World']",
+ "//*[normalize-space(.) = 'Item 1']",
+ "//*[concat(@id, '-x') = 'heading-x']",
+ "//*[substring(@id, 1, 1) = 'p']",
+ "//*[substring(@id, 2, 1) = '1' and starts-with(@id, 'p')]",
+ "//p[translate(@id, 'p', 'q') = 'q1']",
+ "//*[substring-before(@id, '1') = 'p']",
+ "//*[substring-after(@id, 'lin') = 'k1']",
+ "//tr[number(td[2]) > 28]",
+ "//tr[floor(number(td[2]) div 10) = 3]",
+ "//tr[ceiling(number(td[2]) div 10) = 3]",
+ "//tr[round(number(td[2]) div 10) = 3]",
+ "//ul[sum(li/@data-len) = 0]",
+ "//p[boolean(@lang)]",
+ "//*[false()]",
+ "//*[name() = 'h1']",
+ "//*[local-name() = 'h1']",
+ "id('heading')",
+ "id('heading p1')",
+ "id(//em/parent::p/@id)",
+ "//h1 | //title",
+ "//h1 | //*[@id='p1']",
+ "//*[@id='heading'] | //*[@id='heading']",
+ "//li[position() + 1 = 3]",
+ "//li[position() - 1 = 0]",
+ "//li[position() * 2 = 4]",
+ "//li[position() div 2 = 1]",
+ "//li[(position() mod 2) = 0]",
+ "//tr[number(td[2]) = 30]",
+ "//tr[number(td[2]) != 30]",
+ "//tr[number(td[2]) < 30]",
+ "//tr[number(td[2]) <= 30]",
+ "//tr[number(td[2]) > 30]",
+ "//tr[number(td[2]) >= 30]",
+ "//tr[td[2] = 30]",
+ "//tr[td[2] = '30']",
+ "//comment()",
+ ".//a[contains(normalize-space(string(.)), 'Click me')]",
+ ".//input[(./@type = 'text')]",
+ ".//*[@id='heading']",
+ ".//li[contains(concat(' ', @class, ' '), ' even ')]",
+ "//*[@id='heading']/text()",
+ "//em/parent::p",
+ "//p[em]",
+ "//p[not(em)]",
+ "//section[a/@href = '/foo']",
+ "//ul/li[last()][position() = last()]",
+ "//ul[string(count(li)) = '5']",
+ "//body[count(//*[contains(@class, 'item')]) = 5]",
+ };
+ try testing.expectEqual(@as(usize, 91), battery.len);
+
+ for (battery) |expr| {
+ var arena = std.heap.ArenaAllocator.init(testing.allocator);
+ defer arena.deinit();
+ _ = parse(arena.allocator(), expr) catch |err| {
+ std.debug.print("\n failed to parse: {s}\n error: {s}\n", .{ expr, @errorName(err) });
+ return err;
+ };
+ }
+}
+
+test "XPath.Parser: deep parenthesization rejected past max_depth" {
+    // Input is built at comptime: `max_depth + 1` opening parentheses, a
+    // literal, and the matching closers — deeper than the cap allows.
+    const nesting = max_depth + 1;
+    const input = ("(" ** nesting) ++ "1" ++ (")" ** nesting);
+
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    try testing.expectError(error.MaxDepthExceeded, parse(arena.allocator(), input));
+}
+
+test "XPath.Parser: deep unary minus rejected past max_depth" {
+    // Input is built at comptime: `max_depth + 1` minus signs before a
+    // literal — more unary nesting than the cap allows.
+    const input = ("-" ** (max_depth + 1)) ++ "1";
+
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    try testing.expectError(error.MaxDepthExceeded, parse(arena.allocator(), input));
+}
diff --git a/src/browser/xpath/Tokenizer.zig b/src/browser/xpath/Tokenizer.zig
new file mode 100644
index 00000000..6dd8d279
--- /dev/null
+++ b/src/browser/xpath/Tokenizer.zig
@@ -0,0 +1,464 @@
+// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+//! XPath 1.0 expression tokenizer.
+//!
+//! HTML-pragmatic behavior: lenient whitespace, case-preserving names,
+//! no escape processing in string literals (use the other quote type
+//! to embed), unknown characters silently skipped.
+//!
+//! The tokenizer borrows from the input slice and never allocates.
+//! `next()` always returns a token; `.eof` is terminal and idempotent.
+
+const std = @import("std");
+
+const Tokenizer = @This();
+
+ /// One lexical token produced by `next()`.
+ ///
+ /// `string` and `name` carry slices of the tokenizer input, `number`
+ /// carries an already-parsed value, and every other variant is a bare
+ /// tag for a fixed punctuation sequence (shown in the trailing comment).
+ pub const Token = union(enum) {
+ /// String literal: `'foo'` or `"foo"`. Quotes are stripped; escapes
+ /// are not interpreted (the polyfill takes the raw substring).
+ string: []const u8,
+
+ /// Numeric literal: `123`, `1.5`, `.5`, `5.`. f64 matches the
+ /// runtime number type.
+ number: f64,
+
+ /// Bare identifier — element/function/axis name, an `or`/`and`/
+ /// `div`/`mod` keyword, or a namespace-prefixed name (`prefix:local`,
+ /// `prefix:*`). The colon and optional wildcard are preserved
+ /// verbatim so the parser can split.
+ name: []const u8,
+
+ slash, // `/`
+ double_slash, // `//`
+ dot, // `.`
+ double_dot, // `..`
+ at, // `@`
+ lparen, // `(`
+ rparen, // `)`
+ lbracket, // `[`
+ rbracket, // `]`
+ comma, // `,`
+ pipe, // `|`
+ eq, // `=`
+ neq, // `!=`
+ lt, // `<`
+ lte, // `<=`
+ gt, // `>`
+ gte, // `>=`
+ plus, // `+`
+ minus, // `-`
+ star, // `*`
+ dollar, // `$`
+ double_colon, // `::`
+ /// End of input. Returned by `next()` whenever the cursor is at or past
+ /// the end of `input`.
+ eof,
+ };
+
+ /// Expression text being tokenized. `string` and `name` tokens are
+ /// slices of this buffer.
+ input: []const u8,
+ /// Byte offset into `input` of the next unread byte.
+ position: usize = 0,
+
+ /// Reports whether the cursor has moved to or past the end of `input`.
+ fn isEof(self: *const Tokenizer) bool {
+     const total = self.input.len;
+     return total <= self.position;
+ }
+
+ // Lookahead guard: true when the byte `n` positions past the cursor
+ // exists, i.e. `byteAt(n)` stays inside `input`.
+ fn hasAtLeast(self: *const Tokenizer, n: usize) bool {
+     const target = self.position + n;
+     return target < self.input.len;
+ }
+
+ /// Returns the input byte `offset` positions past the cursor. No explicit
+ /// range check is made here; callers guard with `isEof` / `hasAtLeast`.
+ fn byteAt(self: *const Tokenizer, offset: usize) u8 {
+     const index = self.position + offset;
+     return self.input[index];
+ }
+
+ /// Advances the cursor over a run of space, tab, LF and CR bytes, stopping
+ /// at the first other byte or at end of input.
+ fn skipWhitespace(self: *Tokenizer) void {
+     while (self.position < self.input.len) : (self.position += 1) {
+         const byte = self.input[self.position];
+         const is_blank = byte == ' ' or byte == '\t' or byte == '\n' or byte == '\r';
+         if (!is_blank) return;
+     }
+ }
+
+ /// True for bytes that may open a name: ASCII letters and `_`.
+ fn isNameStart(c: u8) bool {
+     return switch (c) {
+         'a'...'z', 'A'...'Z', '_' => true,
+         else => false,
+     };
+ }
+
+ /// True for bytes that may follow the first byte of a name: anything
+ /// `isNameStart` accepts, plus ASCII digits, `-` and `.`.
+ fn isNameContinue(c: u8) bool {
+     if (c == '-' or c == '.') return true;
+     if (std.ascii.isDigit(c)) return true;
+     return isNameStart(c);
+ }
+
+ /// Lexes a string literal. The cursor must sit on the opening `quote`
+ /// byte. The returned slice is the raw text between the quotes, with no
+ /// escape processing.
+ fn consumeString(self: *Tokenizer, quote: u8) Token {
+     const body_start = self.position + 1; // step over the opening quote
+     const rest = self.input[body_start..];
+
+     if (std.mem.indexOfScalar(u8, rest, quote)) |body_len| {
+         // Land just past the closing quote.
+         self.position = body_start + body_len + 1;
+         return .{ .string = rest[0..body_len] };
+     }
+
+     // Unterminated literal: emit everything up to EOF (polyfill parity).
+     self.position = self.input.len;
+     return .{ .string = rest };
+ }
+
+ /// Lexes a numeric literal of the shape `digits`, `digits.`,
+ /// `digits.digits` or `.digits` and returns it as an f64 token.
+ fn consumeNumber(self: *Tokenizer) Token {
+     const src = self.input;
+     var end = self.position;
+
+     // Integer part (may be empty for the `.5` form).
+     while (end < src.len and std.ascii.isDigit(src[end])) end += 1;
+
+     // Optional fraction: one `.` followed by zero or more digits.
+     if (end < src.len and src[end] == '.') {
+         end += 1;
+         while (end < src.len and std.ascii.isDigit(src[end])) end += 1;
+     }
+
+     const text = src[self.position..end];
+     self.position = end;
+
+     // `next()` only calls this on a digit or on `.digit`, so `text` is
+     // always `\d+(\.\d*)?` or `\.\d+`. Per the original author's note,
+     // parseFloat accepts both (verified against Zig 0.15.2).
+     const parsed = std.fmt.parseFloat(f64, text) catch unreachable;
+     return .{ .number = parsed };
+ }
+
+ /// Lexes a name starting at the cursor, including an optional namespace
+ /// part (`prefix:local` or `prefix:*`). The returned slice is the exact
+ /// input text, colon and wildcard included.
+ fn consumeName(self: *Tokenizer) Token {
+     const src = self.input;
+     const begin = self.position;
+     var cursor = begin;
+
+     while (cursor < src.len and isNameContinue(src[cursor])) cursor += 1;
+
+     // A lone `:` extends the name; `::` is the axis separator and is left
+     // for the next token.
+     const at_colon = cursor < src.len and src[cursor] == ':';
+     const at_axis_separator = at_colon and cursor + 1 < src.len and src[cursor + 1] == ':';
+     if (at_colon and !at_axis_separator) {
+         cursor += 1; // the `:`
+         if (cursor < src.len and src[cursor] == '*') {
+             cursor += 1;
+         } else {
+             while (cursor < src.len and isNameContinue(src[cursor])) cursor += 1;
+         }
+     }
+
+     self.position = cursor;
+     return .{ .name = src[begin..cursor] };
+ }
+
+ /// Returns the next token and advances the cursor past it.
+ ///
+ /// Whitespace is skipped first. Classification order: string literal,
+ /// numeric literal, two-byte operator, one-byte operator, name. A byte
+ /// that matches none of these is dropped and scanning resumes. At end of
+ /// input the result is `.eof`, and repeated calls keep returning `.eof`.
+ pub fn next(self: *Tokenizer) Token {
+     while (true) {
+         self.skipWhitespace();
+         if (self.isEof()) return .eof;
+
+         const head = self.byteAt(0);
+         const lookahead: ?u8 = if (self.hasAtLeast(1)) self.byteAt(1) else null;
+
+         // Literals: quoted strings, and numbers (`5`, `5.`, `.5`).
+         switch (head) {
+             '"', '\'' => return self.consumeString(head),
+             '0'...'9' => return self.consumeNumber(),
+             '.' => if (lookahead) |following| {
+                 if (std.ascii.isDigit(following)) return self.consumeNumber();
+             },
+             else => {},
+         }
+
+         // Two-byte operators take precedence over their one-byte prefixes.
+         if (lookahead) |following| {
+             const pair: ?Token = switch (head) {
+                 '/' => if (following == '/') .double_slash else null,
+                 ':' => if (following == ':') .double_colon else null,
+                 '!' => if (following == '=') .neq else null,
+                 '<' => if (following == '=') .lte else null,
+                 '>' => if (following == '=') .gte else null,
+                 '.' => if (following == '.') .double_dot else null,
+                 else => null,
+             };
+             if (pair) |tok| {
+                 self.position += 2;
+                 return tok;
+             }
+         }
+
+         const single: Token = switch (head) {
+             '(' => .lparen,
+             ')' => .rparen,
+             '[' => .lbracket,
+             ']' => .rbracket,
+             ',' => .comma,
+             '|' => .pipe,
+             '=' => .eq,
+             '<' => .lt,
+             '>' => .gt,
+             '+' => .plus,
+             '-' => .minus,
+             '*' => .star,
+             '$' => .dollar,
+             '/' => .slash,
+             '@' => .at,
+             '.' => .dot,
+             else => {
+                 if (isNameStart(head)) return self.consumeName();
+
+                 // Polyfill parity (decision #2): unknown characters are
+                 // silently skipped, never an error.
+                 self.position += 1;
+                 continue;
+             },
+         };
+         self.position += 1;
+         return single;
+     }
+ }
+
+const testing = std.testing;
+
+ /// Test helper: tokenizes `input` and checks that the first
+ /// `expected.len` tokens equal `expected`, in order. Tokens after the
+ /// last expected one are not inspected.
+ fn expectTokens(input: []const u8, expected: []const Token) !void {
+     var lexer: Tokenizer = .{ .input = input };
+     var idx: usize = 0;
+     while (idx < expected.len) : (idx += 1) {
+         const actual = lexer.next();
+         try testing.expectEqualDeep(expected[idx], actual);
+     }
+ }
+
+ test "XPath.Tokenizer: empty input emits EOF" {
+     // Nothing to lex: the very first token is already `.eof`.
+     const want = [_]Token{.eof};
+     try expectTokens("", &want);
+ }
+
+ test "XPath.Tokenizer: only whitespace emits EOF" {
+     // Every byte is skipped as whitespace, leaving only `.eof`.
+     const want = [_]Token{.eof};
+     try expectTokens(" \t\n\r ", &want);
+ }
+
+ test "XPath.Tokenizer: EOF idempotent past end" {
+     // Calling `next()` repeatedly at end of input keeps yielding `.eof`.
+     var lexer: Tokenizer = .{ .input = "" };
+     for (0..3) |_| {
+         try testing.expectEqual(Token.eof, lexer.next());
+     }
+ }
+
+ test "XPath.Tokenizer: single-char operators" {
+     // One expected token per input byte, in input order, then `.eof`.
+     const want = [_]Token{
+         .lparen,
+         .rparen,
+         .lbracket,
+         .rbracket,
+         .comma,
+         .pipe,
+         .eq,
+         .lt,
+         .gt,
+         .plus,
+         .minus,
+         .star,
+         .dollar,
+         .slash,
+         .at,
+         .dot,
+         .eof,
+     };
+     try expectTokens("()[],|=<>+-*$/@.", &want);
+ }
+
+ test "XPath.Tokenizer: two-char operators" {
+     // Each whitespace-separated pair lexes as a single two-byte token.
+     const want = [_]Token{
+         .double_slash,
+         .double_colon,
+         .neq,
+         .lte,
+         .gte,
+         .double_dot,
+         .eof,
+     };
+     try expectTokens("// :: != <= >= ..", &want);
+ }
+
+test "XPath.Tokenizer: two-char vs single-char disambiguation" {
+ try expectTokens("/a/b", &.{
+ .slash, .{ .name = "a" }, .slash, .{ .name = "b" }, .eof,
+ });
+ try expectTokens("//a", &.{ .double_slash, .{ .name = "a" }, .eof });
+ try expectTokens("a
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+//! XPath 1.0 AST.
+//!
+//! Slices and pointers are arena-owned by the Parser; the AST has no
+//! destructor.
+
+ /// Root AST node: one variant per XPath expression form. Variants that
+ /// contain sub-expressions hold them by pointer (`*Expr`) or inside the
+ /// payload structs declared below.
+ pub const Expr = union(enum) {
+ /// Absolute or relative location path: `/foo/bar`, `//x`, `foo/bar`.
+ path: Path,
+ /// Filter expression followed by a location-path tail:
+ /// `(//a)/b`, `(expr)//c`.
+ filter_path: FilterPath,
+ /// Filter expression with a single predicate: `(expr)[n]`.
+ /// Multi-predicate filters nest: `(e)[1][2]` → filter(filter(e,1),2).
+ filter: Filter,
+ /// Binary operation; the operator and both operands live in `BinOp`.
+ binop: BinOp,
+ /// Unary minus. The polyfill has no unary `+`.
+ neg: *Expr,
+ /// String literal, quotes stripped.
+ literal: []const u8,
+ /// Numeric literal, parsed to f64.
+ number: f64,
+ /// Variable reference. The leading `$` is stripped; per decision #3
+ /// the evaluator always returns the empty string.
+ var_ref: []const u8,
+ /// Function call; the name and argument expressions live in `FnCall`.
+ fn_call: FnCall,
+ };
+
+ /// Payload of `Expr.path`: a location path expressed as a list of steps.
+ pub const Path = struct {
+ /// Distinguishes an absolute path from a relative one (presumably true
+ /// when the source starts with `/` or `//` — set by the Parser; confirm
+ /// there).
+ absolute: bool,
+ /// The path's steps. NOTE(review): presumably in source order, left to
+ /// right — confirm against the Parser.
+ steps: []const Step,
+ };
+
+ /// Payload of `Expr.filter_path`: an expression followed by a
+ /// location-path tail, as in `(//a)/b`.
+ pub const FilterPath = struct {
+ /// The expression that precedes the path tail.
+ filter: *Expr,
+ /// Steps of the path tail applied after `filter`.
+ steps: []const Step,
+ };
+
+ /// Payload of `Expr.filter`: an expression with exactly one predicate,
+ /// as in `(expr)[n]`.
+ pub const Filter = struct {
+ /// The expression being filtered.
+ expr: *Expr,
+ /// The single predicate applied to `expr`.
+ predicate: *Expr,
+ };
+
+ /// Payload of `Expr.binop`: an operator and its two operand expressions.
+ pub const BinOp = struct {
+ /// Which binary operator this node represents.
+ op: BinOpKind,
+ /// Left-hand operand.
+ left: *Expr,
+ /// Right-hand operand.
+ right: *Expr,
+ };
+
+ /// Operator of a `BinOp` node. The trailing underscore on `or_`, `and_`
+ /// and `union_` avoids Zig keywords. NOTE(review): the symbol-to-variant
+ /// mapping is done by the Parser, which is not shown here; the names
+ /// suggest `or`, `and`, `=`, `!=`, `<`, `>`, `<=`, `>=`, `+`, `-`, `*`,
+ /// `div`, `mod`, `|`.
+ pub const BinOpKind = enum {
+ or_,
+ and_,
+ eq,
+ neq,
+ lt,
+ gt,
+ lte,
+ gte,
+ add,
+ sub,
+ mul,
+ div,
+ mod,
+ union_,
+ };
+
+ /// Payload of `Expr.fn_call`: a function name and its argument
+ /// expressions.
+ pub const FnCall = struct {
+ /// Function name as written in the expression.
+ name: []const u8,
+ /// Argument expressions. NOTE(review): presumably in call order —
+ /// confirm against the Parser.
+ args: []const *Expr,
+ };
+
+ /// One step of a location path: an axis, a node test, and zero or more
+ /// predicates.
+ pub const Step = struct {
+ /// Axis this step follows.
+ axis: Axis,
+ /// Name or node-type test for this step.
+ node_test: NodeTest,
+ /// Predicate expressions attached to the step; empty when there are none.
+ predicates: []const *Expr,
+ };
+
+ /// Axis of a location step. Variant names follow the XPath axis names
+ /// with `-` replaced by `_`; `unknown` covers any unrecognized name.
+ pub const Axis = enum {
+ child,
+ descendant,
+ descendant_or_self,
+ self,
+ parent,
+ ancestor,
+ ancestor_or_self,
+ following_sibling,
+ preceding_sibling,
+ following,
+ preceding,
+ attribute,
+ namespace,
+ /// Polyfill parity (decision #2): unknown axis names parse to
+ /// this variant; the evaluator returns an empty node-set.
+ unknown,
+ };
+
+ /// Node test of a location step: either a name (wildcard included) or a
+ /// node-type test such as `text()`.
+ pub const NodeTest = union(enum) {
+ /// Element / attribute name. `"*"` is the wildcard. Namespaced forms
+ /// (`prefix:*`, `prefix:local`) are stored verbatim — the evaluator
+ /// does not split them, so they fall through to a literal `mem.eql`
+ /// against the node name (consistent with the `namespace::` axis stub
+ /// per decision #3).
+ /// TODO: real namespace support if the polyfill ever drops the stub.
+ name: []const u8,
+ /// `node()`, `text()`, `comment()`, `processing-instruction()`.
+ /// The optional target literal of `processing-instruction("foo")`
+ /// is consumed but not stored (decision #3 stub).
+ type_test: TypeTest,
+ };
+
+ /// Node-type tests usable in a step, matching the four forms listed on
+ /// `NodeTest.type_test`: `node()`, `text()`, `comment()` and
+ /// `processing-instruction()`.
+ pub const TypeTest = enum {
+ node,
+ text,
+ comment,
+ processing_instruction,
+ };
diff --git a/src/browser/xpath/functions.zig b/src/browser/xpath/functions.zig
new file mode 100644
index 00000000..d8d42de4
--- /dev/null
+++ b/src/browser/xpath/functions.zig
@@ -0,0 +1,630 @@
+// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+//! XPath 1.0 core function library — 25 functions covering the spec's
+//! core function set. `position()` and `last()` live in
+//! `Evaluator.evalFnCall` because they need the `(pos, size)` closure
+//! that this module never sees.
+//!
+//! Args are pre-evaluated by the caller (`Evaluator.evalFnCall`). Eager
+//! evaluation is fine here — short-circuit operators (`or`/`and`) are
+//! binops, not function calls, so laziness isn't required. The
+//! pre-evaluation contract also keeps functions.zig free of a circular
+//! import on Evaluator.zig.
+//!
+//! Stubs per decision #3:
+//! - `lang(string)` → always false
+//! - `namespace-uri(...)` → always ""
+//! - `name`/`local-name` → lowercased (HTML pragmatism)
+//!
+//! Allocations land in the caller's per-evaluation arena.
+
+const std = @import("std");
+const lp = @import("lightpanda");
+
+const Node = @import("../webapi/Node.zig");
+
+const result = @import("result.zig");
+
+const Frame = lp.Frame;
+const Element = Node.Element;
+const Document = Node.Document;
+const Allocator = std.mem.Allocator;
+
+ /// Error set returned by `call` and by the helpers in this file.
+ /// `UnknownFunction` is produced by `call` when no function name matches.
+ /// NOTE(review): `WriteFailed` presumably comes from the `std.Io.Writer`
+ /// writes used to build strings, and `StringTooLarge` from the coercion
+ /// helpers in result.zig — neither origin is visible here; confirm.
+ pub const Error = error{
+ OutOfMemory,
+ WriteFailed,
+ StringTooLarge,
+ UnknownFunction,
+ };
+
+ /// Runs the core-library function called `name` on the already-evaluated
+ /// `args`.
+ ///
+ /// `ctx` is the context node used by functions that default to it when
+ /// called without arguments; `frame` is forwarded to `id()`. Allocations
+ /// go to `arena`. Missing arguments never raise: each function falls back
+ /// to a fixed default instead.
+ ///
+ /// Returns `error.UnknownFunction` when `name` is not in the library. The
+ /// Evaluator resolves `position()` / `last()` itself before calling in,
+ /// so this is the last lookup stop.
+ pub fn call(
+     arena: Allocator,
+     name: []const u8,
+     args: []const result.Result,
+     ctx: *Node,
+     frame: *Frame,
+ ) Error!result.Result {
+     // Field names are the exact XPath function names, so the lookup is a
+     // plain string-to-enum conversion.
+     const Builtin = enum {
+         // node-set
+         count,
+         id,
+         @"local-name",
+         name,
+         @"namespace-uri",
+         // string
+         string,
+         concat,
+         @"starts-with",
+         contains,
+         @"substring-before",
+         @"substring-after",
+         substring,
+         @"string-length",
+         @"normalize-space",
+         translate,
+         // boolean
+         boolean,
+         not,
+         @"true",
+         @"false",
+         lang,
+         // number
+         number,
+         sum,
+         floor,
+         ceiling,
+         round,
+     };
+
+     const builtin = std.meta.stringToEnum(Builtin, name) orelse return error.UnknownFunction;
+
+     switch (builtin) {
+         // -- Node-set --
+         .count => return .{ .number = countFn(args) },
+         .id => return idFn(arena, args, ctx, frame),
+         .@"local-name" => return .{ .string = try localNameFn(arena, args, ctx) },
+         .name => return .{ .string = try nameFn(arena, args, ctx) },
+         .@"namespace-uri" => return .{ .string = "" }, // stub: always empty
+
+         // -- String --
+         .string => return .{ .string = try stringFn(arena, args, ctx) },
+         .concat => return .{ .string = try concatFn(arena, args) },
+         .@"starts-with" => return .{ .boolean = try startsWithFn(arena, args) },
+         .contains => return .{ .boolean = try containsFn(arena, args) },
+         .@"substring-before" => return .{ .string = try substringBeforeFn(arena, args) },
+         .@"substring-after" => return .{ .string = try substringAfterFn(arena, args) },
+         .substring => return .{ .string = try substringFn(arena, args) },
+         .@"string-length" => return .{ .number = try stringLengthFn(arena, args, ctx) },
+         .@"normalize-space" => return .{ .string = try normalizeSpaceFn(arena, args, ctx) },
+         .translate => return .{ .string = try translateFn(arena, args) },
+
+         // -- Boolean --
+         .boolean => return .{ .boolean = args.len != 0 and result.toBoolean(args[0]) },
+         .not => return .{ .boolean = args.len == 0 or !result.toBoolean(args[0]) },
+         .@"true" => return .{ .boolean = true },
+         .@"false" => return .{ .boolean = false },
+         .lang => return .{ .boolean = false }, // stub: always false
+
+         // -- Number --
+         .number => return .{ .number = try numberFn(arena, args, ctx) },
+         .sum => return .{ .number = try sumFn(arena, args) },
+         .floor, .ceiling, .round => {
+             // With no argument these three yield NaN.
+             if (args.len == 0) return .{ .number = std.math.nan(f64) };
+             const operand = try result.toNumber(arena, args[0]);
+             return .{ .number = switch (builtin) {
+                 .floor => std.math.floor(operand),
+                 .ceiling => std.math.ceil(operand),
+                 .round => roundHalfToPosInf(operand),
+                 else => unreachable,
+             } };
+         },
+     }
+ }
+
+ /// Byte-wise equality of two strings.
+ inline fn eql(a: []const u8, b: []const u8) bool {
+     return std.mem.order(u8, a, b) == .eq;
+ }
+
+// ----- node-set fns -----
+
+ /// `count(node-set)`: number of nodes in the first argument. Returns 0
+ /// when there is no argument or the argument is not a node-set.
+ fn countFn(args: []const result.Result) f64 {
+     if (args.len == 0) return 0;
+     return switch (args[0]) {
+         .node_set => |nodes| @floatFromInt(nodes.len),
+         else => 0,
+     };
+ }
+
+ /// `id(object)`: looks up elements by id.
+ ///
+ /// The argument is turned into one string (a node-set contributes the
+ /// string-value of each node, joined by single spaces; anything else goes
+ /// through `result.toString`). That string is split on ASCII whitespace
+ /// and each piece is passed to `getElementById`. The result holds each
+ /// matched element once, in first-match order. It is empty when there is
+ /// no argument or no owning document can be found.
+ fn idFn(arena: Allocator, args: []const result.Result, ctx: *Node, frame: *Frame) Error!result.Result {
+     const empty: result.Result = .{ .node_set = &.{} };
+     if (args.len == 0) return empty;
+
+     const ids: []const u8 = switch (args[0]) {
+         .node_set => |nodes| joined: {
+             var out = std.Io.Writer.Allocating.init(arena);
+             for (nodes, 0..) |member, idx| {
+                 if (idx != 0) try out.writer.writeByte(' ');
+                 try out.writer.writeAll(try result.stringValueOf(arena, member));
+             }
+             break :joined out.written();
+         },
+         else => try result.toString(arena, args[0]),
+     };
+
+     // `ctx.ownerDocument || ctx` — document nodes own themselves.
+     const doc = ctx.ownerDocument(frame) orelse (ctx.is(Document) orelse return empty);
+
+     // The array hash map de-duplicates while keeping insertion order.
+     var found: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;
+     var tokens = std.mem.tokenizeAny(u8, ids, &std.ascii.whitespace);
+     while (tokens.next()) |id| {
+         const el = doc.getElementById(id, frame) orelse continue;
+         try found.put(arena, el.asNode(), {});
+     }
+     return .{ .node_set = found.keys() };
+ }
+
+ /// `local-name(node-set?)`: local name of the first node of the argument,
+ /// or of `ctx` when called without arguments. Returns "" when the
+ /// argument is not a node-set or is empty.
+ fn localNameFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
+     const target = firstNodeOrCtx(args, ctx) orelse return "";
+
+     const element = target.is(Element) orelse {
+         // Non-element node: lowercase a copy of the node name into the arena.
+         var scratch: [256]u8 = undefined;
+         const raw = target.getNodeName(&scratch);
+         return std.ascii.allocLowerString(arena, raw);
+     };
+
+     // Per the original author's note, `getLocalName` returns a slice into
+     // `_tag_name` (lowercase, namespace prefix stripped) that outlives the
+     // per-evaluation arena, so it is borrowed rather than duplicated.
+     return element.getLocalName();
+ }
+
+ /// `name(node-set?)`: name of the first node of the argument, or of
+ /// `ctx` when called without arguments. Returns "" when the argument is
+ /// not a node-set or is empty.
+ fn nameFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
+     const target = firstNodeOrCtx(args, ctx) orelse return "";
+
+     const element = target.is(Element) orelse {
+         // Non-element node: lowercase a copy of the node name into the arena.
+         var scratch: [256]u8 = undefined;
+         const raw = target.getNodeName(&scratch);
+         return std.ascii.allocLowerString(arena, raw);
+     };
+
+     // Differs from `local-name` only on namespaced elements: `name` keeps
+     // the prefix (`ns:foo`), `local-name` strips it (`foo`).
+     return element.getTagNameLower();
+ }
+
+ /// Picks the node a zero-or-one-argument node function operates on:
+ /// `ctx` when there are no arguments, otherwise the first node of the
+ /// first argument. Returns null when that argument is not a node-set or
+ /// is an empty one.
+ fn firstNodeOrCtx(args: []const result.Result, ctx: *Node) ?*Node {
+     if (args.len == 0) return ctx;
+     return switch (args[0]) {
+         .node_set => |nodes| if (nodes.len == 0) null else nodes[0],
+         else => null,
+     };
+ }
+
+// ----- string fns -----
+
+ /// `string(object?)`: string form of the first argument, or the
+ /// string-value of `ctx` when called without arguments.
+ fn stringFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
+     if (args.len != 0) {
+         return try result.toString(arena, args[0]);
+     }
+     return try result.stringValueOf(arena, ctx);
+ }
+
+ /// `concat(...)`: converts every argument to a string and joins them in
+ /// order. With no arguments the result is empty.
+ fn concatFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
+     var out = std.Io.Writer.Allocating.init(arena);
+     var idx: usize = 0;
+     while (idx < args.len) : (idx += 1) {
+         const piece = try result.toString(arena, args[idx]);
+         try out.writer.writeAll(piece);
+     }
+     return out.written();
+ }
+
+ /// `starts-with(a, b)`: whether the string form of `a` begins with the
+ /// string form of `b`. False when fewer than two arguments are given.
+ fn startsWithFn(arena: Allocator, args: []const result.Result) Error!bool {
+     if (args.len < 2) return false;
+     const subject = try result.toString(arena, args[0]);
+     const prefix = try result.toString(arena, args[1]);
+     if (prefix.len > subject.len) return false;
+     return std.mem.eql(u8, subject[0..prefix.len], prefix);
+ }
+
+ /// `contains(a, b)`: whether the string form of `b` occurs anywhere in
+ /// the string form of `a`. False when fewer than two arguments are given.
+ fn containsFn(arena: Allocator, args: []const result.Result) Error!bool {
+     if (args.len < 2) return false;
+     const subject = try result.toString(arena, args[0]);
+     const needle = try result.toString(arena, args[1]);
+     return if (std.mem.indexOf(u8, subject, needle)) |_| true else false;
+ }
+
+ /// `substring-before(a, b)`: the part of `a` before the first occurrence
+ /// of `b`. Returns "" when `b` does not occur or fewer than two
+ /// arguments are given. The result is a sub-slice of the converted `a`.
+ fn substringBeforeFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
+     if (args.len < 2) return "";
+     const subject = try result.toString(arena, args[0]);
+     const marker = try result.toString(arena, args[1]);
+     const cut = std.mem.indexOf(u8, subject, marker) orelse return "";
+     return subject[0..cut];
+ }
+
+ /// `substring-after(a, b)`: the part of `a` after the first occurrence
+ /// of `b`. Returns "" when `b` does not occur or fewer than two
+ /// arguments are given. The result is a sub-slice of the converted `a`.
+ fn substringAfterFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
+     if (args.len < 2) return "";
+     const subject = try result.toString(arena, args[0]);
+     const marker = try result.toString(arena, args[1]);
+     const cut = std.mem.indexOf(u8, subject, marker) orelse return "";
+     return subject[cut + marker.len ..];
+ }
+
+ /// `substring(s, start, len?)`: XPath 1-based substring.
+ ///
+ /// `start` and `len` are rounded with `roundHalfToPosInf`. The selected
+ /// range is `[start - 1, start - 1 + len)`, clamped to the string; with
+ /// no `len` it runs to the end. Returns "" when fewer than two arguments
+ /// are given, when `start`, `len` or their sum is NaN, or when the
+ /// clamped range is empty. Offsets are byte offsets into the converted
+ /// string, and the result is a sub-slice of it.
+ fn substringFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
+     if (args.len < 2) return "";
+
+     const text = try result.toString(arena, args[0]);
+     const text_len: f64 = @floatFromInt(text.len);
+
+     const start_arg = try result.toNumber(arena, args[1]);
+     if (std.math.isNan(start_arg)) return "";
+     const first = roundHalfToPosInf(start_arg);
+
+     // Exclusive upper bound; defaults to the end of the string.
+     var upper: f64 = text_len;
+     if (args.len >= 3) {
+         const len_arg = try result.toNumber(arena, args[2]);
+         if (std.math.isNan(len_arg)) return "";
+         const stop = first - 1 + roundHalfToPosInf(len_arg);
+         // -inf + inf is NaN; @intFromFloat(NaN) is illegal behavior.
+         if (std.math.isNan(stop)) return "";
+         upper = @min(stop, text_len);
+     }
+
+     const lower = @max(first - 1, 0);
+     if (lower >= upper) return "";
+
+     // Here 0 <= lower < upper <= text_len, so both conversions are in range.
+     const from: usize = @intFromFloat(lower);
+     const to: usize = @intFromFloat(upper);
+     return text[from..to];
+ }
+
+ /// `string-length(string?)`: length of the first argument's string form,
+ /// or of the string-value of `ctx` when called without arguments.
+ fn stringLengthFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error!f64 {
+     var text: []const u8 = undefined;
+     if (args.len == 0) {
+         text = try result.stringValueOf(arena, ctx);
+     } else {
+         text = try result.toString(arena, args[0]);
+     }
+     // The length is in UTF-8 bytes; the polyfill counts UTF-16 code
+     // units. The two agree on ASCII (the gem's 91-case battery is
+     // ASCII-only). See .claude/skills/xpath-port/NOTES.md for the
+     // divergence rationale.
+     const byte_count = text.len;
+     return @floatFromInt(byte_count);
+ }
+
+ /// `normalize-space(string?)`: strips leading and trailing ASCII
+ /// whitespace and collapses each inner whitespace run to a single space.
+ /// Works on the first argument's string form, or on the string-value of
+ /// `ctx` when called without arguments.
+ fn normalizeSpaceFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
+     const text = if (args.len != 0)
+         try result.toString(arena, args[0])
+     else
+         try result.stringValueOf(arena, ctx);
+
+     // Splitting on whitespace and re-joining with single spaces does the
+     // trimming and the collapsing in one pass.
+     var words = std.mem.tokenizeAny(u8, text, &std.ascii.whitespace);
+     const first_word = words.next() orelse return "";
+
+     var out = std.Io.Writer.Allocating.init(arena);
+     try out.writer.writeAll(first_word);
+     while (words.next()) |word| {
+         try out.writer.writeByte(' ');
+         try out.writer.writeAll(word);
+     }
+     return out.written();
+ }
+
+ /// `translate(s, from, to)`: maps each byte of `s` found in `from` to
+ /// the byte at the same index in `to`. Bytes whose index in `from` lies
+ /// past the end of `to` are dropped, and bytes not in `from` are copied
+ /// unchanged. Returns "" when fewer than three arguments are given.
+ fn translateFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
+     if (args.len < 3) return "";
+     const subject = try result.toString(arena, args[0]);
+     const from = try result.toString(arena, args[1]);
+     const to = try result.toString(arena, args[2]);
+
+     var out = std.Io.Writer.Allocating.init(arena);
+     for (subject) |byte| {
+         const slot = std.mem.indexOfScalar(u8, from, byte) orelse {
+             // Not mentioned in `from`: pass through untouched.
+             try out.writer.writeByte(byte);
+             continue;
+         };
+         // A slot past the end of `to` means the byte is deleted.
+         if (slot < to.len) try out.writer.writeByte(to[slot]);
+     }
+     return out.written();
+ }
+
+// ----- number fns -----
+
+ /// `number(object?)`: numeric form of the first argument. Without
+ /// arguments, the string-value of `ctx` is converted with
+ /// `result.stringToNumber`.
+ fn numberFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error!f64 {
+     if (args.len != 0) {
+         return try result.toNumber(arena, args[0]);
+     }
+     const ctx_text = try result.stringValueOf(arena, ctx);
+     return result.stringToNumber(ctx_text);
+ }
+
+ /// `sum(node-set)`: adds up the numeric value of each node's
+ /// string-value. Returns NaN when there is no argument or the argument
+ /// is not a node-set; an empty node-set sums to 0.
+ fn sumFn(arena: Allocator, args: []const result.Result) Error!f64 {
+     if (args.len == 0) return std.math.nan(f64);
+     const nodes = switch (args[0]) {
+         .node_set => |set| set,
+         else => return std.math.nan(f64),
+     };
+
+     var acc: f64 = 0;
+     for (nodes) |member| {
+         const text = try result.stringValueOf(arena, member);
+         acc += result.stringToNumber(text);
+     }
+     return acc;
+ }
+
+ /// Round to the nearest integer, with ties going toward positive
+ /// infinity. Matches JS `Math.round` (the polyfill calls it for both
+ /// `round()` and `substring()`):
+ ///   round(0.5) = 1    round(-0.5) = 0
+ ///   round(1.5) = 2    round(-1.5) = -1
+ /// Diverges from Zig's `@round` (away from zero): `@round(-0.5) = -1`.
+ ///
+ /// NaN and ±infinity are returned unchanged. Results in (-1, 0) come back
+ /// as positive zero, same as before.
+ fn roundHalfToPosInf(n: f64) f64 {
+     // `isFinite` is false for NaN as well as for ±inf.
+     if (!std.math.isFinite(n)) return n;
+
+     // Compare the fractional part against 0.5 rather than computing
+     // `floor(n + 0.5)`. The addition itself rounds, which gave wrong
+     // answers at the edges: 0.49999999999999994 + 0.5 becomes 1.0, and
+     // for odd integers in [2^52, 2^53) `n + 0.5` lands on the next even
+     // integer.
+     const lower = std.math.floor(n);
+     const fraction = n - lower;
+     return if (fraction >= 0.5) lower + 1 else lower;
+ }
+
+// ---------------------------------------------------------------------
+// Tests — pure-logic only. Functions that need a real DOM (id, name,
+// local-name, string with element ctx, sum, count of node-set, etc.)
+// are exercised via Phase 9 HTML fixtures in tests/xpath/.
+// ---------------------------------------------------------------------
+
+const testing = std.testing;
+const Tokenizer = @import("Tokenizer.zig");
+const Parser = @import("Parser.zig");
+const Evaluator = @import("Evaluator.zig");
+
+ /// Test helper: parses `src` and evaluates it without a real DOM.
+ fn evalScalar(a: Allocator, src: []const u8) !result.Result {
+     // The node and frame pointers are synthetic. The public `evaluate`
+     // entry only touches the Frame for path/axis evaluation, so
+     // pure-scalar expressions (arithmetic, function calls returning
+     // scalars) never dereference them.
+     const parsed = try Parser.parse(a, src);
+     return Evaluator.evaluate(a, parsed, @ptrFromInt(0x2000), @ptrFromInt(0x1000));
+ }
+
+ test "Functions: count() of non-node-set returns 0" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+
+     // A string argument is not a node-set, so count() falls back to 0.
+     const got = try evalScalar(scratch.allocator(), "count('hello')");
+     try testing.expect(std.meta.activeTag(got) == .number);
+     try testing.expectEqual(@as(f64, 0), got.number);
+ }
+
+ test "Functions: string() on scalar coerces" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     const Case = struct { src: []const u8, want: []const u8 };
+     const cases = [_]Case{
+         .{ .src = "string(42)", .want = "42" },
+         .{ .src = "string(3.14)", .want = "3.14" },
+         .{ .src = "string(true())", .want = "true" },
+         .{ .src = "string(false())", .want = "false" },
+         .{ .src = "string('hello')", .want = "hello" },
+         .{ .src = "string(0)", .want = "0" },
+         .{ .src = "string(-1)", .want = "-1" },
+     };
+     for (cases) |case| {
+         const got = try evalScalar(alloc, case.src);
+         try testing.expect(got == .string);
+         try testing.expectEqualStrings(case.want, got.string);
+     }
+ }
+
+ test "Functions: concat() variadic" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     const Case = struct { src: []const u8, want: []const u8 };
+     const cases = [_]Case{
+         .{ .src = "concat('a', 'b')", .want = "ab" },
+         .{ .src = "concat('a', 'b', 'c')", .want = "abc" },
+         .{ .src = "concat('foo', '-', 'bar', '-', 'baz')", .want = "foo-bar-baz" },
+         .{ .src = "concat('x', 1, 'y')", .want = "x1y" },
+     };
+     for (cases) |case| {
+         const got = try evalScalar(alloc, case.src);
+         try testing.expect(got == .string);
+         try testing.expectEqualStrings(case.want, got.string);
+     }
+ }
+
+ test "Functions: starts-with / contains" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     const Case = struct { src: []const u8, want: bool };
+     const cases = [_]Case{
+         .{ .src = "starts-with('hello', 'he')", .want = true },
+         .{ .src = "starts-with('hello', 'el')", .want = false },
+         .{ .src = "starts-with('hello', '')", .want = true },
+         .{ .src = "contains('hello world', 'wor')", .want = true },
+         .{ .src = "contains('hello', 'xyz')", .want = false },
+         .{ .src = "contains('hello', '')", .want = true },
+     };
+     for (cases) |case| {
+         const got = try evalScalar(alloc, case.src);
+         try testing.expect(got == .boolean);
+         try testing.expectEqual(case.want, got.boolean);
+     }
+ }
+
+ test "Functions: substring-before / substring-after" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     const Case = struct { src: []const u8, want: []const u8 };
+     const cases = [_]Case{
+         .{ .src = "substring-before('1999/04/01', '/')", .want = "1999" },
+         .{ .src = "substring-before('hello', 'xyz')", .want = "" },
+         .{ .src = "substring-after('1999/04/01', '/')", .want = "04/01" },
+         .{ .src = "substring-after('hello', 'xyz')", .want = "" },
+         .{ .src = "substring-after('hello', '')", .want = "hello" },
+     };
+     for (cases) |case| {
+         const got = try evalScalar(alloc, case.src);
+         try testing.expect(got == .string);
+         try testing.expectEqualStrings(case.want, got.string);
+     }
+ }
+
+ test "Functions: substring() — XPath 1-based, rounding, NaN handling" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     const Case = struct { src: []const u8, want: []const u8 };
+     const cases = [_]Case{
+         .{ .src = "substring('12345', 2, 3)", .want = "234" },
+         .{ .src = "substring('12345', 2)", .want = "2345" },
+         // XPath spec example: round(1.5) = 2 → start at pos 2, len 2.
+         .{ .src = "substring('12345', 1.5, 2.6)", .want = "234" },
+         // start = 0: si = max(-1, 0) = 0, ei = min(0 - 1 + 3, len) = 2.
+         .{ .src = "substring('12345', 0, 3)", .want = "12" },
+         // Negative start clamps to 0.
+         .{ .src = "substring('12345', -3, 7)", .want = "123" },
+         // NaN start.
+         .{ .src = "substring('12345', 'foo')", .want = "" },
+         // NaN length.
+         .{ .src = "substring('12345', 1, 'foo')", .want = "" },
+     };
+     for (cases) |case| {
+         const got = try evalScalar(alloc, case.src);
+         try testing.expect(got == .string);
+         try testing.expectEqualStrings(case.want, got.string);
+     }
+ }
+
+ test "Functions: string-length on scalar arg" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     const Case = struct { src: []const u8, want: f64 };
+     const cases = [_]Case{
+         .{ .src = "string-length('hello')", .want = 5 },
+         .{ .src = "string-length('')", .want = 0 },
+         .{ .src = "string-length('a b c')", .want = 5 },
+     };
+     for (cases) |case| {
+         const got = try evalScalar(alloc, case.src);
+         try testing.expect(got == .number);
+         try testing.expectEqual(case.want, got.number);
+     }
+ }
+
+ test "Functions: normalize-space" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     const Case = struct { src: []const u8, want: []const u8 };
+     const cases = [_]Case{
+         .{ .src = "normalize-space(' hello world ')", .want = "hello world" },
+         .{ .src = "normalize-space('hello')", .want = "hello" },
+         .{ .src = "normalize-space('')", .want = "" },
+         .{ .src = "normalize-space(' ')", .want = "" },
+         .{ .src = "normalize-space('a\tb\nc')", .want = "a b c" },
+     };
+     for (cases) |case| {
+         const got = try evalScalar(alloc, case.src);
+         try testing.expect(got == .string);
+         try testing.expectEqualStrings(case.want, got.string);
+     }
+ }
+
+ test "Functions: translate" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     const Case = struct { src: []const u8, want: []const u8 };
+     const cases = [_]Case{
+         // Standard XPath spec example.
+         .{ .src = "translate('bar', 'abc', 'ABC')", .want = "BAr" },
+         // Char in `from` past `to.len` is deleted.
+         .{ .src = "translate('--aaa--', 'abc-', 'ABC')", .want = "AAA" },
+         .{ .src = "translate('hello', '', '')", .want = "hello" },
+         // Identity.
+         .{ .src = "translate('abc', 'abc', 'abc')", .want = "abc" },
+     };
+     for (cases) |case| {
+         const got = try evalScalar(alloc, case.src);
+         try testing.expect(got == .string);
+         try testing.expectEqualStrings(case.want, got.string);
+     }
+ }
+
+ test "Functions: boolean / not / true / false / lang" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     const Case = struct { src: []const u8, want: bool };
+     const cases = [_]Case{
+         .{ .src = "true()", .want = true },
+         .{ .src = "false()", .want = false },
+         .{ .src = "not(true())", .want = false },
+         .{ .src = "not(false())", .want = true },
+         .{ .src = "boolean(1)", .want = true },
+         .{ .src = "boolean(0)", .want = false },
+         .{ .src = "boolean('')", .want = false },
+         .{ .src = "boolean('x')", .want = true },
+         // lang is a stub — always false.
+         .{ .src = "lang('en')", .want = false },
+     };
+     for (cases) |case| {
+         const got = try evalScalar(alloc, case.src);
+         try testing.expect(got == .boolean);
+         try testing.expectEqual(case.want, got.boolean);
+     }
+ }
+
+ test "Functions: number() on scalar arg" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     // Inputs that convert to a definite number.
+     const Case = struct { src: []const u8, want: f64 };
+     const cases = [_]Case{
+         .{ .src = "number('42')", .want = 42 },
+         .{ .src = "number(true())", .want = 1 },
+         .{ .src = "number(false())", .want = 0 },
+     };
+     for (cases) |case| {
+         const got = try evalScalar(alloc, case.src);
+         try testing.expectEqual(case.want, got.number);
+     }
+
+     // A non-numeric string converts to NaN.
+     const not_a_number = try evalScalar(alloc, "number('foo')");
+     try testing.expect(std.math.isNan(not_a_number.number));
+ }
+
+ test "Functions: floor / ceiling / round" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     const Case = struct { src: []const u8, want: f64 };
+     const cases = [_]Case{
+         .{ .src = "floor(1.5)", .want = 1 },
+         .{ .src = "floor(-1.5)", .want = -2 },
+         .{ .src = "floor(0)", .want = 0 },
+         .{ .src = "ceiling(1.5)", .want = 2 },
+         .{ .src = "ceiling(-1.5)", .want = -1 },
+         .{ .src = "ceiling(0)", .want = 0 },
+         // Half-toward-positive-infinity (JS Math.round behavior).
+         .{ .src = "round(0.5)", .want = 1 },
+         .{ .src = "round(-0.5)", .want = 0 },
+         .{ .src = "round(1.5)", .want = 2 },
+         .{ .src = "round(-1.5)", .want = -1 },
+         .{ .src = "round(2.5)", .want = 3 },
+     };
+     for (cases) |case| {
+         const got = try evalScalar(alloc, case.src);
+         try testing.expect(got == .number);
+         try testing.expectEqual(case.want, got.number);
+     }
+ }
+
+ test "Functions: round/floor/ceiling propagate NaN and Infinity" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     // `1 div 0` is +Infinity and must come through round() unchanged.
+     const rounded_inf = try evalScalar(alloc, "round(1 div 0)");
+     try testing.expect(std.math.isPositiveInf(rounded_inf.number));
+
+     // `0 div 0` is NaN and must come through all three unchanged.
+     const nan_sources = [_][]const u8{
+         "round(0 div 0)",
+         "floor(0 div 0)",
+         "ceiling(0 div 0)",
+     };
+     for (nan_sources) |src| {
+         const got = try evalScalar(alloc, src);
+         try testing.expect(std.math.isNan(got.number));
+     }
+ }
+
+ test "Functions: sum / count on non-node-set defaults" {
+     var scratch = std.heap.ArenaAllocator.init(testing.allocator);
+     defer scratch.deinit();
+     const alloc = scratch.allocator();
+
+     // sum() of a non-node-set is NaN.
+     const summed = try evalScalar(alloc, "sum('hello')");
+     try testing.expect(std.math.isNan(summed.number));
+
+     // count() of a non-node-set is 0.
+     const counted = try evalScalar(alloc, "count('hello')");
+     try testing.expectEqual(@as(f64, 0), counted.number);
+ }
+
+ test "Functions: roundHalfToPosInf" {
+     // Finite inputs: ties go toward positive infinity.
+     const Case = struct { input: f64, want: f64 };
+     const cases = [_]Case{
+         .{ .input = 0.5, .want = 1 },
+         .{ .input = -0.5, .want = 0 },
+         .{ .input = 1.5, .want = 2 },
+         .{ .input = -1.5, .want = -1 },
+         .{ .input = 2.5, .want = 3 },
+     };
+     for (cases) |case| {
+         try testing.expectEqual(case.want, roundHalfToPosInf(case.input));
+     }
+
+     // Non-finite inputs pass through unchanged.
+     const inf = std.math.inf(f64);
+     try testing.expect(std.math.isNan(roundHalfToPosInf(std.math.nan(f64))));
+     try testing.expect(std.math.isPositiveInf(roundHalfToPosInf(inf)));
+     try testing.expect(std.math.isNegativeInf(roundHalfToPosInf(-inf)));
+ }
diff --git a/src/browser/xpath/result.zig b/src/browser/xpath/result.zig
new file mode 100644
index 00000000..0556f4ee
--- /dev/null
+++ b/src/browser/xpath/result.zig
@@ -0,0 +1,199 @@
+// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+//! XPath 1.0 runtime values.
+//!
+//! Tagged union over the four XPath value types: node-set, number,
+//! string, boolean. Type coercion (`toString`, `toNumber`, `toBoolean`)
+//! follows XPath 1.0 spec §3, with HTML-pragmatic shortcuts (decision
+//! #2).
+
+const std = @import("std");
+
+const Node = @import("../webapi/Node.zig");
+
+const CData = Node.CData;
+const Allocator = std.mem.Allocator;
+
+pub const Result = union(enum) {
+ /// Owned by the evaluator's arena. Order is significant only at the
+ /// public boundary, where the evaluator sorts to document order.
+ node_set: []const *Node,
+ number: f64, // NaN and ±Infinity are valid payloads; numberToString spells them out
+ string: []const u8, // toString hands this slice back as-is, without copying
+ boolean: bool, // renders as "true"/"false" and converts to 1/0
+};
+
+/// Computes the XPath string-value (spec §5) of `node`.
+///
+/// - Attribute: the attribute's value.
+/// - `.cdata` nodes (text, comment, CDATA, processing instruction): the
+///   node's character data.
+/// - Element / Document: the text content written by `Node.getTextContent`
+///   (comments and processing instructions are left out).
+/// - DocumentType / DocumentFragment: the empty string, mirroring the
+///   polyfill's `nodeValue || textContent || ''` fallthrough.
+///
+/// Attribute and cdata results borrow the node's own storage; element and
+/// document results are built in a buffer allocated from `arena`.
+pub fn stringValueOf(arena: Allocator, node: *Node) error{WriteFailed}![]const u8 {
+    switch (node._type) {
+        .document_type, .document_fragment => return "",
+        .attribute => |attribute| return attribute._value.str(),
+        .cdata => |char_data| return char_data._data.str(),
+        .element, .document => {
+            var text = std.Io.Writer.Allocating.init(arena);
+            try node.getTextContent(&text.writer);
+            return text.written();
+        },
+    }
+}
+
+/// XPath boolean(): numbers are true unless zero or NaN; strings and node-sets are true when non-empty.
+pub fn toBoolean(val: Result) bool {
+    return switch (val) {
+        .boolean => |flag| flag,
+        .number => |n| !(n == 0 or std.math.isNan(n)),
+        inline .string, .node_set => |seq| seq.len != 0,
+    };
+}
+
+/// XPath number() coercion. Booleans become 1 or 0, strings go through
+/// `stringToNumber` (empty or whitespace-only text is NaN), and a node-set
+/// converts the string-value of its element 0, or NaN when it is empty.
+/// NOTE(review): XPath picks the first node in document order — confirm callers pass ordered sets.
+pub fn toNumber(arena: Allocator, val: Result) error{WriteFailed}!f64 {
+    switch (val) {
+        .number => |n| return n,
+        .boolean => |flag| return if (flag) 1 else 0,
+        .string => |text| return stringToNumber(text),
+        .node_set => |nodes| {
+            if (nodes.len == 0) return std.math.nan(f64);
+            return stringToNumber(try stringValueOf(arena, nodes[0]));
+        },
+    }
+}
+
+pub fn stringToNumber(s: []const u8) f64 {
+    const trimmed = std.mem.trim(u8, s, &std.ascii.whitespace);
+    // parseFloat is a Zig-literal parser: it also takes `_` separators, hex floats and inf/nan words, none of which are XPath numbers.
+    if (std.mem.indexOfNone(u8, trimmed, "0123456789.+-eE") != null) return std.math.nan(f64);
+    return std.fmt.parseFloat(f64, trimmed) catch std.math.nan(f64); // empty or malformed text fails to parse → NaN
+}
+
+/// XPath string() coercion. Strings are returned borrowed and booleans map to
+/// static literals; numbers are formatted via `numberToString`, and a node-set
+/// yields the string-value of its element 0 ("" when the set is empty).
+pub fn toString(arena: Allocator, val: Result) error{ OutOfMemory, WriteFailed }![]const u8 {
+    switch (val) {
+        .string => |text| return text,
+        .boolean => |flag| return if (flag) "true" else "false",
+        .number => |n| return try numberToString(arena, n),
+        .node_set => |nodes| return if (nodes.len != 0) try stringValueOf(arena, nodes[0]) else "",
+    }
+}
+
+/// XPath spec §4.2 number → string. NaN and ±Infinity get fixed spellings and
+/// both zeros print as "0"; integer-valued numbers are formatted through an
+/// i64 so no fractional part appears. Everything else uses Zig's `{d}`.
+pub fn numberToString(arena: Allocator, n: f64) error{OutOfMemory}![]const u8 {
+    if (n == 0) return "0"; // +0 and -0 compare equal, so neither prints a sign
+    if (std.math.isNan(n)) return "NaN";
+    if (std.math.isInf(n)) return if (n > 0) "Infinity" else "-Infinity";
+    const max_exact: f64 = 9.007199254740992e15; // 2^53, well inside the i64 range
+    if (@trunc(n) == n and @abs(n) <= max_exact) {
+        return std.fmt.allocPrint(arena, "{d}", .{@as(i64, @intFromFloat(n))});
+    }
+    return std.fmt.allocPrint(arena, "{d}", .{n});
+}
+
+const testing = std.testing;
+
+test "Result: toBoolean" {
+    // One truthy representative per scalar type.
+    const truthy = [_]Result{ .{ .boolean = true }, .{ .number = 1 }, .{ .string = "x" } };
+    for (truthy) |val| try testing.expect(toBoolean(val));
+
+    // false, zero, NaN, the empty string and the empty node-set must all
+    // coerce to false.
+    const falsy = [_]Result{ .{ .boolean = false }, .{ .number = 0 }, .{ .number = std.math.nan(f64) }, .{ .string = "" }, .{ .node_set = &.{} } };
+    for (falsy) |val| try testing.expect(!toBoolean(val));
+}
+
+test "Result: stringToNumber" {
+    // Well-formed decimals parse, surrounding whitespace included.
+    const parsed = [_]struct { []const u8, f64 }{ .{ "42", 42 }, .{ "3.14", 3.14 }, .{ "-1", -1 }, .{ " 5 ", 5 } };
+    for (parsed) |case| try testing.expectEqual(case[1], stringToNumber(case[0]));
+
+    // Empty, blank and non-numeric text all collapse to NaN.
+    const rejected = [_][]const u8{ "", " ", "abc" };
+    for (rejected) |text| try testing.expect(std.math.isNan(stringToNumber(text)));
+}
+
+test "Result: numberToString — integers print without decimal" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+    // {input, expected}: -0.0 must lose its sign and 42.0 its fractional part.
+    const cases = [_]struct { f64, []const u8 }{ .{ 5, "5" }, .{ 0, "0" }, .{ -0.0, "0" }, .{ -1, "-1" }, .{ 42.0, "42" } };
+    for (cases) |case| {
+        try testing.expectEqualStrings(case[1], try numberToString(a, case[0]));
+    }
+}
+
+test "Result: numberToString — special values" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    // The non-finite values use the XPath spellings, not Zig's `nan`/`inf`.
+    const inf = std.math.inf(f64);
+    const cases = [_]struct { f64, []const u8 }{ .{ std.math.nan(f64), "NaN" }, .{ inf, "Infinity" }, .{ -inf, "-Infinity" } };
+    for (cases) |case| try testing.expectEqualStrings(case[1], try numberToString(arena.allocator(), case[0]));
+}
+
+test "Result: numberToString — floats" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    // Fractional values skip the i64 branch and are formatted as floats.
+    const cases = [_]struct { f64, []const u8 }{ .{ 3.14, "3.14" }, .{ 0.5, "0.5" } };
+    for (cases) |case| try testing.expectEqualStrings(case[1], try numberToString(arena.allocator(), case[0]));
+}
+
+test "Result: toString — boolean returns static string" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+    for ([_]bool{ true, false }, [_][]const u8{ "true", "false" }) |flag, text| try testing.expectEqualStrings(text, try toString(a, .{ .boolean = flag }));
+}
+
+test "Result: toString — node-set with empty arr is empty" {
+ var arena = std.heap.ArenaAllocator.init(testing.allocator);
+ defer arena.deinit();
+ try testing.expectEqualStrings("", try toString(arena.allocator(), .{ .node_set = &.{} })); // no element 0 to take a string-value from
+}
+
+test "Result: toNumber — empty node-set is NaN" {
+ var arena = std.heap.ArenaAllocator.init(testing.allocator);
+ defer arena.deinit();
+ try testing.expect(std.math.isNan(try toNumber(arena.allocator(), .{ .node_set = &.{} }))); // toNumber short-circuits on ns.len == 0
+}
+
+test "Result: toNumber — boolean coerces to 0/1" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+    for ([_]bool{ true, false }, [_]f64{ 1, 0 }) |flag, expected| try testing.expectEqual(expected, try toNumber(a, .{ .boolean = flag }));
+}
diff --git a/src/cdp/domains/dom.zig b/src/cdp/domains/dom.zig
index de7712e7..aa7cfb2a 100644
--- a/src/cdp/domains/dom.zig
+++ b/src/cdp/domains/dom.zig
@@ -27,6 +27,7 @@ const dump = @import("../../browser/dump.zig");
const js = @import("../../browser/js/js.zig");
const DOMNode = @import("../../browser/webapi/Node.zig");
const Selector = @import("../../browser/webapi/selector/Selector.zig");
+const xpath = @import("../../browser/xpath/Evaluator.zig");
const log = lp.log;
const Allocator = std.mem.Allocator;
@@ -91,6 +92,56 @@ fn getDocument(cmd: *CDP.Command) !void {
return cmd.sendResult(.{ .root = bc.nodeWriter(node, .{ .depth = params.depth }) }, .{});
}
+// The thirteen named axes of XPath 1.0, kept as a closed lookup set. A `::`
+// only counts as an axis separator when the word right before it is listed
+// here, which keeps CSS pseudo-elements such as `a::before` or
+// `div::first-line` away from the XPath evaluator. Entries are alphabetical.
+const xpath_axis_names = std.StaticStringMap(void).initComptime(.{
+    .{ "ancestor", {} },
+    .{ "ancestor-or-self", {} },
+    .{ "attribute", {} },
+    .{ "child", {} },
+    .{ "descendant", {} },
+    .{ "descendant-or-self", {} },
+    .{ "following", {} },
+    .{ "following-sibling", {} },
+    .{ "namespace", {} },
+    .{ "parent", {} },
+    .{ "preceding", {} },
+    .{ "preceding-sibling", {} },
+    .{ "self", {} },
+});
+
+// Decides whether a DOM.performSearch query should be evaluated as XPath
+// rather than CSS (decision #2/#9). It is a syntactic guess, not a parse:
+//   - a query starting with `/` or `./`, optionally behind one `(`, is XPath;
+//   - so is a query where any `::` directly follows one of the names in
+//     `xpath_axis_names`;
+//   - anything else, including the empty string, is left to the CSS selector
+//     engine.
+fn isXPathQuery(q: []const u8) bool {
+    // An opening parenthesis may wrap the first step, as in `(//foo)[1]`.
+    const first_step = if (std.mem.startsWith(u8, q, "(")) q[1..] else q;
+    const starts_like_path = std.mem.startsWith(u8, first_step, "/") or
+        std.mem.startsWith(u8, first_step, "./");
+    if (starts_like_path) return true;
+
+    // Otherwise inspect every `::`; one without an axis name in front of it
+    // is skipped and the scan resumes one byte later.
+    var search_from: usize = 0;
+    while (std.mem.indexOfPos(u8, q, search_from, "::")) |sep| : (search_from = sep + 1) {
+        // Walk left over the run of [a-zA-Z-] that ends right before `::`.
+        var word_start = sep;
+        while (word_start > 0) : (word_start -= 1) {
+            const c = q[word_start - 1];
+            if (!std.ascii.isAlphabetic(c) and c != '-') break;
+        }
+        // An empty run is simply not in the set, so it needs no special case.
+        if (xpath_axis_names.has(q[word_start..sep])) return true;
+    }
+    return false;
+}
+
// https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-performSearch
fn performSearch(cmd: *CDP.Command) !void {
const params = (try cmd.params(struct {
@@ -100,15 +151,23 @@ fn performSearch(cmd: *CDP.Command) !void {
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
const frame = bc.session.currentFrame() orelse return error.FrameNotLoaded;
- const list = try Selector.querySelectorAll(frame.window._document.asNode(), params.query, frame);
+ const root = frame.window._document.asNode();
+
+ if (isXPathQuery(params.query)) {
+ const arena = try frame.getArena(.medium, "DOM.performSearch");
+ defer frame.releaseArena(arena);
+ const nodes = try xpath.searchAll(arena, root, params.query, frame);
+ return finishSearch(cmd, bc, nodes);
+ }
+
+ const list = try Selector.querySelectorAll(root, params.query, frame);
defer list.deinit(frame._page);
+ return finishSearch(cmd, bc, list._nodes);
+}
- const search = try bc.node_search_list.create(list._nodes);
-
- // dispatch setChildNodesEvents to inform the client of the subpart of node
- // tree covering the results.
- try dispatchSetChildNodes(cmd, list._nodes);
-
+fn finishSearch(cmd: *CDP.Command, bc: *CDP.BrowserContext, nodes: []const *DOMNode) !void {
+ const search = try bc.node_search_list.create(nodes);
+ try dispatchSetChildNodes(cmd, nodes);
return cmd.sendResult(.{
.searchId = search.name,
.resultCount = @as(u32, @intCast(search.node_ids.len)),
@@ -616,6 +675,78 @@ test "cdp.dom: search flow" {
try ctx.expectSentError(-31998, "SearchResultNotFound", .{ .id = 17 });
}
+test "cdp.dom: performSearch with XPath" {
+ var ctx = try testing.context();
+ defer ctx.deinit();
+
+ _ = try ctx.loadBrowserContext(.{ .id = "BID-A", .url = "cdp/perform_search_xpath.html" });
+
+ try ctx.processMessage(.{
+ .id = 20,
+ .method = "DOM.performSearch",
+ .params = .{ .query = "//p" }, // leading `/` → routed to the XPath evaluator
+ });
+ try ctx.expectSentResult(.{ .searchId = "0", .resultCount = 3 }, .{ .id = 20 }); // first search in this context gets id "0"
+
+ try ctx.processMessage(.{
+ .id = 21,
+ .method = "DOM.performSearch",
+ .params = .{ .query = "descendant::p" }, // axis name before `::` → XPath
+ });
+ try ctx.expectSentResult(.{ .searchId = "1", .resultCount = 3 }, .{ .id = 21 });
+
+ try ctx.processMessage(.{
+ .id = 22,
+ .method = "DOM.performSearch",
+ .params = .{ .query = "//*[@id='outer']" }, // XPath with an attribute predicate
+ });
+ try ctx.expectSentResult(.{ .searchId = "2", .resultCount = 1 }, .{ .id = 22 });
+
+ try ctx.processMessage(.{
+ .id = 23,
+ .method = "DOM.performSearch",
+ .params = .{ .query = "p" }, // plain tag name → existing CSS selector path
+ });
+ try ctx.expectSentResult(.{ .searchId = "3", .resultCount = 3 }, .{ .id = 23 }); // CSS searches continue the same searchId sequence
+
+ try ctx.processMessage(.{
+ .id = 24,
+ .method = "DOM.performSearch",
+ .params = .{ .query = "div p" }, // CSS descendant combinator, not XPath
+ });
+ try ctx.expectSentResult(.{ .searchId = "4", .resultCount = 2 }, .{ .id = 24 });
+}
+
+test "cdp.dom: isXPathQuery heuristic" {
+    // XPath-shaped queries — one entry per heuristic branch.
+    const xpath_shaped = [_][]const u8{
+        "/html",
+        "//p",
+        ".//foo",
+        "(//foo)[1]",
+        "(./bar)[2]",
+        "descendant::p",
+        "ancestor-or-self::*",
+        "//*[@id='x']",
+    };
+    for (xpath_shaped) |query| try std.testing.expect(isXPathQuery(query));
+
+    // CSS-shaped queries — these must keep falling through to the existing
+    // selector path.
+    const css_shaped = [_][]const u8{
+        "", "p", "div p",
+        "#main", ".cls", "[data-x]",
+        // Parentheses without a path, and a leading dot without `/`.
+        "(p)", ".x",
+        // Pseudo-elements: the word before `::` is not an XPath axis name.
+        "a::before", "div::after",
+        "p::first-line", "input::placeholder",
+        // `::` inside an attribute value, with nothing axis-like before it.
+        "[data-x=\"x::y\"]",
+    };
+    for (css_shaped) |query| try std.testing.expect(!isXPathQuery(query));
+}
+
test "cdp.dom: querySelector unknown search id" {
var ctx = try testing.context();
defer ctx.deinit();
diff --git a/src/lightpanda.zig b/src/lightpanda.zig
index b60292c9..fed1bf41 100644
--- a/src/lightpanda.zig
+++ b/src/lightpanda.zig
@@ -58,6 +58,7 @@ pub const FetchOpts = struct {
wait_ms: u32 = 5000,
wait_until: ?Config.WaitUntil = null,
wait_script: ?[:0]const u8 = null,
+ inject_script: std.ArrayList([]const u8) = .{},
wait_selector: ?[:0]const u8 = null,
dump: dump.Opts,
dump_mode: ?Config.DumpFormat = null,
@@ -79,6 +80,9 @@ pub fn fetch(app: *App, browser: *Browser, url: [:0]const u8, opts: FetchOpts) !
}
}
+ // Stash the scripts the user wants to inject.
+ session.inject_scripts = opts.inject_script.items;
+
const frame = try session.createPage();
// // Comment this out to get a profile of the JS code in v8/profile.json.
diff --git a/src/main.zig b/src/main.zig
index 499f2f2c..6d4bd249 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -128,6 +128,7 @@ fn run(allocator: Allocator, main_arena: Allocator) !void {
.wait_ms = opts.wait_ms,
.wait_until = opts.wait_until,
.wait_script = opts.wait_script,
+ .inject_script = opts.inject_script,
.wait_selector = opts.wait_selector,
.dump_mode = opts.dump,
.dump = .{
diff --git a/src/testing.zig b/src/testing.zig
index 549d1349..9e59ebc0 100644
--- a/src/testing.zig
+++ b/src/testing.zig
@@ -338,12 +338,21 @@ pub var test_notification: *Notification = undefined;
pub var test_session: *Session = undefined;
const WEB_API_TEST_ROOT = "src/browser/tests/";
-const HtmlRunnerOpts = struct {};
+const HtmlRunnerOpts = struct {
+ timeout_ms: u32 = 2000, // forwarded to runWebApiTest, where it seeds the `wait_ms` budget
+ inject_script: ?[]const u8 = null, // when set, htmlRunner installs it as test_session.inject_scripts for the run
+};
pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void {
- _ = opts;
defer reset();
+ var inject_scripts: [1][]const u8 = undefined;
+ if (opts.inject_script) |script| {
+ inject_scripts[0] = script;
+ test_session.inject_scripts = inject_scripts[0..1];
+ }
+ defer test_session.inject_scripts = &.{};
+
const root = try std.fs.path.joinZ(arena_allocator, &.{ WEB_API_TEST_ROOT, path });
const stat = std.fs.cwd().statFile(root) catch |err| {
std.debug.print("Failed to stat file: '{s}'", .{root});
@@ -356,7 +365,7 @@ pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void {
return;
}
try @import("root").subtest(root);
- try runWebApiTest(root);
+ try runWebApiTest(root, opts.timeout_ms);
},
.directory => {
var dir = try std.fs.cwd().openDir(root, .{
@@ -382,7 +391,7 @@ pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void {
const full_path = try std.fs.path.joinZ(arena_allocator, &.{ root, entry.name });
try @import("root").subtest(entry.name);
- try runWebApiTest(full_path);
+ try runWebApiTest(full_path, opts.timeout_ms);
}
},
else => |kind| {
@@ -392,7 +401,7 @@ pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void {
}
}
-fn runWebApiTest(test_file: [:0]const u8) !void {
+fn runWebApiTest(test_file: [:0]const u8, timeout_ms: u32) !void {
const frame = try test_session.createPage();
defer test_session.removePage();
@@ -418,7 +427,7 @@ fn runWebApiTest(test_file: [:0]const u8) !void {
var runner = try test_session.runner(.{});
try runner.wait(.{ .ms = 2000, .until = .load });
- var wait_ms: u32 = 2000;
+ var wait_ms: u32 = timeout_ms;
var timer = try std.time.Timer.start();
while (true) {
var try_catch: js.TryCatch = undefined;