mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 01:25:53 -04:00
Merge branch 'main' into agent
This commit is contained in:
49
flake.lock
generated
49
flake.lock
generated
@@ -8,11 +8,11 @@
|
||||
"rust-analyzer-src": "rust-analyzer-src"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1770708269,
|
||||
"narHash": "sha256-OnZW86app7hHJJoB5lC9GNXY5QBBIESJB+sIdwEyld0=",
|
||||
"lastModified": 1778493576,
|
||||
"narHash": "sha256-/vvNyF8C2tNTkxtffGUQbcTJvf72cRw3qo8cyBh33pM=",
|
||||
"owner": "nix-community",
|
||||
"repo": "fenix",
|
||||
"rev": "6b5325a017a9a9fe7e6252ccac3680cc7181cd63",
|
||||
"rev": "5bf88a04d8678c7334f2f5072975f3b2cb0fe1ba",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -55,24 +55,6 @@
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"flake-utils_2": {
|
||||
"inputs": {
|
||||
"systems": "systems_2"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1705309234,
|
||||
"narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=",
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"gitignore": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
@@ -96,16 +78,16 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1768649915,
|
||||
"narHash": "sha256-jc21hKogFnxU7KXSVTRmxC7u5D4RHwm9BAvDf5/Z1Uo=",
|
||||
"lastModified": 1778003029,
|
||||
"narHash": "sha256-q/nkKLDtHIyLjZpKhWk3cSK5IYsFqtMd6UtXF3ddjgA=",
|
||||
"owner": "nixos",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "3e3f3c7f9977dc123c23ee21e8085ed63daf8c37",
|
||||
"rev": "0c88e1f2bdb93d5999019e99cb0e61e1fe2af4c5",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nixos",
|
||||
"ref": "release-25.05",
|
||||
"ref": "nixos-25.11",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
@@ -122,11 +104,11 @@
|
||||
"rust-analyzer-src": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"lastModified": 1770668050,
|
||||
"narHash": "sha256-Q05yaIZtQrBKHpyWaPmyJmDRj0lojnVf8nUFE0vydcY=",
|
||||
"lastModified": 1778424672,
|
||||
"narHash": "sha256-v/CZ9tJT+ulSe3ZmjuG3lWABwOvITbT7EqF/2NAl3Hs=",
|
||||
"owner": "rust-lang",
|
||||
"repo": "rust-analyzer",
|
||||
"rev": "9efc1f709f3c8134c3acac5d3592a8e4c184a0c6",
|
||||
"rev": "e266f5cab8f6525d0bc2ddccc0006418c534b5e6",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -152,6 +134,7 @@
|
||||
}
|
||||
},
|
||||
"systems_2": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"lastModified": 1681028828,
|
||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||
@@ -169,17 +152,17 @@
|
||||
"zigPkgs": {
|
||||
"inputs": {
|
||||
"flake-compat": "flake-compat",
|
||||
"flake-utils": "flake-utils_2",
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
]
|
||||
],
|
||||
"systems": "systems_2"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1770598090,
|
||||
"narHash": "sha256-k+82IDgTd9o5sxHIqGlvfwseKln3Ejx1edGtDltuPXo=",
|
||||
"lastModified": 1778375309,
|
||||
"narHash": "sha256-3+5C2LDX1lmupM6ktG6i50BRvRnN32WLinpxqa2g+HQ=",
|
||||
"owner": "mitchellh",
|
||||
"repo": "zig-overlay",
|
||||
"rev": "142495696982c88edddc8e17e4da90d8164acadf",
|
||||
"rev": "057bcab6a8e6a3a85e9293e150d35c63404e8fca",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
description = "headless browser designed for AI and automation";
|
||||
|
||||
inputs = {
|
||||
nixpkgs.url = "github:nixos/nixpkgs/release-25.05";
|
||||
nixpkgs.url = "github:nixos/nixpkgs/nixos-25.11";
|
||||
|
||||
zigPkgs.url = "github:mitchellh/zig-overlay";
|
||||
zigPkgs.inputs.nixpkgs.follows = "nixpkgs";
|
||||
|
||||
@@ -143,6 +143,24 @@ fn waitScriptFileValidator(allocator: Allocator, args: *std.process.ArgIterator)
|
||||
};
|
||||
}
|
||||
|
||||
/// Validator for `--inject-script-file`: consumes the next CLI argument as a
/// file path, reads the whole file and appends its contents to `list`.
///
/// Logs a fatal message and returns `error.InvalidArgument` when the path
/// argument is missing or the file cannot be read. A failure to append to
/// `list` is propagated unchanged.
fn injectScriptFileValidator(
    allocator: Allocator,
    args: *std.process.ArgIterator,
    list: *std.ArrayList([]const u8),
) !void {
    const path = args.next() orelse {
        log.fatal(.app, "missing argument value", .{ .arg = "--inject-script-file" });
        return error.InvalidArgument;
    };

    // No size cap is applied: the entire file is read into memory.
    const bytes = std.fs.cwd().readFileAllocOptions(allocator, path, std.math.maxInt(usize), null, .of(u8), null) catch |err| {
        log.fatal(.app, "failed to read file", .{ .arg = "--inject-script-file", .path = path, .err = err });
        return error.InvalidArgument;
    };
    // `list` only takes ownership once the append succeeds; release the
    // buffer if the append fails so the error path does not leak it.
    errdefer allocator.free(bytes);

    return list.append(allocator, bytes);
}
|
||||
|
||||
/// Definition of all the commands and their arguments. See cli.zig for further details.
|
||||
const Commands = cli.Builder(.{
|
||||
.{
|
||||
@@ -176,6 +194,14 @@ const Commands = cli.Builder(.{
|
||||
},
|
||||
},
|
||||
.{ .name = "wait_selector", .type = ?[:0]const u8 },
|
||||
.{
|
||||
.name = "inject_script",
|
||||
.type = []const u8,
|
||||
.multiple = true,
|
||||
.variants = .{
|
||||
.{ .name = "inject_script_file", .validator = injectScriptFileValidator },
|
||||
},
|
||||
},
|
||||
.{ .name = "terminate_ms", .type = ?u32 },
|
||||
},
|
||||
.shared_options = CommonOptions,
|
||||
@@ -688,6 +714,15 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
|
||||
\\--wait-script-file
|
||||
\\ Like --wait-script, but reads the script from a file.
|
||||
\\
|
||||
\\--inject-script JavaScript to execute as the document's <head> is
|
||||
\\ parsed, before any other scripts in the page run.
|
||||
\\ Can be passed multiple times; scripts run in order.
|
||||
\\
|
||||
\\--inject-script-file
|
||||
\\ Like --inject-script, but reads the script from a file.
|
||||
\\ Can be passed multiple times; can be mixed with
|
||||
\\ --inject-script and runs in CLI order.
|
||||
\\
|
||||
\\--terminate-ms Hard deadline in milliseconds. After this time elapses,
|
||||
\\ JavaScript execution is forcibly terminated (e.g. for
|
||||
\\ pages with endless scripts). Unlike --wait-ms, which
|
||||
|
||||
@@ -1815,26 +1815,12 @@ pub fn notifyNetworkAlmostIdle(self: *Frame) void {
|
||||
});
|
||||
}
|
||||
|
||||
// called from the parser
|
||||
pub fn appendNew(self: *Frame, parent: *Node, child: Node.NodeOrText) !void {
|
||||
const node = switch (child) {
|
||||
.node => |n| n,
|
||||
.text => |txt| blk: {
|
||||
// If we're appending this adjacently to a text node, we should merge
|
||||
if (parent.lastChild()) |sibling| {
|
||||
if (sibling.is(CData.Text)) |tn| {
|
||||
const cdata = tn._proto;
|
||||
const existing = cdata.getData().str();
|
||||
cdata._data = try String.concat(self.arena, &.{ existing, txt });
|
||||
return;
|
||||
}
|
||||
}
|
||||
break :blk try self.createTextNode(txt);
|
||||
},
|
||||
};
|
||||
|
||||
lp.assert(node._parent == null, "Frame.appendNew", .{});
|
||||
try self._insertNodeRelative(true, parent, node, .append, .{
|
||||
// called from the parser. Text-node merging is the parser's responsibility
|
||||
// (see Parser.appendTextChunk in src/browser/parser/Parser.zig); this is the
|
||||
// "insert this fully-formed node as a new last child of parent" entry point.
|
||||
pub fn appendNew(self: *Frame, parent: *Node, child: *Node) !void {
|
||||
lp.assert(child._parent == null, "Frame.appendNew", .{});
|
||||
try self._insertNodeRelative(true, parent, child, .append, .{
|
||||
// this opts has no meaning since we're passing `true` as the first
|
||||
// parameter, which indicates this comes from the parser, and has its
|
||||
// own special processing. Still, set it to be clear.
|
||||
@@ -2139,12 +2125,35 @@ pub fn createElementNS(self: *Frame, namespace: Element.Namespace, name: []const
|
||||
attribute_iterator,
|
||||
.{ ._proto = undefined },
|
||||
),
|
||||
asUint("head") => return self.createHtmlElementT(
|
||||
Element.Html.Head,
|
||||
namespace,
|
||||
attribute_iterator,
|
||||
.{ ._proto = undefined },
|
||||
),
|
||||
asUint("head") => {
|
||||
// Inject user-provided scripts.
|
||||
const inject_scripts = self._session.inject_scripts;
|
||||
const should_inject_scripts = from_parser and self._parse_mode == .document and inject_scripts.len > 0;
|
||||
|
||||
if (should_inject_scripts) {
|
||||
var ls: JS.Local.Scope = undefined;
|
||||
self.js.localScope(&ls);
|
||||
defer ls.deinit();
|
||||
|
||||
for (inject_scripts) |inject_script| {
|
||||
var try_catch: JS.TryCatch = undefined;
|
||||
try_catch.init(&ls.local);
|
||||
defer try_catch.deinit();
|
||||
|
||||
ls.local.eval(inject_script, "inject_script") catch |err| {
|
||||
const caught = try_catch.caughtOrError(self.call_arena, err);
|
||||
log.err(.app, "inject script error", .{ .err = caught });
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return self.createHtmlElementT(
|
||||
Element.Html.Head,
|
||||
namespace,
|
||||
attribute_iterator,
|
||||
.{ ._proto = undefined },
|
||||
);
|
||||
},
|
||||
asUint("body") => return self.createHtmlElementT(
|
||||
Element.Html.Body,
|
||||
namespace,
|
||||
@@ -4121,6 +4130,12 @@ test "WebApi: Integration" {
|
||||
try testing.htmlRunner("integration", .{});
|
||||
}
|
||||
|
||||
// Runs the inject_script.html fixture with a script supplied through the
// runner's `inject_script` option. The script sets two globals on `window`;
// presumably the fixture asserts on them (fixture not visible here).
test "WebApi: inject_script" {
    try testing.htmlRunner("inject_script.html", .{
        .inject_script = "window.__injected = true; window.__injectValue = 42;",
    });
}
|
||||
|
||||
test "Page: isSameOrigin" {
|
||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer arena.deinit();
|
||||
|
||||
@@ -57,6 +57,8 @@ navigation: Navigation,
|
||||
storage_shed: storage.Shed,
|
||||
notification: *Notification,
|
||||
cookie_jar: storage.Cookie.Jar,
|
||||
/// User-provided scripts evaluated when the parser creates the document's <head>.
|
||||
inject_scripts: []const []const u8 = &.{},
|
||||
|
||||
// Shared allocator. Used by Session itself and borrowed by Pages.
|
||||
arena_pool: *ArenaPool,
|
||||
|
||||
@@ -935,6 +935,9 @@ pub const PageJsApis = flattenTypes(&.{
|
||||
@import("../webapi/CryptoKey.zig"),
|
||||
@import("../webapi/Selection.zig"),
|
||||
@import("../webapi/ImageData.zig"),
|
||||
@import("../webapi/XPathResult.zig"),
|
||||
@import("../webapi/XPathExpression.zig"),
|
||||
@import("../webapi/XPathEvaluator.zig"),
|
||||
});
|
||||
|
||||
// APIs available on Worker context globals (constructors like URL, Headers, etc.)
|
||||
|
||||
@@ -23,6 +23,7 @@ const h5e = @import("html5ever.zig");
|
||||
const Frame = @import("../Frame.zig");
|
||||
const Node = @import("../webapi/Node.zig");
|
||||
const Element = @import("../webapi/Element.zig");
|
||||
const CData = @import("../webapi/CData.zig");
|
||||
|
||||
pub const AttributeIterator = h5e.AttributeIterator;
|
||||
|
||||
@@ -39,6 +40,18 @@ pub const ParsedNode = struct {
|
||||
data: ?*anyopaque,
|
||||
};
|
||||
|
||||
// html5ever's tokenizer flushes the script-data character buffer on every '<'
|
||||
// (script-data-less-than-sign-state transition), which produces a separate
|
||||
// AppendText callback per chunk. Merging via String.concat in the previous
|
||||
// implementation was O(N^2/chunk_size) on the page-lifetime arena, blowing
|
||||
// memory on inline JS that contains embedded HTML strings (issue #2397).
|
||||
// Instead, we keep a single Parser-level buf and accumulate same-parent
|
||||
// chunks into it, committing once on flush.
|
||||
const PendingText = struct {
    // Parent the run's text node belongs to; appendTextChunk compares it with
    // the parent of each incoming chunk to decide whether the run continues.
    parent: *Node,
    // Text node whose _data is replaced with the accumulated bytes when
    // flushPendingText commits the run.
    text_node: *CData,
};
|
||||
|
||||
const Parser = @This();
|
||||
|
||||
frame: *Frame,
|
||||
@@ -46,6 +59,17 @@ err: ?Error,
|
||||
container: ParsedNode,
|
||||
arena: Allocator,
|
||||
strings: std.StringHashMapUnmanaged(void),
|
||||
pending_text: ?PendingText,
|
||||
// One buffer reused across every text run in this parser. clearRetainingCapacity
|
||||
// on flush keeps the largest capacity ever needed, so total dead memory on the
|
||||
// parser arena is bounded to one peak-run-sized allocation regardless of how
|
||||
// many text runs the parse contains. Matters for Streaming, whose arena is the
|
||||
// page-lifetime frame.arena (individual frees are no-ops there).
|
||||
//
|
||||
// Single-chunk text runs leave this buf empty: the chunk lives only in
|
||||
// CData._data via createTextNode. The buf is seeded from _data.str() on the
|
||||
// second chunk of a run, so the common case stays at one copy.
|
||||
buf: std.ArrayList(u8),
|
||||
|
||||
pub fn init(arena: Allocator, node: *Node, frame: *Frame) Parser {
|
||||
return .{
|
||||
@@ -57,6 +81,62 @@ pub fn init(arena: Allocator, node: *Node, frame: *Frame) Parser {
|
||||
.data = null,
|
||||
.node = node,
|
||||
},
|
||||
.pending_text = null,
|
||||
.buf = .empty,
|
||||
};
|
||||
}
|
||||
|
||||
/// Commits the current text run, if any, to its text node and clears the
/// pending state. Does nothing when no run is pending.
pub fn flushPendingText(self: *Parser) !void {
    const run = self.pending_text orelse return;
    // Clear the pending marker up front so it is reset even if the commit
    // below fails.
    self.pending_text = null;

    const accumulated = self.buf.items;
    // An empty buf means the run had a single chunk, which createTextNode
    // already stored on the node's _data.
    if (accumulated.len == 0) return;

    // Keep the buffer's capacity for the next run, whatever the outcome.
    defer self.buf.clearRetainingCapacity();
    run.text_node._data = try lp.String.init(self.frame.arena, accumulated, .{ .dupe = true });
}
|
||||
|
||||
/// Handles one text chunk from the parser destined for the end of `parent`.
/// Consecutive chunks for the same text node are gathered in `self.buf` and
/// written to the node only when flushPendingText runs.
fn appendTextChunk(self: *Parser, parent: *Node, txt: []const u8) !void {
    if (self.pending_text) |run| {
        const continues_run = run.parent == parent and parent.lastChild() == run.text_node.asNode();
        if (continues_run) {
            // Second or later chunk of the run. On the second chunk buf is
            // still empty, so copy the first chunk over from _data first.
            if (self.buf.items.len == 0) {
                const seed = run.text_node.getData().str();
                try self.buf.ensureTotalCapacity(self.arena, seed.len + txt.len);
                self.buf.appendSliceAssumeCapacity(seed);
            }
            try self.buf.appendSlice(self.arena, txt);
            return;
        }
        // The chunk targets a different position: commit the previous run.
        try self.flushPendingText();
    }

    if (parent.lastChild()) |last| {
        if (last.is(CData.Text)) |text| {
            // The parent already ends with a text node that is not being
            // tracked. Start a run on it, seeded with its current data plus
            // this chunk, so that later chunks just append to buf.
            const target = text._proto;
            const seed = target.getData().str();
            try self.buf.ensureTotalCapacity(self.arena, seed.len + txt.len);
            self.buf.appendSliceAssumeCapacity(seed);
            self.buf.appendSliceAssumeCapacity(txt);
            self.pending_text = .{ .parent = parent, .text_node = target };
            return;
        }
    }

    // Brand-new run: the first chunk is stored on the new node's _data and
    // buf stays empty unless a second chunk follows.
    const created = try self.frame.createTextNode(txt);
    try self.frame.appendNew(parent, created);
    self.pending_text = .{
        .parent = parent,
        .text_node = created.is(CData.Text).?._proto,
    };
}
|
||||
|
||||
@@ -101,6 +181,9 @@ pub fn parse(self: *Parser, html: []const u8) void {
|
||||
appendBeforeSiblingCallback,
|
||||
appendBasedOnParentNodeCallback,
|
||||
);
|
||||
self.flushPendingText() catch |err| {
|
||||
if (self.err == null) self.err = .{ .err = err, .source = .append };
|
||||
};
|
||||
}
|
||||
|
||||
/// Parse HTML with encoding conversion. Converts from charset to UTF-8 before parsing.
|
||||
@@ -127,6 +210,9 @@ pub fn parseWithEncoding(self: *Parser, html: []const u8, charset: []const u8) v
|
||||
appendBeforeSiblingCallback,
|
||||
appendBasedOnParentNodeCallback,
|
||||
);
|
||||
self.flushPendingText() catch |err| {
|
||||
if (self.err == null) self.err = .{ .err = err, .source = .append };
|
||||
};
|
||||
}
|
||||
|
||||
pub fn parseXML(self: *Parser, xml: []const u8) void {
|
||||
@@ -150,6 +236,9 @@ pub fn parseXML(self: *Parser, xml: []const u8) void {
|
||||
appendBeforeSiblingCallback,
|
||||
appendBasedOnParentNodeCallback,
|
||||
);
|
||||
self.flushPendingText() catch |err| {
|
||||
if (self.err == null) self.err = .{ .err = err, .source = .append };
|
||||
};
|
||||
}
|
||||
|
||||
pub fn parseFragment(self: *Parser, html: []const u8) void {
|
||||
@@ -173,6 +262,9 @@ pub fn parseFragment(self: *Parser, html: []const u8) void {
|
||||
appendBeforeSiblingCallback,
|
||||
appendBasedOnParentNodeCallback,
|
||||
);
|
||||
self.flushPendingText() catch |err| {
|
||||
if (self.err == null) self.err = .{ .err = err, .source = .append };
|
||||
};
|
||||
}
|
||||
|
||||
pub const Streaming = struct {
|
||||
@@ -233,8 +325,16 @@ pub const Streaming = struct {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn done(self: *Streaming) void {
|
||||
h5e.html5ever_streaming_parser_finish(self.handle.?);
|
||||
/// Finishes the streaming parse and then commits any pending text run.
/// Returns whatever error flushPendingText reports.
pub fn done(self: *Streaming) !void {
    // Null the handle before finish() so a flushPendingText failure can't
    // leave a finished-but-still-referenced handle behind for deinit to
    // double-free. flushPendingText doesn't touch the html5ever handle —
    // it only reads pending_text and writes to a text node's _data — so
    // running it after finish is safe.
    const handle = self.handle.?;
    self.handle = null;
    h5e.html5ever_streaming_parser_finish(handle);
    try self.parser.flushPendingText();
}
|
||||
};
|
||||
|
||||
@@ -252,6 +352,9 @@ fn popCallback(ctx: *anyopaque, node_ref: *anyopaque) callconv(.c) void {
|
||||
}
|
||||
|
||||
// Fallible body of popCallback: commits any pending text run, then reports
// `node` as complete to the frame.
fn _popCallback(self: *Parser, node: *Node) !void {
    // Flush before any nodeComplete so Build.complete (and any custom-element
    // callbacks reachable from it) observe the final text data.
    try self.flushPendingText();
    try self.frame.nodeComplete(node);
}
|
||||
|
||||
@@ -340,7 +443,7 @@ fn _appendDoctypeToDocument(self: *Parser, name: []const u8, public_id: []const
|
||||
});
|
||||
|
||||
// Append it to the document
|
||||
try frame.appendNew(self.container.node, .{ .node = doctype.asNode() });
|
||||
try frame.appendNew(self.container.node, doctype.asNode());
|
||||
}
|
||||
|
||||
fn addAttrsIfMissingCallback(ctx: *anyopaque, target_ref: *anyopaque, attributes: h5e.AttributeIterator) callconv(.c) void {
|
||||
@@ -402,6 +505,10 @@ fn _appendCallback(self: *Parser, parent: *Node, node_or_text: h5e.NodeOrText) !
|
||||
// child node is guaranteed not to belong to another parent
|
||||
switch (node_or_text.toUnion()) {
|
||||
.node => |cpn| {
|
||||
// Inserting a non-text child terminates any pending text run; flush
|
||||
// before the insertion so that connectedCallback (etc.) sees the
|
||||
// final data on the preceding text sibling.
|
||||
try self.flushPendingText();
|
||||
const child = getNode(cpn);
|
||||
if (child._parent) |previous_parent| {
|
||||
// html5ever says this can't happen, but we might be screwing up
|
||||
@@ -414,9 +521,9 @@ fn _appendCallback(self: *Parser, parent: *Node, node_or_text: h5e.NodeOrText) !
|
||||
}
|
||||
self.frame.removeNode(previous_parent, child, .{ .will_be_reconnected = parent.isConnected() });
|
||||
}
|
||||
try self.frame.appendNew(parent, .{ .node = child });
|
||||
try self.frame.appendNew(parent, child);
|
||||
},
|
||||
.text => |txt| try self.frame.appendNew(parent, .{ .text = txt }),
|
||||
.text => |txt| try self.appendTextChunk(parent, txt),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -427,6 +534,11 @@ fn removeFromParentCallback(ctx: *anyopaque, target_ref: *anyopaque) callconv(.c
|
||||
};
|
||||
}
|
||||
// Fallible body of removeFromParentCallback: detaches `node` from its parent,
// if it has one.
fn _removeFromParentCallback(self: *Parser, node: *Node) !void {
    // The removal may detach the pending text node or its parent, which would
    // invalidate the pending run. Commit the accumulated bytes first, while
    // the text node is still attached, and leave no run pending so the next
    // chunk starts from a clean state.
    try self.flushPendingText();

    if (node.parentNode()) |parent| {
        _ = try parent.removeChild(node, self.frame);
    }
}
|
||||
@@ -438,6 +550,10 @@ fn reparentChildrenCallback(ctx: *anyopaque, node_ref: *anyopaque, new_parent_re
|
||||
};
|
||||
}
|
||||
// Fallible body of reparentChildrenCallback: commits any pending text run,
// then hands `node` and `new_parent` to frame.appendAllChildren.
fn _reparentChildrenCallback(self: *Parser, node: *Node, new_parent: *Node) !void {
    // Reparenting can move the pending text node out from under us — the
    // node's _parent changes but pending_text.parent does not. Flush so the
    // accumulator commits before the tree is rearranged.
    try self.flushPendingText();
    try self.frame.appendAllChildren(node, new_parent);
}
|
||||
|
||||
@@ -448,6 +564,10 @@ fn appendBeforeSiblingCallback(ctx: *anyopaque, sibling_ref: *anyopaque, node_or
|
||||
};
|
||||
}
|
||||
fn _appendBeforeSiblingCallback(self: *Parser, sibling: *Node, node_or_text: h5e.NodeOrText) !void {
|
||||
// Foster parenting / before-sibling insertions interrupt any pending text
|
||||
// run (the new node lands at a different position from the pending text's
|
||||
// tail). Flush before reading the parent's structure.
|
||||
try self.flushPendingText();
|
||||
const parent = sibling.parentNode() orelse return error.NoParent;
|
||||
const node: *Node = switch (node_or_text.toUnion()) {
|
||||
.node => |cpn| blk: {
|
||||
|
||||
128
src/browser/tests/cdata/raw_text_chunked.html
Normal file
128
src/browser/tests/cdata/raw_text_chunked.html
Normal file
@@ -0,0 +1,128 @@
|
||||
<!DOCTYPE html>
|
||||
<script src="../testing.js"></script>
|
||||
|
||||
<!--
|
||||
Regression test for issue #2397.
|
||||
|
||||
When the html5ever tokenizer is in script-data / rawtext / rcdata state and
|
||||
encounters '<', it flushes the pending character buffer and re-enters via a
|
||||
fresh AppendText callback. Pre-fix, every chunk re-allocated and re-copied
|
||||
the growing text on the page-lifetime arena (O(N^2)). Real pages with
|
||||
embedded HTML strings inside JS literals blew memory to gigabytes.
|
||||
|
||||
This test guards correctness of the deferred-merge state machine in
|
||||
Parser.appendTextChunk / flushPendingText: a stress case big enough to
|
||||
cross many chunk boundaries, and exact byte-length assertions so that any
|
||||
future regression that drops or duplicates a chunk is caught. The memory
|
||||
bound itself is measured externally via the live reproducer documented in
|
||||
the PR description (apple.com US iPhone page: 3.5 GB → 125 MB).
|
||||
-->
|
||||
|
||||
<!--
|
||||
~1.3 KB script body containing many '<' characters in JS string literals.
|
||||
Each '<' forces html5ever to flush a chunk, exercising the same code path
|
||||
that produced gigabytes of allocations on apple.com pages.
|
||||
-->
|
||||
<script class="big-script">
|
||||
var a = "<a><b><c><d><e><f><g><h><i><j><k><l><m><n><o><p><q><r><s><t><u><v><w><x><y><z>";
|
||||
var b = "<aa><bb><cc><dd><ee><ff><gg><hh><ii><jj><kk><ll><mm><nn><oo><pp><qq><rr><ss><tt>";
|
||||
var c = "<uu><vv><ww><xx><yy><zz>";
|
||||
var d = "<a1><a2><a3><a4><a5><a6><a7><a8><a9><a0>";
|
||||
var e = "<b1><b2><b3><b4><b5><b6><b7><b8><b9><b0>";
|
||||
var f = "<c1><c2><c3><c4><c5><c6><c7><c8><c9><c0>";
|
||||
var g = "<d1><d2><d3><d4><d5><d6><d7><d8><d9><d0>";
|
||||
var h = "<e1><e2><e3><e4><e5><e6><e7><e8><e9><e0>";
|
||||
var i = "<f1><f2><f3><f4><f5><f6><f7><f8><f9><f0>";
|
||||
var j = "<g1><g2><g3><g4><g5><g6><g7><g8><g9><g0>";
|
||||
var k = "<h1><h2><h3><h4><h5><h6><h7><h8><h9><h0>";
|
||||
var l = "<i1><i2><i3><i4><i5><i6><i7><i8><i9><i0>";
|
||||
var m = "<j1><j2><j3><j4><j5><j6><j7><j8><j9><j0>";
|
||||
var n = "<k1><k2><k3><k4><k5><k6><k7><k8><k9><k0>";
|
||||
var o = "<l1><l2><l3><l4><l5><l6><l7><l8><l9><l0>";
|
||||
var p = "<m1><m2><m3><m4><m5><m6><m7><m8><m9><m0>";
|
||||
var q = "<n1><n2><n3><n4><n5><n6><n7><n8><n9><n0>";
|
||||
var r = "<o1><o2><o3><o4><o5><o6><o7><o8><o9><o0>";
|
||||
var s = "<p1><p2><p3><p4><p5><p6><p7><p8><p9><p0>";
|
||||
var t = "<q1><q2><q3><q4><q5><q6><q7><q8><q9><q0>";
|
||||
var u = "<r1><r2><r3><r4><r5><r6><r7><r8><r9><r0>";
|
||||
var v = "<s1><s2><s3><s4><s5><s6><s7><s8><s9><s0>";
|
||||
var w = "<t1><t2><t3><t4><t5><t6><t7><t8><t9><t0>";
|
||||
var x = "<u1><u2><u3><u4><u5><u6><u7><u8><u9><u0>";
|
||||
var done = "yes";
|
||||
</script>
|
||||
|
||||
<style class="big-style">
|
||||
/* less-than chars inside CSS string-literal content force chunk flushing
|
||||
in rawtext mode. Distinct content per rule so a regression that drops
|
||||
a mid-body chunk fails one of the indexOf checks below. */
|
||||
.a::before { content: "<a><b><c><d><e><f><g><h><i><j>"; }
|
||||
.b::before { content: "<k><l><m><n><o><p><q><r><s><t>"; }
|
||||
.c::before { content: "<u><v><w><x><y><z>"; }
|
||||
.d::before { content: "<aa><bb><cc><dd><ee><ff>"; }
|
||||
.e::before { content: "<gg><hh><ii><jj><kk><ll>"; }
|
||||
.f::before { content: "<mm><nn><oo><pp><qq><rr>"; }
|
||||
</style>
|
||||
|
||||
<!-- textarea is RCDATA: '<' in source must be escaped, but entities decode. -->
|
||||
<textarea class="big-textarea">line 1: <a><b><c><d><e><f>
|
||||
line 2: <g><h><i><j><k><l>
|
||||
line 3: <m><n><o><p><q><r>
|
||||
line 4: <s><t><u><v><w><x><y><z></textarea>
|
||||
|
||||
<title>A page <with> many <tags> in <the> title for testing</title>
|
||||
|
||||
<script id="rawtextchunked">
|
||||
// 1. Each raw-text element parses to exactly ONE text node child.
|
||||
let script = document.querySelector('script.big-script');
|
||||
testing.expectEqual(1, script.childNodes.length);
|
||||
testing.expectTrue(script.firstChild === script.lastChild);
|
||||
testing.expectEqual(3, script.firstChild.nodeType); // TEXT_NODE
|
||||
|
||||
let style = document.querySelector('style.big-style');
|
||||
testing.expectEqual(1, style.childNodes.length);
|
||||
testing.expectTrue(style.firstChild === style.lastChild);
|
||||
|
||||
let textarea = document.querySelector('textarea.big-textarea');
|
||||
testing.expectEqual(1, textarea.childNodes.length);
|
||||
testing.expectTrue(textarea.firstChild === textarea.lastChild);
|
||||
|
||||
let title = document.querySelector('title');
|
||||
testing.expectEqual(1, title.childNodes.length);
|
||||
testing.expectTrue(title.firstChild === title.lastChild);
|
||||
|
||||
// 2. Exact byte counts. A regression that drops or duplicates a chunk in
|
||||
// Parser.appendTextChunk would shift these.
|
||||
// '<' total in the script source:
|
||||
// 26 (a-z) + 20 (aa-tt) + 6 (uu-zz) + 21 lines * 10 (a1..u0) = 262.
|
||||
let body = script.firstChild.nodeValue;
|
||||
let lt_count = (body.match(/</g) || []).length;
|
||||
testing.expectEqual(262, lt_count);
|
||||
// String literals must round-trip — pick samples from the start, middle,
|
||||
// and end so a regression that loses a mid-body chunk fails loudly.
|
||||
testing.expectTrue(body.indexOf('<a><b><c><d>') !== -1);
|
||||
testing.expectTrue(body.indexOf('<m1><m2><m3>') !== -1);
|
||||
testing.expectTrue(body.indexOf('<u1><u2><u3>') !== -1);
|
||||
testing.expectTrue(body.indexOf('var done = "yes"') !== -1);
|
||||
|
||||
// 3. Style: 6 rules with distinct content, all preserved.
|
||||
let style_body = style.firstChild.nodeValue;
|
||||
let style_lt_count = (style_body.match(/</g) || []).length;
|
||||
// 10 + 10 + 6 + 6 + 6 + 6 = 44 '<' chars across the six rules.
|
||||
testing.expectEqual(44, style_lt_count);
|
||||
testing.expectTrue(style_body.indexOf('"<a><b><c>') !== -1);
|
||||
testing.expectTrue(style_body.indexOf('"<gg><hh>') !== -1);
|
||||
testing.expectTrue(style_body.indexOf('"<mm><nn>') !== -1);
|
||||
|
||||
// 4. Textarea (RCDATA): entities decoded. Exact line count and content.
|
||||
let ta_value = textarea.value;
|
||||
testing.expectEqual(4, ta_value.split('\n').length);
|
||||
// 6 + 6 + 6 + 8 = 26 '<' chars across the 4 lines after entity decoding.
|
||||
let ta_lt_count = (ta_value.match(/</g) || []).length;
|
||||
testing.expectEqual(26, ta_lt_count);
|
||||
testing.expectTrue(ta_value.indexOf('line 1: <a><b>') !== -1);
|
||||
testing.expectTrue(ta_value.indexOf('line 4: <s><t><u>') !== -1);
|
||||
|
||||
// 5. Title (RCDATA): entities decoded; exact equality.
|
||||
testing.expectEqual('A page <with> many <tags> in <the> title for testing',
|
||||
title.textContent);
|
||||
</script>
|
||||
8
src/browser/tests/cdp/perform_search_xpath.html
Normal file
8
src/browser/tests/cdp/perform_search_xpath.html
Normal file
@@ -0,0 +1,8 @@
|
||||
<!DOCTYPE html>
|
||||
<body>
|
||||
<div id=outer>
|
||||
<p>1</p>
|
||||
<p>2</p>
|
||||
</div>
|
||||
<p>3</p>
|
||||
</body>
|
||||
@@ -380,6 +380,53 @@
|
||||
testing.expectEqual(0, nd.childElementCount);
|
||||
</script>
|
||||
|
||||
<script id=adoptNode>
|
||||
{
|
||||
// Adopting a Document throws NotSupportedError
|
||||
testing.withError((err) => {
|
||||
testing.expectEqual(9, err.code);
|
||||
testing.expectEqual("NotSupportedError", err.name);
|
||||
}, () => document.adoptNode(document));
|
||||
|
||||
// Same-document adopt: returns the node, clears parent, owner unchanged
|
||||
const el = document.createElement('div');
|
||||
const child = document.createElement('span');
|
||||
el.appendChild(child);
|
||||
document.body.appendChild(el);
|
||||
testing.expectEqual(document.body, el.parentNode);
|
||||
testing.expectEqual(document, el.ownerDocument);
|
||||
testing.expectEqual(document, child.ownerDocument);
|
||||
|
||||
testing.expectEqual(el, document.adoptNode(el));
|
||||
testing.expectEqual(null, el.parentNode);
|
||||
testing.expectEqual(child, el.firstChild);
|
||||
testing.expectEqual(document, el.ownerDocument);
|
||||
testing.expectEqual(document, child.ownerDocument);
|
||||
|
||||
// Cross-document adopt: node + descendants retarget to the new document
|
||||
const otherDoc = new Document();
|
||||
testing.expectEqual(el, otherDoc.adoptNode(el));
|
||||
testing.expectEqual(null, el.parentNode);
|
||||
testing.expectEqual(child, el.firstChild);
|
||||
testing.expectEqual(otherDoc, el.ownerDocument);
|
||||
testing.expectEqual(otherDoc, child.ownerDocument);
|
||||
|
||||
// Round-trip back to the main document
|
||||
testing.expectEqual(el, document.adoptNode(el));
|
||||
testing.expectEqual(document, el.ownerDocument);
|
||||
testing.expectEqual(document, child.ownerDocument);
|
||||
|
||||
// Adopt across documents removes the node from its old parent
|
||||
const orphan = document.createElement('p');
|
||||
document.body.appendChild(orphan);
|
||||
testing.expectEqual(document.body, orphan.parentNode);
|
||||
const otherDoc2 = new Document();
|
||||
testing.expectEqual(orphan, otherDoc2.adoptNode(orphan));
|
||||
testing.expectEqual(null, orphan.parentNode);
|
||||
testing.expectEqual(otherDoc2, orphan.ownerDocument);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=adoptedStyleSheets>
|
||||
{
|
||||
const acss = document.adoptedStyleSheets;
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
document.querySelector('p[data-random="abc\\5C def"]').textContent);
|
||||
|
||||
// A bare newline inside a string token is a parse error.
|
||||
testing.expectError("Error: InvalidAttributeSelector",
|
||||
testing.expectError("SyntaxError",
|
||||
() => document.querySelector('p[data-random="line one\nline two"]'));
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -1,6 +1,32 @@
|
||||
<!DOCTYPE html>
|
||||
<script src="../../testing.js"></script>
|
||||
|
||||
<link id="lh1" rel="stylesheet" href="/styles/main.css" media="screen" as="style" crossorigin="anonymous">
|
||||
<link id="lh2">
|
||||
|
||||
<script id="link-from-html">
|
||||
{
|
||||
const lh1 = document.getElementById('lh1');
|
||||
testing.expectEqual('HTMLLinkElement', lh1.constructor.name);
|
||||
testing.expectEqual('stylesheet', lh1.rel);
|
||||
testing.expectEqual(testing.ORIGIN + '/styles/main.css', lh1.href);
|
||||
testing.expectEqual('screen', lh1.media);
|
||||
testing.expectEqual('style', lh1.as);
|
||||
testing.expectEqual('anonymous', lh1.crossOrigin);
|
||||
|
||||
lh1.rel = 'preload';
|
||||
testing.expectEqual('preload', lh1.rel);
|
||||
lh1.media = 'print';
|
||||
testing.expectEqual('print', lh1.media);
|
||||
|
||||
const lh2 = document.getElementById('lh2');
|
||||
testing.expectEqual('', lh2.rel);
|
||||
testing.expectEqual('', lh2.href);
|
||||
testing.expectEqual('', lh2.media);
|
||||
testing.expectEqual('', lh2.as);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=link>
|
||||
let l2 = document.createElement('link');
|
||||
testing.expectEqual('', l2.href);
|
||||
@@ -18,6 +44,12 @@
|
||||
|
||||
l2.crossOrigin = '';
|
||||
testing.expectEqual('anonymous', l2.crossOrigin);
|
||||
|
||||
testing.expectEqual('', l2.media);
|
||||
l2.media = 'screen and (max-width: 600px)';
|
||||
testing.expectEqual('screen and (max-width: 600px)', l2.media);
|
||||
l2.media = 'print';
|
||||
testing.expectEqual('print', l2.media);
|
||||
</script>
|
||||
|
||||
<script id="link-load-event">
|
||||
|
||||
@@ -10,9 +10,9 @@
|
||||
const container = $('#container');
|
||||
|
||||
// Empty functional pseudo-classes should error
|
||||
testing.expectError("Error: InvalidPseudoClass", () => container.querySelector(':has()'));
|
||||
testing.expectError("Error: InvalidPseudoClass", () => container.querySelector(':not()'));
|
||||
testing.expectError("Error: InvalidPseudoClass", () => container.querySelector(':lang()'));
|
||||
testing.expectError("SyntaxError", () => container.querySelector(':has()'));
|
||||
testing.expectError("SyntaxError", () => container.querySelector(':not()'));
|
||||
testing.expectError("SyntaxError", () => container.querySelector(':lang()'));
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -21,9 +21,9 @@
|
||||
const container = $('#container');
|
||||
|
||||
// Invalid nth patterns
|
||||
testing.expectError("Error: InvalidNthPattern", () => container.querySelector(':nth-child(foo)'));
|
||||
testing.expectError("Error: InvalidNthPattern", () => container.querySelector(':nth-child(-)'));
|
||||
testing.expectError("Error: InvalidNthPattern", () => container.querySelector(':nth-child(+)'));
|
||||
testing.expectError("SyntaxError", () => container.querySelector(':nth-child(foo)'));
|
||||
testing.expectError("SyntaxError", () => container.querySelector(':nth-child(-)'));
|
||||
testing.expectError("SyntaxError", () => container.querySelector(':nth-child(+)'));
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -32,9 +32,9 @@
|
||||
const container = $('#container');
|
||||
|
||||
// Unknown pseudo-classes
|
||||
testing.expectError("Error: UnknownPseudoClass", () => container.querySelector(':unknown'));
|
||||
testing.expectError("Error: UnknownPseudoClass", () => container.querySelector(':not-a-real-pseudo'));
|
||||
testing.expectError("Error: UnknownPseudoClass", () => container.querySelector(':fake(test)'));
|
||||
testing.expectError("SyntaxError", () => container.querySelector(':unknown'));
|
||||
testing.expectError("SyntaxError", () => container.querySelector(':not-a-real-pseudo'));
|
||||
testing.expectError("SyntaxError", () => container.querySelector(':fake(test)'));
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -53,8 +53,8 @@
|
||||
const container = $('#container');
|
||||
|
||||
// Combinators with nothing after
|
||||
testing.expectError("Error: InvalidSelector", () => container.querySelector('p >'));
|
||||
testing.expectError("Error: InvalidSelector", () => container.querySelector('p +'));
|
||||
testing.expectError("Error: InvalidSelector", () => container.querySelector('p ~'));
|
||||
testing.expectError("SyntaxError", () => container.querySelector('p >'));
|
||||
testing.expectError("SyntaxError", () => container.querySelector('p +'));
|
||||
testing.expectError("SyntaxError", () => container.querySelector('p ~'));
|
||||
}
|
||||
</script>
|
||||
|
||||
12
src/browser/tests/inject_script.html
Normal file
12
src/browser/tests/inject_script.html
Normal file
@@ -0,0 +1,12 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<script src="./testing.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<script id=inject-script-runs>
|
||||
testing.expectEqual(true, window.__injected);
|
||||
testing.expectEqual(42, window.__injectValue);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -30,7 +30,8 @@
|
||||
testing.expectEqual('undefined', typeof plainDoc.scripts);
|
||||
testing.expectEqual('undefined', typeof plainDoc.links);
|
||||
testing.expectEqual('undefined', typeof plainDoc.forms);
|
||||
testing.expectEqual('undefined', typeof plainDoc.location);
|
||||
// location lives on Document (returns null for non-HTMLDocument).
|
||||
testing.expectEqual(null, plainDoc.location);
|
||||
|
||||
// Both should have common Document properties
|
||||
testing.expectEqual('string', typeof document.URL);
|
||||
|
||||
123
src/browser/tests/xpath/document_evaluate.html
Normal file
123
src/browser/tests/xpath/document_evaluate.html
Normal file
@@ -0,0 +1,123 @@
|
||||
<!DOCTYPE html>
|
||||
<body>
|
||||
<script src="../testing.js"></script>
|
||||
<h1 id=title>Hello</h1>
|
||||
<div class=group>
|
||||
<p id=p1>First</p>
|
||||
<p id=p2>Second</p>
|
||||
<p id=p3>Third</p>
|
||||
</div>
|
||||
<span id=span1 data-x="42">x</span>
|
||||
</body>
|
||||
|
||||
<script id=snapshot_basic>
|
||||
{
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(true, r instanceof XPathResult);
|
||||
testing.expectEqual(XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, r.resultType);
|
||||
testing.expectEqual(3, r.snapshotLength);
|
||||
testing.expectEqual('p1', r.snapshotItem(0).id);
|
||||
testing.expectEqual('p3', r.snapshotItem(2).id);
|
||||
testing.expectEqual(null, r.snapshotItem(3));
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=default_context>
|
||||
{
|
||||
const r1 = document.evaluate("//p", null, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(3, r1.snapshotLength);
|
||||
const r2 = document.evaluate("//p", undefined, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(3, r2.snapshotLength);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=first_ordered_node>
|
||||
{
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
||||
testing.expectEqual(XPathResult.FIRST_ORDERED_NODE_TYPE, r.resultType);
|
||||
testing.expectEqual('p1', r.singleNodeValue.id);
|
||||
|
||||
const empty = document.evaluate("//nope", document, null,
|
||||
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
||||
testing.expectEqual(null, empty.singleNodeValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=number_type>
|
||||
{
|
||||
const r = document.evaluate("count(//p)", document, null,
|
||||
XPathResult.NUMBER_TYPE, null);
|
||||
testing.expectEqual(XPathResult.NUMBER_TYPE, r.resultType);
|
||||
testing.expectEqual(3, r.numberValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=string_type>
|
||||
{
|
||||
const r = document.evaluate("string(//h1)", document, null,
|
||||
XPathResult.STRING_TYPE, null);
|
||||
testing.expectEqual(XPathResult.STRING_TYPE, r.resultType);
|
||||
testing.expectEqual('Hello', r.stringValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=boolean_type>
|
||||
{
|
||||
const r = document.evaluate("count(//p) > 0", document, null,
|
||||
XPathResult.BOOLEAN_TYPE, null);
|
||||
testing.expectEqual(XPathResult.BOOLEAN_TYPE, r.resultType);
|
||||
testing.expectEqual(true, r.booleanValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=any_type>
|
||||
{
|
||||
const ns = document.evaluate("//p", document, null,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
testing.expectEqual(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, ns.resultType);
|
||||
testing.expectEqual('p1', ns.iterateNext().id);
|
||||
|
||||
const num = document.evaluate("count(//p)", document, null,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
testing.expectEqual(XPathResult.NUMBER_TYPE, num.resultType);
|
||||
testing.expectEqual(3, num.numberValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=context_node_scoping>
|
||||
{
|
||||
const div = document.querySelector('div.group');
|
||||
const r = document.evaluate("./p", div, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(3, r.snapshotLength);
|
||||
testing.expectEqual('p1', r.snapshotItem(0).id);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=create_expression>
|
||||
{
|
||||
const expr = document.createExpression("//p", null);
|
||||
testing.expectEqual(true, expr instanceof XPathExpression);
|
||||
const r = expr.evaluate(document, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(3, r.snapshotLength);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=create_ns_resolver>
|
||||
{
|
||||
const resolver = document.createNSResolver(document);
|
||||
testing.expectEqual(document, resolver);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=attribute_axis>
|
||||
{
|
||||
const r = document.evaluate("//span/@data-x", document, null,
|
||||
XPathResult.STRING_TYPE, null);
|
||||
testing.expectEqual('42', r.stringValue);
|
||||
}
|
||||
</script>
|
||||
201
src/browser/tests/xpath/xpath_conformance.html
Normal file
201
src/browser/tests/xpath/xpath_conformance.html
Normal file
@@ -0,0 +1,201 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>XPath conformance</title>
|
||||
<script src="../testing.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<h1 id="heading" class="primary">Hello World</h1>
|
||||
<p id="p1" lang="en" data-x="1">First paragraph with <em>emphasis</em>.</p>
|
||||
<p id="p2" class="note">Second paragraph.</p>
|
||||
<ul id="list">
|
||||
<li class="item odd">Item 1</li>
|
||||
<li class="item even">Item 2</li>
|
||||
<li class="item odd">Item 3</li>
|
||||
<li class="item even">Item 4</li>
|
||||
<li class="item odd">Item 5</li>
|
||||
</ul>
|
||||
<table id="t">
|
||||
<thead><tr><th>Name</th><th>Age</th></tr></thead>
|
||||
<tbody>
|
||||
<tr class="r"><td>Alice</td><td>30</td></tr>
|
||||
<tr class="r"><td>Bob</td><td>25</td></tr>
|
||||
<tr class="r"><td>Carol</td><td>40</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<div id="container">
|
||||
<section id="s1"><span>A</span><span>B</span></section>
|
||||
<section id="s2"><a href="/foo" id="link1">Click me</a></section>
|
||||
<section id="s3"><a href="/bar" id="link2">Other link</a></section>
|
||||
</div>
|
||||
<form id="form">
|
||||
<label for="name">Name</label>
|
||||
<input id="name" type="text" name="name" value="">
|
||||
<input id="email" type="email" name="email" value="">
|
||||
<input id="hidden" type="hidden" name="csrf" value="x">
|
||||
<input id="checkbox" type="checkbox" name="agree">
|
||||
<button id="btn" type="submit">Submit</button>
|
||||
</form>
|
||||
<!-- a comment node -->
|
||||
<div id="multi-class" class="alpha beta gamma"></div>
|
||||
<article id="art">
|
||||
<p>One</p>
|
||||
<p>Two</p>
|
||||
<p>Three</p>
|
||||
</article>
|
||||
|
||||
<script id=conformance_battery>
|
||||
{
|
||||
// XPath 1.0 conformance battery. Result counts derive from the body
|
||||
// fixture above; keep the two in sync.
|
||||
const cases = [
|
||||
// Absolute paths
|
||||
["/html", 1, "absolute root child"],
|
||||
["/html/body", 1, "/html/body absolute"],
|
||||
["/", 1, "root only"],
|
||||
|
||||
// Descendant abbreviations
|
||||
["//h1", 1, "// descendant"],
|
||||
["//ul/li", 5, "/ child"],
|
||||
["//ul//li", 5, "// nested descendant"],
|
||||
[".", 1, "self ."],
|
||||
[".//li", 5, "context-rel descendant"],
|
||||
|
||||
// Wildcards
|
||||
["//section/*", 4, "//section/* (2 spans + 2 anchors)"],
|
||||
["//*[@id='heading']", 1, "//*[@id]"],
|
||||
|
||||
// Axes
|
||||
["//li[1]/following-sibling::li", 4, "following-sibling"],
|
||||
["//li[5]/preceding-sibling::li", 4, "preceding-sibling"],
|
||||
["//li/parent::ul", 1, "parent::"],
|
||||
["//li/ancestor::body", 1, "ancestor::body"],
|
||||
["//li/ancestor-or-self::body", 1, "ancestor-or-self::"],
|
||||
["//li[3]/preceding::li", 2, "preceding axis"],
|
||||
["//li[1]/following::li", 4, "following axis"],
|
||||
["//ul/descendant::li", 5, "descendant axis"],
|
||||
["//ul/descendant-or-self::li", 5, "descendant-or-self::li"],
|
||||
["//section[1]/child::span", 2, "child:: explicit"],
|
||||
["//*[@id='heading']/self::h1", 1, "self:: type guard"],
|
||||
|
||||
// Attribute axis
|
||||
["//a[1]/attribute::href", 2, "attribute::href"],
|
||||
["//a[1]/@*", 4, "@* (2 anchors x 2 attrs)"],
|
||||
|
||||
// Position predicates
|
||||
["//li[1]", 1, "[1]"],
|
||||
["//li[last()]", 1, "[last()]"],
|
||||
["//li[last() - 1]", 1, "[last() - 1]"],
|
||||
["//li[position() = 1]", 1, "explicit position()"],
|
||||
["//li[position() > 2]", 3, "position > 2"],
|
||||
["//li[position() mod 2 = 1]", 3, "position mod 2 = 1 (odd)"],
|
||||
["(//li)[1]", 1, "(//li)[1] filter on group"],
|
||||
["(//section)[2]", 1, "(//section)[2] grouped"],
|
||||
|
||||
// Reverse-axis proximity-order predicates
|
||||
["//li[3]/preceding-sibling::li[1]", 1, "reverse axis [1] = nearest"],
|
||||
["//li[5]/ancestor::*[1]", 1, "ancestor::*[1] = parent ul"],
|
||||
|
||||
// Multi-predicate / chained
|
||||
["//li[contains(concat(' ', @class, ' '), ' even ')][2]", 1, "filter then position [2]"],
|
||||
["//*[@id='heading' and @class='primary']", 1, "and"],
|
||||
["//*[@id='heading' or @id='p1']", 2, "or"],
|
||||
|
||||
// Sub-path predicates
|
||||
["//section[a]", 2, "section with a child"],
|
||||
["//section[count(span) = 2]", 1, "count() in predicate"],
|
||||
["//ul[count(li) = 5]", 1, "count() = 5"],
|
||||
["//tr[td[1]]", 3, "tr with first td (sub-step)"],
|
||||
["//tr[td/text() = 'Bob']", 1, "deep sub-path equality"],
|
||||
|
||||
// String functions
|
||||
["//*[starts-with(@id, 'link')]", 2, "starts-with"],
|
||||
["//*[normalize-space() = 'Hello World']", 1, "normalize-space() default arg"],
|
||||
["//*[normalize-space(.) = 'Item 1']", 1, "normalize-space(arg)"],
|
||||
["//*[concat(@id, '-x') = 'heading-x']", 1, "concat"],
|
||||
["//*[substring(@id, 1, 1) = 'p']", 2, "substring (3 args)"],
|
||||
["//*[substring(@id, 2, 1) = '1' and starts-with(@id, 'p')]", 1, "substring constrained"],
|
||||
["//p[translate(@id, 'p', 'q') = 'q1']", 1, "translate"],
|
||||
["//*[substring-before(@id, '1') = 'p']", 1, "substring-before"],
|
||||
["//*[substring-after(@id, 'lin') = 'k1']", 1, "substring-after"],
|
||||
|
||||
// Number functions
|
||||
["//tr[number(td[2]) > 28]", 2, "number() in compare"],
|
||||
["//tr[floor(number(td[2]) div 10) = 3]", 1, "floor + div"],
|
||||
["//tr[ceiling(number(td[2]) div 10) = 3]", 2, "ceiling + div"],
|
||||
["//tr[round(number(td[2]) div 10) = 3]", 2, "round half-up"],
|
||||
["//ul[sum(li/@data-len) = 0]", 1, "sum() over empty path -> 0"],
|
||||
|
||||
// Boolean functions
|
||||
["//p[boolean(@lang)]", 1, "boolean()"],
|
||||
["//*[false()]", 0, "false() always-false"],
|
||||
|
||||
// name() / local-name() — lowercased per decision #2
|
||||
["//*[name() = 'h1']", 1, "name() of context"],
|
||||
["//*[local-name() = 'h1']", 1, "local-name() of context"],
|
||||
|
||||
// id()
|
||||
["id('heading')", 1, "id()"],
|
||||
["id('heading p1')", 2, "id() multi-token"],
|
||||
["id(//em/parent::p/@id)", 1, "id() on attribute string-value"],
|
||||
|
||||
// Union
|
||||
["//h1 | //title", 2, "union (h1 + title)"],
|
||||
["//h1 | //*[@id='p1']", 2, "union of 2 different selectors"],
|
||||
["//*[@id='heading'] | //*[@id='heading']", 1, "self-union dedups"],
|
||||
|
||||
// Arithmetic
|
||||
["//li[position() + 1 = 3]", 1, "+"],
|
||||
["//li[position() - 1 = 0]", 1, "-"],
|
||||
["//li[position() * 2 = 4]", 1, "* multiply"],
|
||||
["//li[position() div 2 = 1]", 1, "div"],
|
||||
["//li[(position() mod 2) = 0]", 2, "mod"],
|
||||
|
||||
// Comparison — header tr has only th cells, so number(td[2]) is NaN, so
|
||||
// != against any number is true (NaN equals nothing).
|
||||
["//tr[number(td[2]) = 30]", 1, "= numeric"],
|
||||
["//tr[number(td[2]) != 30]", 3, "!= numeric (header NaN passes)"],
|
||||
["//tr[number(td[2]) < 30]", 1, "< numeric"],
|
||||
["//tr[number(td[2]) <= 30]", 2, "<= numeric"],
|
||||
["//tr[number(td[2]) > 30]", 1, "> numeric"],
|
||||
["//tr[number(td[2]) >= 30]", 2, ">= numeric"],
|
||||
["//tr[td[2] = 30]", 1, "string-vs-number coercion"],
|
||||
["//tr[td[2] = '30']", 1, "string-vs-string equality"],
|
||||
|
||||
// Node tests
|
||||
["//comment()", 1, "comment() node test"],
|
||||
|
||||
// Capybara-style real-world expressions
|
||||
[".//a[contains(normalize-space(string(.)), 'Click me')]", 1, "Capybara link locator"],
|
||||
[".//input[(./@type = 'text')]", 1, "Capybara text-field"],
|
||||
[".//*[@id='heading']", 1, "find-by-id"],
|
||||
[".//li[contains(concat(' ', @class, ' '), ' even ')]", 2, "class contains pattern"],
|
||||
|
||||
// Tricky / edge
|
||||
["//*[@id='heading']/text()", 1, "text() child of element"],
|
||||
["//em/parent::p", 1, "parent of inline"],
|
||||
["//p[em]", 1, "p with em descendant"],
|
||||
["//p[not(em)]", 4, "p without em"],
|
||||
["//section[a/@href = '/foo']", 1, "deep attribute eq"],
|
||||
["//ul/li[last()][position() = last()]", 1, "double last()"],
|
||||
["//ul[string(count(li)) = '5']", 1, "string() of number"],
|
||||
["//body[count(//*[contains(@class, 'item')]) = 5]", 1, "nested count of contains()"],
|
||||
];
|
||||
|
||||
for (const [xp, expected, desc] of cases) {
|
||||
let r;
|
||||
try {
|
||||
r = document.evaluate(xp, document, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
} catch (e) {
|
||||
testing.fail(`[${desc}] ${xp} → threw: ${(e && e.message) || e}`);
|
||||
}
|
||||
if (r.snapshotLength !== expected) {
|
||||
testing.fail(`[${desc}] ${xp} → got ${r.snapshotLength}, expected ${expected}`);
|
||||
}
|
||||
}
|
||||
testing.expectEqual(91, cases.length);
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
103
src/browser/tests/xpath/xpath_evaluator.html
Normal file
103
src/browser/tests/xpath/xpath_evaluator.html
Normal file
@@ -0,0 +1,103 @@
|
||||
<!DOCTYPE html>
|
||||
<body>
|
||||
<script src="../testing.js"></script>
|
||||
<h1 id=h>Hello</h1>
|
||||
<p id=p1>One</p>
|
||||
<p id=p2>Two</p>
|
||||
</body>
|
||||
|
||||
<script id=create_expression>
|
||||
{
|
||||
const expr = document.createExpression("//p", null);
|
||||
testing.expectEqual(true, expr instanceof XPathExpression);
|
||||
testing.expectEqual('function', typeof expr.evaluate);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=expression_evaluate>
|
||||
{
|
||||
const expr = document.createExpression("//p", null);
|
||||
const r = expr.evaluate(document, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(true, r instanceof XPathResult);
|
||||
testing.expectEqual(2, r.snapshotLength);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=expression_reuse_distinct_types>
|
||||
{
|
||||
// The cached AST should support multiple evaluations against
|
||||
// different requested types.
|
||||
const expr = document.createExpression("//p", null);
|
||||
|
||||
const snap = expr.evaluate(document, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(2, snap.snapshotLength);
|
||||
|
||||
const iter = expr.evaluate(document, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
|
||||
testing.expectEqual('p1', iter.iterateNext().id);
|
||||
testing.expectEqual('p2', iter.iterateNext().id);
|
||||
|
||||
const first = expr.evaluate(document, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
||||
testing.expectEqual('p1', first.singleNodeValue.id);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=expression_reuse_distinct_contexts>
|
||||
{
|
||||
// Re-evaluating with a different context node should rescope.
|
||||
const expr = document.createExpression("p", null);
|
||||
|
||||
const all = expr.evaluate(document.body, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(2, all.snapshotLength);
|
||||
|
||||
const empty = expr.evaluate(document.querySelector('h1'),
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(0, empty.snapshotLength);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=create_ns_resolver>
|
||||
{
|
||||
const resolver = document.createNSResolver(document);
|
||||
testing.expectEqual(document, resolver);
|
||||
const elt = document.createNSResolver(document.body);
|
||||
testing.expectEqual(document.body, elt);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=new_xpath_evaluator>
|
||||
{
|
||||
const ev = new XPathEvaluator();
|
||||
testing.expectEqual(true, ev instanceof XPathEvaluator);
|
||||
testing.expectEqual('function', typeof ev.evaluate);
|
||||
testing.expectEqual('function', typeof ev.createExpression);
|
||||
testing.expectEqual('function', typeof ev.createNSResolver);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=evaluator_evaluate>
|
||||
{
|
||||
const ev = new XPathEvaluator();
|
||||
const r = ev.evaluate("//p", document, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(true, r instanceof XPathResult);
|
||||
testing.expectEqual(2, r.snapshotLength);
|
||||
testing.expectEqual('p1', r.snapshotItem(0).id);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=evaluator_create_expression>
|
||||
{
|
||||
const ev = new XPathEvaluator();
|
||||
const expr = ev.createExpression("count(//p)", null);
|
||||
testing.expectEqual(true, expr instanceof XPathExpression);
|
||||
const r = expr.evaluate(document, XPathResult.NUMBER_TYPE, null);
|
||||
testing.expectEqual(2, r.numberValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=evaluator_create_ns_resolver>
|
||||
{
|
||||
const ev = new XPathEvaluator();
|
||||
testing.expectEqual(document, ev.createNSResolver(document));
|
||||
}
|
||||
</script>
|
||||
171
src/browser/tests/xpath/xpath_perf.html
Normal file
171
src/browser/tests/xpath/xpath_perf.html
Normal file
@@ -0,0 +1,171 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>XPath perf benchmark</title>
|
||||
<script src="../testing.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<!--
|
||||
Micro-benchmark for the XPath evaluator.
|
||||
|
||||
Builds a deterministic DOM (TREE_SIZE = 500), then runs each query
|
||||
a few warmup iterations followed by ITERATIONS timed iterations.
|
||||
Reports mean µs/iter via console.warn (the test harness sets
|
||||
log level to .warn and silently drops info-level lines, so
|
||||
console.log doesn't surface; console.warn does). A snapshotLength
|
||||
mismatch fails the test loudly via testing.fail so a regression in
|
||||
result count can't be hidden by the timing line.
|
||||
|
||||
To run, uncomment the test in XPathResult.zig (bottom of the file), then:
|
||||
Run: make test F="#xpath_perf"
|
||||
|
||||
Query shapes target the optimization roadmap:
|
||||
//*[@id='x'] — global ID lookup (fast-path candidate)
|
||||
//tag[@id='x'] — typed ID lookup (fast-path candidate)
|
||||
//tag — pure descendant tag scan
|
||||
//*[@class='x'] — non-ID attribute filter (no fast path)
|
||||
(//tag)[1] / [last()] — early-exit candidates (iterator opt)
|
||||
count(//tag) — early-exit candidate (iterator opt)
|
||||
Plus a few that should NOT change so we can detect regressions.
|
||||
|
||||
Tuning: keep TREE_SIZE × ITERATIONS small enough that the test
|
||||
finishes in <1s on debug builds. Adjust ITERATIONS up if numbers
|
||||
are noisy.
|
||||
-->
|
||||
|
||||
<script id=xpath_perf_setup>
|
||||
{
|
||||
const TREE_SIZE = 500;
|
||||
const TAGS = ["div", "span", "p"];
|
||||
const CLASSES = ["alpha", "beta", "gamma"];
|
||||
const TARGET_INDEX = 250;
|
||||
|
||||
// Expose the construction parameters for the run script.
|
||||
window.__perf = { TREE_SIZE, TAGS, CLASSES, TARGET_INDEX };
|
||||
|
||||
// Decorrelate tag (period 3) and class (period 4) so that
|
||||
// //div[@class='alpha'] is not a degenerate restatement of //div.
|
||||
function tagOf(i) { return TAGS[i % TAGS.length]; }
|
||||
function classOf(i) { return CLASSES[(i % 4) % CLASSES.length]; }
|
||||
|
||||
// Pre-compute expected counts so the assertions don't have to
|
||||
// re-derive the formula. Stored on window.__perf for the run script.
|
||||
const tags = Object.fromEntries(TAGS.map(t => [t, 0]));
|
||||
const classes = Object.fromEntries(CLASSES.map(c => [c, 0]));
|
||||
const cross = {};
|
||||
for (let i = 0; i < TREE_SIZE; i++) {
|
||||
const t = tagOf(i), c = classOf(i);
|
||||
tags[t]++;
|
||||
classes[c]++;
|
||||
const k = `${t}-${c}`;
|
||||
cross[k] = (cross[k] || 0) + 1;
|
||||
}
|
||||
window.__perf.tags = tags;
|
||||
window.__perf.classes = classes;
|
||||
window.__perf.cross = cross;
|
||||
window.__perf.targetTag = tagOf(TARGET_INDEX);
|
||||
|
||||
// Build the fixture body via innerHTML in one shot. Faster than
|
||||
// createElement loops because html5ever parses the whole string at
|
||||
// once and we don't pay per-element bridge crossings.
|
||||
const parts = [];
|
||||
for (let i = 0; i < TREE_SIZE; i++) {
|
||||
const id = (i === TARGET_INDEX) ? "target" : `n${i}`;
|
||||
parts.push(`<${tagOf(i)} id="${id}" class="${classOf(i)}">item ${i}</${tagOf(i)}>`);
|
||||
}
|
||||
// Use <main> as the wrapper so //div, //span, //p count only the
|
||||
// generated children (the wrapper itself doesn't share a tag with
|
||||
// any test query).
|
||||
const root = document.createElement("main");
|
||||
root.id = "perf_root";
|
||||
root.innerHTML = parts.join("");
|
||||
document.body.appendChild(root);
|
||||
|
||||
testing.expectEqual(TREE_SIZE, root.children.length);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=xpath_perf_run>
|
||||
{
|
||||
const ITERATIONS = 50;
|
||||
const WARMUP = 3;
|
||||
const { TREE_SIZE, tags, classes, cross, targetTag } = window.__perf;
|
||||
|
||||
const cases = [
|
||||
// --- ID lookups (fast-path #1 candidates) ---
|
||||
{ xp: "//*[@id='target']", expect: 1, label: "id-any" },
|
||||
{ xp: `//${targetTag}[@id='target']`, expect: 1, label: "id-typed-hit" },
|
||||
{ xp: "//div[@id='target']", expect: targetTag === "div" ? 1 : 0, label: "id-typed-miss" },
|
||||
|
||||
// --- Pure tag descendant ---
|
||||
{ xp: "//div", expect: tags.div, label: "tag-descendant" },
|
||||
{ xp: "//span", expect: tags.span, label: "tag-descendant-span" },
|
||||
{ xp: "//*", expect: null, label: "universal-descendant" },
|
||||
|
||||
// --- Attribute equality (no fast path planned) ---
|
||||
{ xp: "//*[@class='alpha']", expect: classes.alpha, label: "class-eq-any" },
|
||||
{ xp: "//div[@class='alpha']", expect: cross["div-alpha"] || 0, label: "class-eq-div" },
|
||||
|
||||
// --- Early-exit candidates (iterator optimization) ---
|
||||
{ xp: "(//div)[1]", expect: 1, label: "first-of-many" },
|
||||
{ xp: "(//div)[last()]", expect: 1, label: "last-of-many" },
|
||||
|
||||
// --- Functions in predicate (regression guard) ---
|
||||
{ xp: "//div[contains(@class,'alpha')]", expect: cross["div-alpha"] || 0, label: "contains-class" },
|
||||
{ xp: "//div[starts-with(@id,'n')]", expect: tags.div - (targetTag === "div" ? 1 : 0), label: "starts-with-id" },
|
||||
|
||||
// --- Counting (number result, iterator early-exit candidate) ---
|
||||
{ xp: "count(//div)", expect: tags.div, label: "count" },
|
||||
];
|
||||
|
||||
function pad(s, w) { s = String(s); return s.length >= w ? s : s + " ".repeat(w - s.length); }
|
||||
|
||||
// Result extractor that handles both node-set and number results.
|
||||
function evalAndCount(xp) {
|
||||
const r = document.evaluate(xp, document, null, XPathResult.ANY_TYPE, null);
|
||||
if (r.resultType === XPathResult.NUMBER_TYPE) return r.numberValue;
|
||||
if (r.resultType === XPathResult.UNORDERED_NODE_ITERATOR_TYPE
|
||||
|| r.resultType === XPathResult.ORDERED_NODE_ITERATOR_TYPE) {
|
||||
let n = 0; while (r.iterateNext()) n++;
|
||||
return n;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// Accumulate output into one big string. Per-call console.warn lines
|
||||
// get clobbered by the test harness's interleaved prints; batching
|
||||
// into a single warn call survives the interleaving intact.
|
||||
const out = [];
|
||||
out.push(`tree_size=${TREE_SIZE} iterations=${ITERATIONS} warmup=${WARMUP}`);
|
||||
out.push(`${pad("label", 24)} ${pad("count", 8)} ${pad("µs/iter", 10)} xpath`);
|
||||
|
||||
for (const c of cases) {
|
||||
let actual = null;
|
||||
for (let i = 0; i < WARMUP; i++) {
|
||||
actual = evalAndCount(c.xp);
|
||||
}
|
||||
|
||||
if (c.expect !== null && actual !== c.expect) {
|
||||
testing.fail(`[xpath-perf] ${c.label} returned ${actual}, expected ${c.expect}`);
|
||||
}
|
||||
|
||||
// Time the full document.evaluate call. ANY_TYPE so we don't pay
|
||||
// snapshot allocation we wouldn't pay in the realistic
|
||||
// DOM.performSearch path either.
|
||||
const t0 = performance.now();
|
||||
for (let i = 0; i < ITERATIONS; i++) {
|
||||
document.evaluate(c.xp, document, null, XPathResult.ANY_TYPE, null);
|
||||
}
|
||||
const t1 = performance.now();
|
||||
const us_per_iter = ((t1 - t0) * 1000) / ITERATIONS;
|
||||
|
||||
out.push(`${pad(c.label, 24)} ${pad(actual ?? "-", 8)} ${pad(us_per_iter.toFixed(1), 10)} ${c.xp}`);
|
||||
}
|
||||
|
||||
console.warn("[xpath-perf]\n" + out.map(l => "[xpath-perf] " + l).join("\n"));
|
||||
|
||||
testing.expectEqual(true, true);
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
193
src/browser/tests/xpath/xpath_result.html
Normal file
193
src/browser/tests/xpath/xpath_result.html
Normal file
@@ -0,0 +1,193 @@
|
||||
<!DOCTYPE html>
|
||||
<body>
|
||||
<script src="../testing.js"></script>
|
||||
<h1 id=h>Hello</h1>
|
||||
<p id=p1>One</p>
|
||||
<p id=p2>Two</p>
|
||||
</body>
|
||||
|
||||
<script id=type_constants>
|
||||
{
|
||||
testing.expectEqual(0, XPathResult.ANY_TYPE);
|
||||
testing.expectEqual(1, XPathResult.NUMBER_TYPE);
|
||||
testing.expectEqual(2, XPathResult.STRING_TYPE);
|
||||
testing.expectEqual(3, XPathResult.BOOLEAN_TYPE);
|
||||
testing.expectEqual(4, XPathResult.UNORDERED_NODE_ITERATOR_TYPE);
|
||||
testing.expectEqual(5, XPathResult.ORDERED_NODE_ITERATOR_TYPE);
|
||||
testing.expectEqual(6, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE);
|
||||
testing.expectEqual(7, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE);
|
||||
testing.expectEqual(8, XPathResult.ANY_UNORDERED_NODE_TYPE);
|
||||
testing.expectEqual(9, XPathResult.FIRST_ORDERED_NODE_TYPE);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=instance_constants>
|
||||
{
|
||||
// Type constants are also exposed on instances.
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(7, r.ORDERED_NODE_SNAPSHOT_TYPE);
|
||||
testing.expectEqual(0, r.ANY_TYPE);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=number_value>
|
||||
{
|
||||
const r = document.evaluate("count(//p)", document, null,
|
||||
XPathResult.NUMBER_TYPE, null);
|
||||
testing.expectEqual(XPathResult.NUMBER_TYPE, r.resultType);
|
||||
testing.expectEqual(2, r.numberValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=string_value>
|
||||
{
|
||||
const r = document.evaluate("string(//h1)", document, null,
|
||||
XPathResult.STRING_TYPE, null);
|
||||
testing.expectEqual(XPathResult.STRING_TYPE, r.resultType);
|
||||
testing.expectEqual('Hello', r.stringValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=boolean_value>
|
||||
{
|
||||
const r = document.evaluate("count(//p) > 0", document, null,
|
||||
XPathResult.BOOLEAN_TYPE, null);
|
||||
testing.expectEqual(XPathResult.BOOLEAN_TYPE, r.resultType);
|
||||
testing.expectEqual(true, r.booleanValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=unordered_iterator>
|
||||
{
|
||||
// Walks an UNORDERED_NODE_ITERATOR_TYPE result with iterateNext():
// expects ids 'p1' then 'p2', then null, and invalidIteratorState still
// false afterwards.
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);
|
||||
testing.expectEqual(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, r.resultType);
|
||||
testing.expectEqual('p1', r.iterateNext().id);
|
||||
testing.expectEqual('p2', r.iterateNext().id);
|
||||
testing.expectEqual(null, r.iterateNext());
|
||||
testing.expectEqual(false, r.invalidIteratorState);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=ordered_iterator>
|
||||
{
|
||||
// Walks an ORDERED_NODE_ITERATOR_TYPE result with iterateNext():
// expects ids 'p1' then 'p2', then null.
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
|
||||
testing.expectEqual(XPathResult.ORDERED_NODE_ITERATOR_TYPE, r.resultType);
|
||||
testing.expectEqual('p1', r.iterateNext().id);
|
||||
testing.expectEqual('p2', r.iterateNext().id);
|
||||
testing.expectEqual(null, r.iterateNext());
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=unordered_snapshot>
|
||||
{
|
||||
// Checks the snapshot API on an UNORDERED_NODE_SNAPSHOT_TYPE result:
// snapshotLength, indexed access via snapshotItem(), and null for an index
// equal to the length.
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, r.resultType);
|
||||
testing.expectEqual(2, r.snapshotLength);
|
||||
testing.expectEqual('p1', r.snapshotItem(0).id);
|
||||
testing.expectEqual('p2', r.snapshotItem(1).id);
|
||||
testing.expectEqual(null, r.snapshotItem(2));
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=ordered_snapshot>
|
||||
{
|
||||
// Checks the snapshot API on an ORDERED_NODE_SNAPSHOT_TYPE result:
// snapshotLength and indexed access via snapshotItem().
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, r.resultType);
|
||||
testing.expectEqual(2, r.snapshotLength);
|
||||
testing.expectEqual('p1', r.snapshotItem(0).id);
|
||||
testing.expectEqual('p2', r.snapshotItem(1).id);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=any_unordered_node>
|
||||
{
|
||||
// ANY_UNORDERED_NODE_TYPE exposes a single node through singleNodeValue.
// The test expects the node with id 'p1' for //p.
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.ANY_UNORDERED_NODE_TYPE, null);
|
||||
testing.expectEqual(XPathResult.ANY_UNORDERED_NODE_TYPE, r.resultType);
|
||||
testing.expectEqual('p1', r.singleNodeValue.id);
|
||||
|
||||
// An expression that selects nothing is expected to yield a null
// singleNodeValue.
const empty = document.evaluate("//nope", document, null,
|
||||
XPathResult.ANY_UNORDERED_NODE_TYPE, null);
|
||||
testing.expectEqual(null, empty.singleNodeValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=first_ordered_node>
|
||||
{
|
||||
// FIRST_ORDERED_NODE_TYPE: singleNodeValue is expected to be the node with
// id 'p1' for //p.
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
||||
testing.expectEqual(XPathResult.FIRST_ORDERED_NODE_TYPE, r.resultType);
|
||||
testing.expectEqual('p1', r.singleNodeValue.id);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=any_type_resolution>
|
||||
{
|
||||
// ANY_TYPE settles into the natural category of the result.
// Each case below evaluates with ANY_TYPE and asserts the concrete
// resultType that comes back.
|
||||
// Node-set expression -> UNORDERED_NODE_ITERATOR_TYPE.
const ns = document.evaluate("//p", document, null,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
testing.expectEqual(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, ns.resultType);
|
||||
|
||||
// Numeric expression -> NUMBER_TYPE.
const num = document.evaluate("count(//p)", document, null,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
testing.expectEqual(XPathResult.NUMBER_TYPE, num.resultType);
|
||||
testing.expectEqual(2, num.numberValue);
|
||||
|
||||
// String expression -> STRING_TYPE.
const str = document.evaluate("string(//h1)", document, null,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
testing.expectEqual(XPathResult.STRING_TYPE, str.resultType);
|
||||
testing.expectEqual('Hello', str.stringValue);
|
||||
|
||||
// Boolean expression -> BOOLEAN_TYPE.
const bool = document.evaluate("true()", document, null,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
testing.expectEqual(XPathResult.BOOLEAN_TYPE, bool.resultType);
|
||||
testing.expectEqual(true, bool.booleanValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=type_mismatch_throws>
|
||||
{
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
|
||||
// Wrong-typed accessors throw DOMException(InvalidStateError).
// `n` counts how many of the five accessors threw. The catch blocks do not
// inspect the exception, so this test only asserts that each one throws,
// not which exception type is raised.
|
||||
let n = 0;
|
||||
try { r.numberValue; } catch (e) { n++; }
|
||||
try { r.stringValue; } catch (e) { n++; }
|
||||
try { r.booleanValue; } catch (e) { n++; }
|
||||
try { r.singleNodeValue; } catch (e) { n++; }
|
||||
try { r.iterateNext(); } catch (e) { n++; }
|
||||
testing.expectEqual(5, n);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=node_set_for_scalar_throws>
|
||||
{
|
||||
// Requesting a node-set type for a scalar expression rejects.
// Only the fact that evaluate() throws is asserted; the exception type is
// not checked.
|
||||
let threw = false;
|
||||
try {
|
||||
document.evaluate("count(//p)", document, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
} catch (e) {
|
||||
threw = true;
|
||||
}
|
||||
testing.expectEqual(true, threw);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=iterator_exhaustion>
|
||||
{
|
||||
// Drains an ordered iterator, then calls iterateNext() once more to check
// that it keeps returning null instead of throwing or restarting.
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
|
||||
testing.expectEqual('p1', r.iterateNext().id);
|
||||
testing.expectEqual('p2', r.iterateNext().id);
|
||||
testing.expectEqual(null, r.iterateNext());
|
||||
// Re-calling on an exhausted iterator stays at null.
|
||||
testing.expectEqual(null, r.iterateNext());
|
||||
}
|
||||
</script>
|
||||
@@ -78,7 +78,7 @@ pub fn createDocument(_: *const DOMImplementation, namespace_: ?[]const u8, qual
|
||||
// Create and append root element if qualified_name provided
|
||||
if (qualified_name) |qname| {
|
||||
if (qname.len > 0) {
|
||||
const namespace = if (namespace_) |ns| Node.Element.Namespace.parse(ns) else .xml;
|
||||
const namespace = Node.Element.Namespace.parse(namespace_);
|
||||
const root = try frame.createElementNS(namespace, qname, null);
|
||||
_ = try document.asNode().appendChild(root, frame);
|
||||
}
|
||||
|
||||
@@ -35,6 +35,8 @@ const DOMImplementation = @import("DOMImplementation.zig");
|
||||
const StyleSheetList = @import("css/StyleSheetList.zig");
|
||||
const FontFaceSet = @import("css/FontFaceSet.zig");
|
||||
const Selection = @import("Selection.zig");
|
||||
const XPathResult = @import("XPathResult.zig");
|
||||
const XPathExpression = @import("XPathExpression.zig");
|
||||
|
||||
pub const XMLDocument = @import("XMLDocument.zig");
|
||||
pub const HTMLDocument = @import("HTMLDocument.zig");
|
||||
@@ -119,7 +121,18 @@ pub fn asEventTarget(self: *Document) *@import("EventTarget.zig") {
|
||||
}
|
||||
|
||||
pub fn getURL(self: *const Document, frame: *const Frame) [:0]const u8 {
|
||||
return self._url orelse frame.url;
|
||||
return self._url orelse (self._frame orelse frame).url;
|
||||
}
|
||||
|
||||
/// Returns the `_location` of the window belonging to this document's frame.
/// Yields null when the document is not an HTML document (`_type != .html`)
/// or when it has no `_frame`.
pub fn getLocation(self: *const Document) ?*Location {
|
||||
if (self._type != .html) return null;
|
||||
const doc_frame = self._frame orelse return null;
|
||||
return doc_frame.window._location;
|
||||
}
|
||||
|
||||
/// Schedules a navigation to `url` on `frame` with reason `.script` and a
/// `.push` kind; this document's own `_frame` is passed as the `.script`
/// field of the final argument. Does nothing for non-HTML documents.
pub fn setLocation(self: *Document, url: [:0]const u8, frame: *Frame) !void {
|
||||
if (self._type != .html) return;
|
||||
return frame.scheduleNavigation(url, .{ .reason = .script, .kind = .{ .push = null } }, .{ .script = self._frame });
|
||||
}
|
||||
|
||||
pub fn getContentType(self: *const Document) []const u8 {
|
||||
@@ -277,11 +290,11 @@ pub fn getSelection(self: *Document) *Selection {
|
||||
}
|
||||
|
||||
pub fn querySelector(self: *Document, input: String, frame: *Frame) !?*Element {
|
||||
return Selector.querySelector(self.asNode(), input.str(), frame);
|
||||
return Selector.querySelector(self.asNode(), input.str(), frame) catch |err| Selector.mapErrorToDOM(err);
|
||||
}
|
||||
|
||||
pub fn querySelectorAll(self: *Document, input: String, frame: *Frame) !*Selector.List {
|
||||
return Selector.querySelectorAll(self.asNode(), input.str(), frame);
|
||||
return Selector.querySelectorAll(self.asNode(), input.str(), frame) catch |err| Selector.mapErrorToDOM(err);
|
||||
}
|
||||
|
||||
pub fn getImplementation(self: *Document, frame: *Frame) !*DOMImplementation {
|
||||
@@ -412,6 +425,44 @@ pub fn createNodeIterator(_: *const Document, root: *Node, what_to_show: ?js.Val
|
||||
return DOMNodeIterator.init(root, try whatToShow(what_to_show), filter, frame);
|
||||
}
|
||||
|
||||
/// `document.evaluate`: parses and evaluates `expression` by delegating to
/// `XPathResult.fromExpression`.
///
/// - `context_node`: evaluation context; falls back to the document node when null.
/// - `resolver`, `result`: accepted and discarded.
/// - `result_type`: one of the `XPathResult` type constants; defaults to
///   `XPathResult.ANY_TYPE` when null.
///
/// Returns the new `XPathResult`, or whatever error `fromExpression` returns.
pub fn evaluate(
|
||||
self: *Document,
|
||||
expression: []const u8,
|
||||
context_node: ?*Node,
|
||||
resolver: ?js.Function,
|
||||
result_type: ?u16,
|
||||
result: ?*XPathResult,
|
||||
frame: *Frame,
|
||||
) !*XPathResult {
|
||||
// resolver/result are no-ops in HTML mode (decision #2).
|
||||
// Null/missing context_node falls back to the document — matches the
|
||||
// polyfill (decision #2). Firefox throws TypeError on a *missing*
|
||||
// arg, but the bridge can't distinguish "missing" from "explicit
|
||||
// null" here, so polyfill parity wins for the ambiguity.
|
||||
_ = resolver;
|
||||
_ = result;
|
||||
return XPathResult.fromExpression(
|
||||
expression,
|
||||
context_node orelse self.asNode(),
|
||||
result_type orelse XPathResult.ANY_TYPE,
|
||||
frame,
|
||||
);
|
||||
}
|
||||
|
||||
/// `document.createExpression`: builds an `XPathExpression` from `expression`
/// by delegating to `XPathExpression.init`. The receiver is unused and
/// `resolver` is accepted and discarded.
pub fn createExpression(
|
||||
_: *const Document,
|
||||
expression: []const u8,
|
||||
resolver: ?js.Function,
|
||||
frame: *Frame,
|
||||
) !*XPathExpression {
|
||||
_ = resolver;
|
||||
return XPathExpression.init(expression, frame);
|
||||
}
|
||||
|
||||
/// `document.createNSResolver`: returns `node` unchanged. The receiver is
/// unused, and although the return type is optional this body never
/// returns null.
pub fn createNSResolver(_: *const Document, node: *Node) ?*Node {
|
||||
return node;
|
||||
}
|
||||
|
||||
fn whatToShow(value_: ?js.Value) !u32 {
|
||||
const value = value_ orelse return 4294967295; // show all when undefined
|
||||
if (value.isUndefined()) {
|
||||
@@ -465,15 +516,21 @@ pub fn getFonts(self: *Document, frame: *Frame) !*FontFaceSet {
|
||||
return fonts;
|
||||
}
|
||||
|
||||
pub fn adoptNode(_: *const Document, node: *Node, frame: *Frame) !*Node {
|
||||
pub fn adoptNode(self: *Document, node: *Node, frame: *Frame) !*Node {
|
||||
if (node._type == .document) {
|
||||
return error.NotSupported;
|
||||
}
|
||||
|
||||
const old_owner = node.ownerDocument(frame) orelse frame.document;
|
||||
|
||||
if (node._parent) |parent| {
|
||||
frame.removeNode(parent, node, .{ .will_be_reconnected = false });
|
||||
}
|
||||
|
||||
if (old_owner != self) {
|
||||
try frame.adoptNodeTree(node, old_owner, self);
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
@@ -666,7 +723,13 @@ fn writeInternal(self: *Document, text: []const []const u8, append_newline: bool
|
||||
if (self._script_created_parser) |*parser| {
|
||||
parser.read(html) catch |err| {
|
||||
log.warn(.dom, "document.write parser error", .{ .err = err });
|
||||
// was already closed
|
||||
// html5ever's handle was destroyed inside read(), but the
|
||||
// pending text buffer (if any) still wants to land on its
|
||||
// text node's _data — flushPendingText doesn't depend on
|
||||
// the handle, so attempt a final flush before dropping.
|
||||
parser.parser.flushPendingText() catch |flush_err| {
|
||||
log.warn(.dom, "flush after parser panic", .{ .err = flush_err });
|
||||
};
|
||||
self._script_created_parser = null;
|
||||
};
|
||||
}
|
||||
@@ -795,12 +858,12 @@ pub fn close(self: *Document, frame: *Frame) !void {
|
||||
return;
|
||||
}
|
||||
|
||||
// done() calls html5ever_streaming_parser_finish which frees the parser
|
||||
// We must NOT call deinit() after done() as that would be a double-free
|
||||
self._script_created_parser.?.done();
|
||||
// Just null out the handle since done() already freed it
|
||||
self._script_created_parser.?.handle = null;
|
||||
self._script_created_parser = null;
|
||||
// done() finishes html5ever's handle and runs the final flushPendingText.
|
||||
// Even if flushPendingText errors, the handle is already finished and we
|
||||
// must not retain the Streaming — defer so the error path also drops it.
|
||||
// (Streaming.done nulls its own handle, so dropping the struct is safe.)
|
||||
defer self._script_created_parser = null;
|
||||
try self._script_created_parser.?.done();
|
||||
|
||||
frame.documentIsComplete();
|
||||
}
|
||||
@@ -1027,6 +1090,7 @@ pub const JsApi = struct {
|
||||
|
||||
pub const onselectionchange = bridge.accessor(Document.getOnSelectionChange, Document.setOnSelectionChange, .{});
|
||||
pub const URL = bridge.accessor(Document.getURL, null, .{});
|
||||
pub const location = bridge.accessor(Document.getLocation, Document.setLocation, .{});
|
||||
pub const documentURI = bridge.accessor(Document.getURL, null, .{});
|
||||
pub const documentElement = bridge.accessor(Document.getDocumentElement, null, .{});
|
||||
pub const scrollingElement = bridge.accessor(Document.getDocumentElement, null, .{});
|
||||
@@ -1051,6 +1115,9 @@ pub const JsApi = struct {
|
||||
pub const createEvent = bridge.function(Document.createEvent, .{ .dom_exception = true });
|
||||
pub const createTreeWalker = bridge.function(Document.createTreeWalker, .{});
|
||||
pub const createNodeIterator = bridge.function(Document.createNodeIterator, .{});
|
||||
pub const evaluate = bridge.function(Document.evaluate, .{ .dom_exception = true });
|
||||
pub const createExpression = bridge.function(Document.createExpression, .{ .dom_exception = true });
|
||||
pub const createNSResolver = bridge.function(Document.createNSResolver, .{});
|
||||
pub const getElementById = bridge.function(_getElementById, .{});
|
||||
fn _getElementById(self: *Document, value_: ?js.Value, frame: *Frame) !?*Element {
|
||||
const value = value_ orelse return null;
|
||||
@@ -1111,3 +1178,7 @@ const testing = @import("../../testing.zig");
|
||||
// Runs the "document" fixture through `testing.htmlRunner` with default options.
test "WebApi: Document" {
|
||||
try testing.htmlRunner("document", .{});
|
||||
}
|
||||
|
||||
// Runs the `xpath/document_evaluate.html` fixture through `testing.htmlRunner`
// with default options.
test "WebApi: Document.evaluate" {
|
||||
try testing.htmlRunner("xpath/document_evaluate.html", .{});
|
||||
}
|
||||
|
||||
@@ -84,11 +84,11 @@ pub fn getElementById(self: *DocumentFragment, id: []const u8) ?*Element {
|
||||
}
|
||||
|
||||
pub fn querySelector(self: *DocumentFragment, selector: []const u8, frame: *Frame) !?*Element {
|
||||
return Selector.querySelector(self.asNode(), selector, frame);
|
||||
return Selector.querySelector(self.asNode(), selector, frame) catch |err| Selector.mapErrorToDOM(err);
|
||||
}
|
||||
|
||||
pub fn querySelectorAll(self: *DocumentFragment, input: []const u8, frame: *Frame) !*Selector.List {
|
||||
return Selector.querySelectorAll(self.asNode(), input, frame);
|
||||
return Selector.querySelectorAll(self.asNode(), input, frame) catch |err| Selector.mapErrorToDOM(err);
|
||||
}
|
||||
|
||||
pub fn getChildren(self: *DocumentFragment, frame: *Frame) !collections.NodeLive(.child_elements) {
|
||||
|
||||
@@ -1071,15 +1071,15 @@ pub fn getChildElementCount(self: *Element) usize {
|
||||
}
|
||||
|
||||
pub fn matches(self: *Element, selector: []const u8, frame: *Frame) !bool {
|
||||
return Selector.matches(self, selector, frame);
|
||||
return Selector.matches(self, selector, frame) catch |err| Selector.mapErrorToDOM(err);
|
||||
}
|
||||
|
||||
pub fn querySelector(self: *Element, selector: []const u8, frame: *Frame) !?*Element {
|
||||
return Selector.querySelector(self.asNode(), selector, frame);
|
||||
return Selector.querySelector(self.asNode(), selector, frame) catch |err| Selector.mapErrorToDOM(err);
|
||||
}
|
||||
|
||||
pub fn querySelectorAll(self: *Element, input: []const u8, frame: *Frame) !*Selector.List {
|
||||
return Selector.querySelectorAll(self.asNode(), input, frame);
|
||||
return Selector.querySelectorAll(self.asNode(), input, frame) catch |err| Selector.mapErrorToDOM(err);
|
||||
}
|
||||
|
||||
pub fn getAnimations(_: *const Element) []*Animation {
|
||||
|
||||
@@ -196,15 +196,6 @@ pub fn getCurrentScript(self: *const HTMLDocument) ?*Element.Html.Script {
|
||||
return self._proto._current_script;
|
||||
}
|
||||
|
||||
pub fn getLocation(self: *const HTMLDocument) ?*@import("Location.zig") {
|
||||
const frame = self._proto._frame orelse return null;
|
||||
return frame.window._location;
|
||||
}
|
||||
|
||||
pub fn setLocation(self: *HTMLDocument, url: [:0]const u8, frame: *Frame) !void {
|
||||
return frame.scheduleNavigation(url, .{ .reason = .script, .kind = .{ .push = null } }, .{ .script = self._proto._frame });
|
||||
}
|
||||
|
||||
pub fn getDir(self: *HTMLDocument) []const u8 {
|
||||
const el = self._proto.getDocumentElement() orelse return "";
|
||||
const html = el.is(Element.Html) orelse return "";
|
||||
@@ -311,7 +302,6 @@ pub const JsApi = struct {
|
||||
pub const applets = bridge.accessor(HTMLDocument.getApplets, null, .{});
|
||||
pub const plugins = bridge.accessor(HTMLDocument.getEmbeds, null, .{});
|
||||
pub const currentScript = bridge.accessor(HTMLDocument.getCurrentScript, null, .{});
|
||||
pub const location = bridge.accessor(HTMLDocument.getLocation, HTMLDocument.setLocation, .{});
|
||||
pub const all = bridge.accessor(HTMLDocument.getAll, null, .{});
|
||||
pub const cookie = bridge.accessor(HTMLDocument.getCookie, HTMLDocument.setCookie, .{});
|
||||
pub const doctype = bridge.accessor(HTMLDocument.getDocType, null, .{});
|
||||
|
||||
@@ -166,7 +166,7 @@ pub fn findAdjacentNodes(self: *Node, position: []const u8) !struct { *Node, ?*N
|
||||
// Returned if:
|
||||
// * position is not one of the four listed values.
|
||||
// * The input is XML that is not well-formed.
|
||||
return error.Syntax;
|
||||
return error.SyntaxError;
|
||||
}
|
||||
|
||||
pub fn firstChild(self: *const Node) ?*Node {
|
||||
|
||||
@@ -184,6 +184,11 @@ pub fn clearMeasures(self: *Performance, measure_name: ?[]const u8) void {
|
||||
}
|
||||
}
|
||||
|
||||
/// Accepts a buffer size and discards it: both parameters are explicitly
/// ignored and the body performs no other work.
pub fn setResourceTimingBufferSize(self: *Performance, max_size: u32) void {
|
||||
_ = self;
|
||||
_ = max_size;
|
||||
}
|
||||
|
||||
/// Returns the `items` slice of `_entries`. The slice is returned directly,
/// not copied.
pub fn getEntries(self: *const Performance) []*Entry {
|
||||
return self._entries.items;
|
||||
}
|
||||
@@ -281,6 +286,7 @@ pub const JsApi = struct {
|
||||
pub const measure = bridge.function(Performance.measure, .{ .dom_exception = true });
|
||||
pub const clearMarks = bridge.function(Performance.clearMarks, .{});
|
||||
pub const clearMeasures = bridge.function(Performance.clearMeasures, .{});
|
||||
pub const setResourceTimingBufferSize = bridge.function(Performance.setResourceTimingBufferSize, .{ .noop = true });
|
||||
pub const getEntries = bridge.function(Performance.getEntries, .{});
|
||||
pub const getEntriesByType = bridge.function(Performance.getEntriesByType, .{});
|
||||
pub const getEntriesByName = bridge.function(Performance.getEntriesByName, .{});
|
||||
|
||||
@@ -416,5 +416,8 @@ pub const JsApi = struct {
|
||||
|
||||
const testing = @import("../../testing.zig");
|
||||
test "WebApi: Worker" {
|
||||
try testing.htmlRunner("worker", .{});
|
||||
// Worker tests chain a worker-script fetch with a dynamic-import fetch
|
||||
// and a cross-context postMessage. The default 2 s assertion budget can
|
||||
// blow up on TSAN CI; give it more room.
|
||||
try testing.htmlRunner("worker", .{ .timeout_ms = 8000 });
|
||||
}
|
||||
|
||||
97
src/browser/webapi/XPathEvaluator.zig
Normal file
97
src/browser/webapi/XPathEvaluator.zig
Normal file
@@ -0,0 +1,97 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! WHATWG `XPathEvaluator` — a stateless factory for XPath evaluation.
|
||||
//! Mirrors `Document.evaluate` / `Document.createExpression` /
|
||||
//! `Document.createNSResolver` so an explicit
|
||||
//! `new XPathEvaluator()` instance can be used in place of the
|
||||
//! document.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const js = @import("../js/js.zig");
|
||||
const Frame = @import("../Frame.zig");
|
||||
|
||||
const Node = @import("Node.zig");
|
||||
const XPathResult = @import("XPathResult.zig");
|
||||
const XPathExpression = @import("XPathExpression.zig");
|
||||
|
||||
const XPathEvaluator = @This();
|
||||
|
||||
// Padding to avoid zero-size struct identity_map collisions (matches
|
||||
// the convention in ResizeObserver.zig).
|
||||
_pad: bool = false,
|
||||
|
||||
/// Creates an `XPathEvaluator` with every field at its default value.
/// Returned by value; performs no allocation.
pub fn init() XPathEvaluator {
|
||||
return .{};
|
||||
}
|
||||
|
||||
/// `XPathEvaluator.evaluate`: parses and evaluates `expression` against
/// `context_node` by delegating to `XPathResult.fromExpression`.
///
/// - `resolver`, `result`: accepted and discarded.
/// - `requested_type`: one of the `XPathResult` type constants; defaults to
///   `XPathResult.ANY_TYPE` when null.
///
/// The receiver is unused. Returns the new `XPathResult`, or whatever error
/// `fromExpression` returns.
pub fn evaluate(
|
||||
_: *const XPathEvaluator,
|
||||
expression: []const u8,
|
||||
context_node: *Node,
|
||||
resolver: ?js.Function,
|
||||
requested_type: ?u16,
|
||||
result: ?*XPathResult,
|
||||
frame: *Frame,
|
||||
) !*XPathResult {
|
||||
// Namespace resolver is accepted-and-ignored (HTML mode — decision #2).
|
||||
// Result reuse is also a no-op; XPathResult.fromExpression always
|
||||
// allocates a fresh instance.
|
||||
_ = resolver;
|
||||
_ = result;
|
||||
return XPathResult.fromExpression(expression, context_node, requested_type orelse XPathResult.ANY_TYPE, frame);
|
||||
}
|
||||
|
||||
/// `XPathEvaluator.createExpression`: builds an `XPathExpression` from
/// `expression` by delegating to `XPathExpression.init`. The receiver is
/// unused and `resolver` is accepted and discarded.
pub fn createExpression(
|
||||
_: *const XPathEvaluator,
|
||||
expression: []const u8,
|
||||
resolver: ?js.Function,
|
||||
frame: *Frame,
|
||||
) !*XPathExpression {
|
||||
_ = resolver;
|
||||
return XPathExpression.init(expression, frame);
|
||||
}
|
||||
|
||||
/// `XPathEvaluator.createNSResolver`: returns `node` unchanged. Although the
/// return type is optional, this body never returns null.
pub fn createNSResolver(_: *const XPathEvaluator, node: *Node) ?*Node {
|
||||
// HTML-mode passthrough — the WHATWG IDL accepts a Node and returns
|
||||
// an `XPathNSResolver`, but in practice the input node is reused.
|
||||
return node;
|
||||
}
|
||||
|
||||
/// JS binding table for `XPathEvaluator`, built on `js.Bridge`.
/// Registers the constructor (`init`) and the three methods; `evaluate` and
/// `createExpression` are registered with `.dom_exception = true`.
pub const JsApi = struct {
|
||||
pub const bridge = js.Bridge(XPathEvaluator);
|
||||
|
||||
// Class metadata read by the bridge: the JS-visible class name, the
// prototype chain, and a class id slot that starts out undefined.
// NOTE(review): the meaning of `empty_with_no_proto` is defined by the
// bridge and is not visible here — confirm before changing it.
pub const Meta = struct {
|
||||
pub const name = "XPathEvaluator";
|
||||
pub const prototype_chain = bridge.prototypeChain();
|
||||
pub var class_id: bridge.ClassId = undefined;
|
||||
pub const empty_with_no_proto = true;
|
||||
};
|
||||
|
||||
pub const constructor = bridge.constructor(XPathEvaluator.init, .{});
|
||||
pub const evaluate = bridge.function(XPathEvaluator.evaluate, .{ .dom_exception = true });
|
||||
pub const createExpression = bridge.function(XPathEvaluator.createExpression, .{ .dom_exception = true });
|
||||
pub const createNSResolver = bridge.function(XPathEvaluator.createNSResolver, .{});
|
||||
};
|
||||
|
||||
const testing = @import("../../testing.zig");
|
||||
|
||||
// Runs the `xpath/xpath_evaluator.html` fixture through `testing.htmlRunner`
// with default options.
test "WebApi: XPathEvaluator + XPathExpression" {
|
||||
try testing.htmlRunner("xpath/xpath_evaluator.html", .{});
|
||||
}
|
||||
105
src/browser/webapi/XPathExpression.zig
Normal file
105
src/browser/webapi/XPathExpression.zig
Normal file
@@ -0,0 +1,105 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! WHATWG `XPathExpression` — a parsed XPath expression cached for
|
||||
//! repeated evaluation. The parsed AST lives in this object's per-
|
||||
//! instance arena (long-lived); each `evaluate()` call gets a fresh
|
||||
//! arena for its own result data so multiple evaluations don't grow
|
||||
//! the AST arena.
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
|
||||
const js = @import("../js/js.zig");
|
||||
const Page = @import("../Page.zig");
|
||||
const Frame = @import("../Frame.zig");
|
||||
|
||||
const Node = @import("Node.zig");
|
||||
const XPathResult = @import("XPathResult.zig");
|
||||
|
||||
const xpath = struct {
|
||||
const Ast = @import("../xpath/ast.zig");
|
||||
const Parser = @import("../xpath/Parser.zig");
|
||||
const Evaluator = @import("../xpath/Evaluator.zig");
|
||||
};
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const XPathExpression = @This();
|
||||
|
||||
_rc: lp.RC(u8) = .{},
|
||||
_arena: Allocator,
|
||||
_expr: *const xpath.Ast.Expr,
|
||||
|
||||
/// Parses `expression` and returns a heap-allocated `XPathExpression` that
/// owns the parsed AST.
///
/// A `.tiny` arena is acquired from `frame`; the copied expression text, the
/// AST, and the returned struct are all allocated from it. If any step after
/// acquisition fails, the `errdefer` releases the arena before the error
/// propagates.
pub fn init(expression: []const u8, frame: *Frame) !*XPathExpression {
|
||||
const arena = try frame.getArena(.tiny, "XPathExpression");
|
||||
errdefer frame.releaseArena(arena);
|
||||
|
||||
// The AST borrows string slices from its input (literals, names,
|
||||
// var refs, function names). `expression` is materialized in the JS
|
||||
// call_arena and is reclaimed when the top-level call returns, so
|
||||
// dupe into our long-lived arena before parsing.
|
||||
const owned = try arena.dupe(u8, expression);
|
||||
const expr = try xpath.Parser.parse(arena, owned);
|
||||
const xe = try arena.create(XPathExpression);
|
||||
xe.* = .{ ._arena = arena, ._expr = expr };
|
||||
return xe;
|
||||
}
|
||||
|
||||
/// Evaluates the stored AST (`self._expr`) against `context_node` and wraps
/// the outcome in a new `XPathResult`.
///
/// - `requested_type`: one of the `XPathResult` type constants; defaults to
///   `XPathResult.ANY_TYPE` when null.
/// - `result`: accepted and discarded.
///
/// Each call acquires its own `.medium` arena from `frame` and passes it to
/// `XPathResult.fromResult`. If evaluation or wrapping fails, the `errdefer`
/// releases that arena before the error propagates.
pub fn evaluate(
|
||||
self: *XPathExpression,
|
||||
context_node: *Node,
|
||||
requested_type: ?u16,
|
||||
result: ?*XPathResult,
|
||||
frame: *Frame,
|
||||
) !*XPathResult {
|
||||
// The `result` reuse parameter (WHATWG: optional XPathResult to
|
||||
// populate) is accepted-and-ignored: we always allocate fresh,
|
||||
// which matches every modern browser's effective behavior.
|
||||
_ = result;
|
||||
|
||||
const arena = try frame.getArena(.medium, "XPathResult");
|
||||
errdefer frame.releaseArena(arena);
|
||||
|
||||
const eval_result = try xpath.Evaluator.evaluate(arena, self._expr, context_node, frame);
|
||||
return XPathResult.fromResult(arena, requested_type orelse XPathResult.ANY_TYPE, eval_result);
|
||||
}
|
||||
|
||||
/// Releases this expression's arena (`_arena`) back to `page`. `init`
/// allocates the struct itself from that arena, so `self` must not be used
/// after this call.
pub fn deinit(self: *XPathExpression, page: *Page) void {
|
||||
page.releaseArena(self._arena);
|
||||
}
|
||||
|
||||
/// Forwards to `_rc.acquire()`.
/// NOTE(review): presumably this increments the reference count held in
/// `lp.RC(u8)` — confirm against the `lp.RC` implementation.
pub fn acquireRef(self: *XPathExpression) void {
|
||||
self._rc.acquire();
|
||||
}
|
||||
|
||||
/// Forwards to `_rc.release(self, page)`.
/// NOTE(review): presumably this decrements the count and invokes `deinit`
/// once it reaches zero — confirm against the `lp.RC` implementation.
pub fn releaseRef(self: *XPathExpression, page: *Page) void {
|
||||
self._rc.release(self, page);
|
||||
}
|
||||
|
||||
/// JS binding table for `XPathExpression`, built on `js.Bridge`.
/// No constructor is registered here; the only exposed method is `evaluate`,
/// registered with `.dom_exception = true`.
pub const JsApi = struct {
|
||||
pub const bridge = js.Bridge(XPathExpression);
|
||||
|
||||
// Class metadata read by the bridge: the JS-visible class name, the
// prototype chain, and a class id slot that starts out undefined.
pub const Meta = struct {
|
||||
pub const name = "XPathExpression";
|
||||
pub const prototype_chain = bridge.prototypeChain();
|
||||
pub var class_id: bridge.ClassId = undefined;
|
||||
};
|
||||
|
||||
pub const evaluate = bridge.function(XPathExpression.evaluate, .{ .dom_exception = true });
|
||||
};
|
||||
288
src/browser/webapi/XPathResult.zig
Normal file
288
src/browser/webapi/XPathResult.zig
Normal file
@@ -0,0 +1,288 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! WHATWG `XPathResult` (full surface, all 10 type constants — decision
|
||||
//! #4). Wraps the evaluator's `result.Result` for JS consumption:
|
||||
//! coerces to the requested result type at construction, exposes the
|
||||
//! type-tagged accessors, and serves the iterator/snapshot APIs.
|
||||
//!
|
||||
//! Lifetime model: each `XPathResult` owns a per-instance arena
|
||||
//! (`getArena(.medium, ...)`) that holds both the struct and the result
|
||||
//! data (node-set slice, formatted strings). The arena is released in
|
||||
//! `deinit` once the JS wrapper's refcount hits zero.
|
||||
//!
|
||||
//! Type-mismatch accessor calls return `error.InvalidStateError` —
|
||||
//! translated to a `DOMException` by `bridge.function(.., .{
|
||||
//! .dom_exception = true })`. The WHATWG IDL technically specifies
|
||||
//! `TypeError` for type mismatches, but `InvalidStateError` is what
|
||||
//! decision #4 captures and what most legacy XPath consumers expect.
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
|
||||
const js = @import("../js/js.zig");
|
||||
const Page = @import("../Page.zig");
|
||||
const Frame = @import("../Frame.zig");
|
||||
|
||||
const Node = @import("Node.zig");
|
||||
|
||||
// XPath runtime helpers. Aliased to keep the cross-directory imports
|
||||
// readable when both modules expose a `Result` type.
|
||||
const xpath = struct {
|
||||
const result = @import("../xpath/result.zig");
|
||||
const Parser = @import("../xpath/Parser.zig");
|
||||
const Evaluator = @import("../xpath/Evaluator.zig");
|
||||
};
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const XPathResult = @This();
|
||||
|
||||
// WHATWG type constants. ANY_TYPE is a request flag — at construction
|
||||
// it resolves to one of the four concrete categories (NUMBER, STRING,
|
||||
// BOOLEAN, UNORDERED_NODE_ITERATOR) depending on what the expression
|
||||
// produced.
|
||||
pub const ANY_TYPE: u16 = 0;
|
||||
pub const NUMBER_TYPE: u16 = 1;
|
||||
pub const STRING_TYPE: u16 = 2;
|
||||
pub const BOOLEAN_TYPE: u16 = 3;
|
||||
pub const UNORDERED_NODE_ITERATOR_TYPE: u16 = 4;
|
||||
pub const ORDERED_NODE_ITERATOR_TYPE: u16 = 5;
|
||||
pub const UNORDERED_NODE_SNAPSHOT_TYPE: u16 = 6;
|
||||
pub const ORDERED_NODE_SNAPSHOT_TYPE: u16 = 7;
|
||||
pub const ANY_UNORDERED_NODE_TYPE: u16 = 8;
|
||||
pub const FIRST_ORDERED_NODE_TYPE: u16 = 9;
|
||||
|
||||
/// Payload stored on an `XPathResult`: exactly one of a number, a string, a
/// boolean, or a slice of node pointers.
const Value = union(enum) {
|
||||
number: f64,
|
||||
string: []const u8,
|
||||
boolean: bool,
|
||||
nodes: []const *Node,
|
||||
};
|
||||
|
||||
_rc: lp.RC(u8) = .{},
|
||||
_arena: Allocator,
|
||||
_type: u16,
|
||||
_value: Value,
|
||||
_iter_pos: usize = 0,
|
||||
|
||||
// ----- constructors -----
|
||||
|
||||
/// One-shot: parse + evaluate + wrap. Used by `Document.evaluate` and
|
||||
/// `XPathEvaluator.evaluate`. Allocates a per-instance arena for the
|
||||
/// AST + result data + the struct itself.
|
||||
///
/// `requested_type` is passed through to `fromResult` unchanged. If copying
/// the expression, parsing, evaluating, or wrapping fails, the `errdefer`
/// releases the arena before the error propagates.
pub fn fromExpression(
|
||||
expression: []const u8,
|
||||
context_node: *Node,
|
||||
requested_type: u16,
|
||||
frame: *Frame,
|
||||
) !*XPathResult {
|
||||
const arena = try frame.getArena(.medium, "XPathResult");
|
||||
errdefer frame.releaseArena(arena);
|
||||
|
||||
// The AST borrows string slices from its input (literals, names,
|
||||
// var refs, function names). `expression` is materialized in the JS
|
||||
// call_arena and is reclaimed when the top-level call returns, so
|
||||
// dupe into our long-lived arena before parsing.
|
||||
const owned = try arena.dupe(u8, expression);
|
||||
const expr = try xpath.Parser.parse(arena, owned);
|
||||
const result = try xpath.Evaluator.evaluate(arena, expr, context_node, frame);
|
||||
return fromResult(arena, requested_type, result);
|
||||
}
|
||||
|
||||
/// Builds an XPathResult around an already-evaluated `result`.
///
/// Ownership of `arena` moves to the returned XPathResult, which releases it
/// in `deinit`. `XPathExpression.evaluate` uses this entry point because it
/// keeps its own AST cache and only allocates a fresh arena per result.
///
/// `requested_type` must be one of the `*_TYPE` constants:
///   - ANY_TYPE keeps the value as evaluated and resolves the stored type to
///     NUMBER / STRING / BOOLEAN / UNORDERED_NODE_ITERATOR.
///   - NUMBER / STRING / BOOLEAN convert the value.
///   - Every node-set type requires a node-set value.
/// Anything else, or a node-set type requested for a scalar value, returns
/// error.InvalidStateError.
pub fn fromResult(
    arena: Allocator,
    requested_type: u16,
    result: xpath.result.Result,
) !*XPathResult {
    const payload: Value = switch (requested_type) {
        ANY_TYPE => switch (result) {
            .number => |n| Value{ .number = n },
            .string => |s| Value{ .string = s },
            .boolean => |b| Value{ .boolean = b },
            .node_set => |ns| Value{ .nodes = ns },
        },
        NUMBER_TYPE => Value{ .number = try xpath.result.toNumber(arena, result) },
        STRING_TYPE => Value{ .string = try xpath.result.toString(arena, result) },
        BOOLEAN_TYPE => Value{ .boolean = xpath.result.toBoolean(result) },
        // All six node-set flavours (values 4 through 9).
        UNORDERED_NODE_ITERATOR_TYPE...FIRST_ORDERED_NODE_TYPE => nodes: {
            // A node-set type was requested for a non-node-set expression.
            // WHATWG specifies TypeError, but DOMException.fromError has no
            // TypeError mapping (it would surface as a plain JS Error), so
            // the project unifies on InvalidStateError.
            if (result != .node_set) return error.InvalidStateError;
            break :nodes Value{ .nodes = result.node_set };
        },
        else => return error.InvalidStateError,
    };

    // Only ANY_TYPE is rewritten; every other request is stored verbatim.
    var resolved_type: u16 = requested_type;
    if (requested_type == ANY_TYPE) {
        resolved_type = switch (payload) {
            .number => NUMBER_TYPE,
            .string => STRING_TYPE,
            .boolean => BOOLEAN_TYPE,
            .nodes => UNORDERED_NODE_ITERATOR_TYPE,
        };
    }

    const instance = try arena.create(XPathResult);
    instance.* = .{
        ._arena = arena,
        ._type = resolved_type,
        ._value = payload,
    };
    return instance;
}
|
||||
|
||||
// ----- lifecycle -----
|
||||
|
||||
/// Hands the arena that backs this result back to `page`. The struct itself
/// was created from that arena (see `fromResult`), so `self` should be
/// treated as gone once this returns.
pub fn deinit(self: *XPathResult, page: *Page) void {
    const backing_arena = self._arena;
    page.releaseArena(backing_arena);
}
|
||||
|
||||
/// Forwards to `_rc.acquire()` to register one more holder of this result.
pub fn acquireRef(self: *XPathResult) void {
    const counter = &self._rc;
    counter.acquire();
}
|
||||
|
||||
/// Forwards to `_rc.release`, passing this result and `page` along.
// NOTE(review): presumably the counter calls `deinit(page)` when the last
// holder goes away — confirm against `lp.RC`.
pub fn releaseRef(self: *XPathResult, page: *Page) void {
    const counter = &self._rc;
    counter.release(self, page);
}
|
||||
|
||||
// ----- accessors -----
|
||||
|
||||
/// Getter for the JS `resultType` accessor: the resolved `*_TYPE` value.
fn getResultType(self: *const XPathResult) u16 {
    const resolved = self._type;
    return resolved;
}
|
||||
|
||||
/// Getter for the JS `numberValue` accessor. Fails with
/// error.InvalidStateError unless the result type is NUMBER_TYPE.
fn getNumberValue(self: *const XPathResult) !f64 {
    return switch (self._type) {
        NUMBER_TYPE => self._value.number,
        else => error.InvalidStateError,
    };
}
|
||||
|
||||
/// Getter for the JS `stringValue` accessor. Fails with
/// error.InvalidStateError unless the result type is STRING_TYPE.
fn getStringValue(self: *const XPathResult) ![]const u8 {
    return switch (self._type) {
        STRING_TYPE => self._value.string,
        else => error.InvalidStateError,
    };
}
|
||||
|
||||
/// Getter for the JS `booleanValue` accessor. Fails with
/// error.InvalidStateError unless the result type is BOOLEAN_TYPE.
fn getBooleanValue(self: *const XPathResult) !bool {
    return switch (self._type) {
        BOOLEAN_TYPE => self._value.boolean,
        else => error.InvalidStateError,
    };
}
|
||||
|
||||
/// Getter for the JS `singleNodeValue` accessor. Valid only for
/// ANY_UNORDERED_NODE_TYPE and FIRST_ORDERED_NODE_TYPE; any other type gives
/// error.InvalidStateError. Returns the first stored node, or null when the
/// node-set is empty.
fn getSingleNodeValue(self: *const XPathResult) !?*Node {
    switch (self._type) {
        ANY_UNORDERED_NODE_TYPE, FIRST_ORDERED_NODE_TYPE => {},
        else => return error.InvalidStateError,
    }
    const nodes = self._value.nodes;
    if (nodes.len > 0) return nodes[0];
    return null;
}
|
||||
|
||||
/// Getter for the JS `snapshotLength` accessor. Valid only for the two
/// snapshot types; any other type gives error.InvalidStateError.
fn getSnapshotLength(self: *const XPathResult) !u32 {
    switch (self._type) {
        UNORDERED_NODE_SNAPSHOT_TYPE, ORDERED_NODE_SNAPSHOT_TYPE => {},
        else => return error.InvalidStateError,
    }
    const count: u32 = @intCast(self._value.nodes.len);
    return count;
}
|
||||
|
||||
/// Getter for the JS `invalidIteratorState` accessor (see `JsApi`).
///
/// Live mutation tracking on the iterator isn't implemented — we hold a
|
||||
/// frozen pointer slice, so the iterator is never "invalidated" by DOM
|
||||
/// edits during traversal. Always returns false; matches the polyfill,
|
||||
/// which is snapshot-only.
|
||||
fn getInvalidIteratorState(_: *const XPathResult) bool {
|
||||
// Constant by design: the receiver is deliberately unused.
return false;
|
||||
}
|
||||
|
||||
// ----- methods -----
|
||||
|
||||
/// Returns the next node of an iterator-type result and advances the
/// cursor, or null once every node has been handed out. Results that are
/// not UNORDERED_NODE_ITERATOR_TYPE / ORDERED_NODE_ITERATOR_TYPE fail with
/// error.InvalidStateError.
pub fn iterateNext(self: *XPathResult) !?*Node {
    switch (self._type) {
        UNORDERED_NODE_ITERATOR_TYPE, ORDERED_NODE_ITERATOR_TYPE => {},
        else => return error.InvalidStateError,
    }

    const nodes = self._value.nodes;
    const cursor = self._iter_pos;
    if (cursor < nodes.len) {
        self._iter_pos = cursor + 1;
        return nodes[cursor];
    }
    return null;
}
|
||||
|
||||
/// Returns the node at `index` of a snapshot-type result, or null when
/// `index` is past the end. Results that are not one of the two snapshot
/// types fail with error.InvalidStateError.
pub fn snapshotItem(self: *const XPathResult, index: u32) !?*Node {
    switch (self._type) {
        UNORDERED_NODE_SNAPSHOT_TYPE, ORDERED_NODE_SNAPSHOT_TYPE => {},
        else => return error.InvalidStateError,
    }
    const nodes = self._value.nodes;
    return if (index < nodes.len) nodes[index] else null;
}
|
||||
|
||||
// ----- JS bridge -----
|
||||
|
||||
// Declaration table handed to `js.Bridge` to describe how XPathResult is
// exposed to JavaScript: metadata, type constants, accessors and methods.
pub const JsApi = struct {
|
||||
pub const bridge = js.Bridge(XPathResult);
|
||||
|
||||
pub const Meta = struct {
|
||||
pub const name = "XPathResult";
|
||||
pub const prototype_chain = bridge.prototypeChain();
|
||||
// Left undefined here.
// NOTE(review): presumably assigned when the class is registered with
// the JS runtime — confirm against the bridge.
pub var class_id: bridge.ClassId = undefined;
|
||||
};
|
||||
|
||||
// Type constants — both static (on the constructor) and instance
|
||||
// properties per the WHATWG IDL. `template = true` makes them
|
||||
// class-level so `XPathResult.ORDERED_NODE_SNAPSHOT_TYPE` works.
|
||||
pub const ANY_TYPE = bridge.property(XPathResult.ANY_TYPE, .{ .template = true });
|
||||
pub const NUMBER_TYPE = bridge.property(XPathResult.NUMBER_TYPE, .{ .template = true });
|
||||
pub const STRING_TYPE = bridge.property(XPathResult.STRING_TYPE, .{ .template = true });
|
||||
pub const BOOLEAN_TYPE = bridge.property(XPathResult.BOOLEAN_TYPE, .{ .template = true });
|
||||
pub const UNORDERED_NODE_ITERATOR_TYPE = bridge.property(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, .{ .template = true });
|
||||
pub const ORDERED_NODE_ITERATOR_TYPE = bridge.property(XPathResult.ORDERED_NODE_ITERATOR_TYPE, .{ .template = true });
|
||||
pub const UNORDERED_NODE_SNAPSHOT_TYPE = bridge.property(XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, .{ .template = true });
|
||||
pub const ORDERED_NODE_SNAPSHOT_TYPE = bridge.property(XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, .{ .template = true });
|
||||
pub const ANY_UNORDERED_NODE_TYPE = bridge.property(XPathResult.ANY_UNORDERED_NODE_TYPE, .{ .template = true });
|
||||
pub const FIRST_ORDERED_NODE_TYPE = bridge.property(XPathResult.FIRST_ORDERED_NODE_TYPE, .{ .template = true });
|
||||
|
||||
// Accessors: every one is registered with a getter and a null setter.
// The getters that can return error.InvalidStateError pass
// `.dom_exception = true`.
// NOTE(review): presumably that option surfaces the Zig error as a
// DOMException — confirm against the bridge.
pub const resultType = bridge.accessor(XPathResult.getResultType, null, .{});
|
||||
pub const numberValue = bridge.accessor(XPathResult.getNumberValue, null, .{ .dom_exception = true });
|
||||
pub const stringValue = bridge.accessor(XPathResult.getStringValue, null, .{ .dom_exception = true });
|
||||
pub const booleanValue = bridge.accessor(XPathResult.getBooleanValue, null, .{ .dom_exception = true });
|
||||
pub const singleNodeValue = bridge.accessor(XPathResult.getSingleNodeValue, null, .{ .dom_exception = true });
|
||||
pub const snapshotLength = bridge.accessor(XPathResult.getSnapshotLength, null, .{ .dom_exception = true });
|
||||
pub const invalidIteratorState = bridge.accessor(XPathResult.getInvalidIteratorState, null, .{});
|
||||
|
||||
// Methods: both can fail with error.InvalidStateError.
pub const iterateNext = bridge.function(XPathResult.iterateNext, .{ .dom_exception = true });
|
||||
pub const snapshotItem = bridge.function(XPathResult.snapshotItem, .{ .dom_exception = true });
|
||||
};
|
||||
|
||||
const testing = @import("../../testing.zig");
|
||||
|
||||
// Runs the HTML fixture `xpath/xpath_result.html` through the shared
// `testing.htmlRunner` harness.
test "WebApi: XPathResult" {
|
||||
try testing.htmlRunner("xpath/xpath_result.html", .{});
|
||||
}
|
||||
|
||||
// Runs the HTML fixture `xpath/xpath_conformance.html` through the same
// harness.
test "WebApi: XPath conformance" {
|
||||
try testing.htmlRunner("xpath/xpath_conformance.html", .{});
|
||||
}
|
||||
|
||||
// This uses console.warn, uncomment if you want to run it
|
||||
// test "WebApi: XPath perf" {
|
||||
// try testing.htmlRunner("xpath/xpath_perf.html", .{});
|
||||
// }
|
||||
@@ -71,8 +71,16 @@ pub fn setAs(self: *Link, value: []const u8, frame: *Frame) !void {
|
||||
return self.asElement().setAttributeSafe(comptime .wrap("as"), .wrap(value), frame);
|
||||
}
|
||||
|
||||
/// Returns the `media` attribute of the link element, or an empty string
/// when the attribute is absent.
pub fn getMedia(self: *Link) []const u8 {
    const media = self.asElement().getAttributeSafe(comptime .wrap("media"));
    return media orelse "";
}
|
||||
|
||||
/// Writes `value` to the `media` attribute of the link element, forwarding
/// any error from `setAttributeSafe`.
pub fn setMedia(self: *Link, value: []const u8, frame: *Frame) !void {
    const element = self.asElement();
    return element.setAttributeSafe(comptime .wrap("media"), .wrap(value), frame);
}
|
||||
|
||||
/// Returns the `crossorigin` content attribute of the link element, or null
/// when it is absent.
///
/// The lookup uses the lowercase attribute name, the same key
/// `setCrossOrigin` writes. The block previously had a first `return` that
/// looked up the mixed-case "crossOrigin", which left the lowercase lookup
/// unreachable; only the lowercase lookup is kept.
pub fn getCrossOrigin(self: *const Link) ?[]const u8 {
    return self.asConstElement().getAttributeSafe(comptime .wrap("crossorigin"));
}
|
||||
|
||||
pub fn setCrossOrigin(self: *Link, value: []const u8, frame: *Frame) !void {
|
||||
@@ -80,7 +88,7 @@ pub fn setCrossOrigin(self: *Link, value: []const u8, frame: *Frame) !void {
|
||||
if (std.ascii.eqlIgnoreCase(value, "use-credentials")) {
|
||||
normalized = "use-credentials";
|
||||
}
|
||||
return self.asElement().setAttributeSafe(comptime .wrap("crossOrigin"), .wrap(normalized), frame);
|
||||
return self.asElement().setAttributeSafe(comptime .wrap("crossorigin"), .wrap(normalized), frame);
|
||||
}
|
||||
|
||||
pub fn linkAddedCallback(self: *Link, frame: *Frame) !void {
|
||||
@@ -120,6 +128,7 @@ pub const JsApi = struct {
|
||||
|
||||
pub const as = bridge.accessor(Link.getAs, Link.setAs, .{});
|
||||
pub const rel = bridge.accessor(Link.getRel, Link.setRel, .{});
|
||||
pub const media = bridge.accessor(Link.getMedia, Link.setMedia, .{});
|
||||
pub const href = bridge.accessor(Link.getHref, Link.setHref, .{});
|
||||
pub const crossOrigin = bridge.accessor(Link.getCrossOrigin, Link.setCrossOrigin, .{});
|
||||
pub const relList = bridge.accessor(_getRelList, null, .{ .null_as_undefined = true });
|
||||
|
||||
@@ -28,6 +28,22 @@ pub const List = @import("List.zig");
|
||||
const String = lp.String;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
// Translates a selector parsing error into the error name DOMException
// knows about: every selector-specific failure becomes error.SyntaxError,
// anything else is returned unchanged.
pub fn mapErrorToDOM(err: anyerror) anyerror {
    const syntax_errors = [_]anyerror{
        error.InvalidSelector,
        error.InvalidAttributeSelector,
        error.InvalidIDSelector,
        error.InvalidClassSelector,
        error.UnknownPseudoClass,
        error.InvalidTagSelector,
        error.InvalidPseudoClass,
        error.InvalidNthPattern,
    };
    for (syntax_errors) |selector_error| {
        if (err == selector_error) return error.SyntaxError;
    }
    return err;
}
|
||||
|
||||
pub fn parseLeaky(arena: Allocator, input: []const u8) !Parsed {
|
||||
if (input.len == 0) {
|
||||
return error.SyntaxError;
|
||||
|
||||
987
src/browser/xpath/Evaluator.zig
Normal file
987
src/browser/xpath/Evaluator.zig
Normal file
@@ -0,0 +1,987 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! XPath 1.0 evaluator — runs an `ast.Expr` against a context node and
|
||||
//! produces a `Result`. The evaluator allocates intermediate values
|
||||
//! (node-set slices, formatted numbers, materialized attribute nodes)
|
||||
//! into the caller's arena. The context `Frame` is needed for
|
||||
//! `getElementById` and to materialize attributes (the attribute axis
|
||||
//! returns full `Attribute` nodes so the result is `*Node`-uniform).
|
||||
//!
|
||||
//! Document-order sort happens once at the public boundary
|
||||
//! (`evaluate()`); intermediate step results stay in axis order so
|
||||
//! reverse-axis positional predicates evaluate against proximity.
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
|
||||
const Node = @import("../webapi/Node.zig");
|
||||
|
||||
const ast = @import("ast.zig");
|
||||
const Parser = @import("Parser.zig");
|
||||
const result = @import("result.zig");
|
||||
const functions = @import("functions.zig");
|
||||
|
||||
const Frame = lp.Frame;
|
||||
const Element = Node.Element;
|
||||
const Document = Node.Document;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const Evaluator = @This();
|
||||
|
||||
/// Error set shared by `evaluate` and every internal evaluation helper.
pub const Error = error{
|
||||
OutOfMemory,
|
||||
WriteFailed,
|
||||
// Surfaces from Attribute materialization (`Entry.toAttribute` →
|
||||
// `String.dupe` enforces a length limit). The polyfill never hits
|
||||
// this since JS strings are unbounded, but Lightpanda's `String`
|
||||
// type caps at u32::MAX bytes — propagate so callers can surface
|
||||
// a DOM exception.
|
||||
StringTooLarge,
|
||||
UnknownFunction,
|
||||
UnionRequiresNodeSets,
|
||||
};
|
||||
|
||||
// Allocator for every intermediate value (node-set slices, lowered names).
arena: Allocator,
|
||||
// Frame used for owner-document resolution, id lookups and the attribute
// memoization map.
frame: *Frame,
|
||||
|
||||
/// Public entry point: evaluates `expr` with `context_node` as the context
/// node (position 1 of 1) and returns the resulting value. Intermediate
/// allocations go into `arena`. A node-set result is sorted into document
/// order before it is returned, per XPath spec §3.3.
pub fn evaluate(arena: Allocator, expr: *const ast.Expr, context_node: *Node, frame: *Frame) Error!result.Result {
    var evaluator: Evaluator = .{ .arena = arena, .frame = frame };
    const value = try evaluator.evalExpr(expr, context_node, 1, 1);
    switch (value) {
        // The slice is typed const but was produced by this evaluation, so
        // it is sorted in place.
        .node_set => |nodes| sortDocOrder(@constCast(nodes)),
        else => {},
    }
    return value;
}
|
||||
|
||||
pub const SearchError = Error || Parser.Error;
|
||||
|
||||
/// Helper for `DOM.performSearch`: parses `expression`, evaluates it with
/// `root` as the context node and returns the matching nodes. An expression
/// whose top-level value is a scalar yields an empty slice (decision #3 —
/// this API finds nodes, it is not a general calculator).
pub fn searchAll(arena: Allocator, root: *Node, expression: []const u8, frame: *Frame) SearchError![]const *Node {
    const parsed = try Parser.parse(arena, expression);
    const value = try evaluate(arena, parsed, root, frame);
    if (value == .node_set) return value.node_set;
    return &.{};
}
|
||||
|
||||
// ----- AST evaluation -----
|
||||
|
||||
// Evaluates one AST node. `ctx` is the context node, `pos` / `size` the
// context position and size that are passed on to sub-expressions.
fn evalExpr(self: *Evaluator, expr: *const ast.Expr, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    switch (expr.*) {
        .number => |n| return .{ .number = n },
        .literal => |s| return .{ .string = s },
        // decision #3 stub: variable references evaluate to "".
        .var_ref => return .{ .string = "" },
        .neg => |inner| {
            const operand = try self.evalExpr(inner, ctx, pos, size);
            const as_number = try result.toNumber(self.arena, operand);
            return .{ .number = -as_number };
        },
        .binop => |bo| return self.evalBinop(bo, ctx, pos, size),
        // Paths establish their own positions per step, so `pos` / `size`
        // are not forwarded.
        .path => |p| return self.evalPath(p, ctx),
        .filter_path => |fp| return self.evalFilterPath(fp, ctx, pos, size),
        .filter => |f| return self.evalFilter(f, ctx, pos, size),
        .fn_call => |fc| return self.evalFnCall(fc, ctx, pos, size),
    }
}
|
||||
|
||||
// Evaluates a location path. The two fast paths are tried first; otherwise
// the steps are applied one after another, starting from a single-node set.
fn evalPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!result.Result {
    if (try self.tryIdLookupFastPath(path, ctx)) |hit| return hit;
    if (try self.tryFusedDescendantFastPath(path, ctx)) |hit| return hit;

    // Absolute paths start at the owner document; if none can be resolved
    // the context node itself is used.
    var start: *Node = ctx;
    if (path.absolute and ctx._type != .document) {
        if (ctx.ownerDocument(self.frame)) |owner| start = owner.asNode();
    }

    const seed = try self.arena.alloc(*Node, 1);
    seed[0] = start;

    var nodes: []const *Node = seed;
    for (path.steps) |step| {
        nodes = (try self.evalStep(nodes, step)).node_set;
    }
    return .{ .node_set = nodes };
}
|
||||
|
||||
// Fast path for the very common `//tag[@id='x']` and `.//tag[@id='x']`
// shapes (and their wildcard `//*[@id='x']` variants), answered from
// `frame.getElementByIdFromNode` instead of a tree walk. The literal may
// sit on either side of `=`.
//
// Mirrors the tradeoff `webapi/selector/List.zig:optimizeSelector` already
// makes for `querySelector(All)`: the id-map only stores the first element
// per ID in document order, so duplicate IDs (invalid HTML, but possible)
// yield one match here where a strict tree walk would find all. Acceptable
// because Capybara/Selenium hot paths assume unique IDs and CSS has shipped
// this compromise for years.
//
// Returns null, so the caller falls through to the general path, for any
// deviation: extra steps, extra predicates, non-eq predicate, non-literal
// RHS, an unresolved search root, or an id lookup that lands on the search
// root itself.
fn tryIdLookupFastPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!?result.Result {
    // Two acceptable AST shapes:
    //   //tag[@id='x']  parses to: ds::node() / child::tag[pred]
    //   .//tag[@id='x'] parses to: self::node() / ds::node() / child::tag[pred]
    const target = matchDescendantPathShape(path) orelse return null;

    if (target.axis != .child) return null;
    if (target.predicates.len != 1) return null;

    // Tag name (null = wildcard "*"). A type_test (e.g. `node()`, `text()`)
    // doesn't qualify because getElementByIdFromNode only returns elements.
    const tag_name: ?[]const u8 = switch (target.node_test) {
        .name => |n| if (std.mem.eql(u8, n, "*")) null else n,
        .type_test => return null,
    };

    const id_value = matchAttrEqLiteral(target.predicates[0], "id") orelse return null;

    // Resolve the search root the same way the general path does.
    const search_root: *Node = if (path.absolute) blk: {
        if (ctx._type == .document) break :blk ctx;
        const owner = ctx.ownerDocument(self.frame) orelse return null;
        break :blk owner.asNode();
    } else ctx;

    const id_element = self.frame.getElementByIdFromNode(search_root, id_value) orelse {
        return .{ .node_set = &.{} };
    };
    const id_node = id_element.asNode();

    // The target step is `child::` applied to descendant-or-self of the
    // root, so only proper descendants of the root can ever match. When the
    // lookup lands on the root itself (a relative path evaluated on the
    // element that carries the id), answering from here would wrongly
    // include the context node; let the general evaluation decide instead.
    if (id_node == search_root) return null;

    // Relative paths must filter to descendants of the context.
    // getElementByIdFromNode is doc-wide.
    if (!search_root.contains(id_node)) {
        return .{ .node_set = &.{} };
    }

    // Tag check (case-insensitive per decision #2). Element tag names are
    // compared in lowercase via `getTagNameLower`; lowercase the AST name
    // once and compare.
    if (tag_name) |tag| {
        const lowered = try std.ascii.allocLowerString(self.arena, tag);
        if (!std.mem.eql(u8, lowered, id_element.getTagNameLower())) {
            return .{ .node_set = &.{} };
        }
    }

    const out = try self.arena.alloc(*Node, 1);
    out[0] = id_node;
    return .{ .node_set = out };
}
|
||||
|
||||
// Sibling of `tryIdLookupFastPath` for predicates that are not an id test.
// It accepts the same AST shape (`//<test>[preds]` / `.//<test>[preds]`),
// but walks the descendants of the search root once, in document order,
// applying the node test and the predicates inline. Compared with the
// general path this avoids the per-step axis materialization, the per-step
// intermediate lists and the dedup hash map; a single-context forward walk
// already yields unique nodes in document order.
//
// Typical expressions served here: `//div`, `//*`, `//*[@class='x']`,
// `//div[@class='x']`, `//div[contains(@class,'x')]`.
//
// Every predicate must be "safe": not numeric at the top level (number,
// neg, arithmetic binop, or a call to a number-returning function) and
// free of `position()` / `last()` anywhere in its AST. A numeric predicate
// needs the context position, which this walk does not track; a
// `position()` / `last()` nested in a sub-path's own step is rejected too,
// conservatively, even though it is local to that sub-axis.
//
// Returns null when the shape or a predicate does not qualify, or when the
// owner document of an absolute path cannot be resolved.
fn tryFusedDescendantFastPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!?result.Result {
    const target = matchDescendantPathShape(path) orelse return null;
    if (target.axis != .child) return null;

    for (target.predicates) |pred| {
        if (!isSafeNonPositionalPredicate(pred)) return null;
    }

    // Lowercase the name test once; null stands for "*" or a type test.
    var lowered_name: ?[]const u8 = null;
    switch (target.node_test) {
        .name => |n| {
            if (!std.mem.eql(u8, n, "*")) {
                lowered_name = try std.ascii.allocLowerString(self.arena, n);
            }
        },
        .type_test => {},
    }

    var search_root: *Node = ctx;
    if (path.absolute and ctx._type != .document) {
        const owner = ctx.ownerDocument(self.frame) orelse return null;
        search_root = owner.asNode();
    }

    var matches: std.ArrayList(*Node) = .empty;
    try self.fusedDescend(search_root, target, lowered_name, &matches);
    return .{ .node_set = matches.items };
}
|
||||
|
||||
// Recursive worker of the fused walk: visits the children of `parent` in
// order, appends each child that passes the node test and every predicate
// to `out`, then descends into that child regardless of whether it matched.
fn fusedDescend(
    self: *Evaluator,
    parent: *Node,
    target: ast.Step,
    lowered_name: ?[]const u8,
    out: *std.ArrayList(*Node),
) Error!void {
    var children = parent.childrenIterator();
    while (children.next()) |child| {
        const accepted = blk: {
            if (!matchTest(child, target.node_test, target.axis, lowered_name)) break :blk false;
            for (target.predicates) |pred| {
                // Position and size are placeholders: the safety gate in
                // the caller rejected every predicate that depends on them.
                const verdict = try self.evalExpr(pred, child, 1, 1);
                if (!result.toBoolean(verdict)) break :blk false;
            }
            break :blk true;
        };
        if (accepted) try out.append(self.arena, child);
        try self.fusedDescend(child, target, lowered_name, out);
    }
}
|
||||
|
||||
// Recognizes the two step layouts the fast paths understand and returns
// the final (target) step, or null for any other layout:
//   2 steps: descendant-or-self::node() / <target>
//   3 steps: self::node() / descendant-or-self::node() / <target>
fn matchDescendantPathShape(path: ast.Path) ?ast.Step {
    const steps = path.steps;
    if (steps.len == 2) {
        return if (isDescendantOrSelfNode(steps[0])) steps[1] else null;
    }
    if (steps.len == 3) {
        if (isSelfNode(steps[0]) and isDescendantOrSelfNode(steps[1])) return steps[2];
    }
    return null;
}
|
||||
|
||||
// True when a predicate can be evaluated without a real context position:
// it is not numeric at the top level and never mentions `position()` or
// `last()`.
fn isSafeNonPositionalPredicate(expr: *const ast.Expr) bool {
    return !isNumericTopLevel(expr) and !containsPositionOrLast(expr);
}
|
||||
|
||||
// True when the outermost node of `expr` is a number literal, a negation,
// an arithmetic binop, or a call to one of the functions listed in
// `isNumericFnName`.
fn isNumericTopLevel(expr: *const ast.Expr) bool {
    switch (expr.*) {
        .number, .neg => return true,
        .fn_call => |call| return isNumericFnName(call.name),
        .binop => |bo| {
            const op = bo.op;
            return op == .add or op == .sub or op == .mul or op == .div or op == .mod;
        },
        else => return false,
    }
}
|
||||
|
||||
// True when `name` is one of the function names this evaluator treats as
// number-returning. The comparison is exact and case-sensitive.
fn isNumericFnName(name: []const u8) bool {
    const numeric_functions = [_][]const u8{
        "position",
        "last",
        "count",
        "sum",
        "floor",
        "ceiling",
        "round",
        "number",
        "string-length",
    };
    var i: usize = 0;
    while (i < numeric_functions.len) : (i += 1) {
        if (std.mem.eql(u8, numeric_functions[i], name)) return true;
    }
    return false;
}
|
||||
|
||||
// Recursively searches `expr` for a call to `position()` or `last()`,
// including inside step predicates of nested paths and function arguments.
fn containsPositionOrLast(expr: *const ast.Expr) bool {
    switch (expr.*) {
        .number, .literal, .var_ref => return false,
        .neg => |operand| return containsPositionOrLast(operand),
        .binop => |bo| {
            if (containsPositionOrLast(bo.left)) return true;
            return containsPositionOrLast(bo.right);
        },
        .filter => |f| {
            if (containsPositionOrLast(f.expr)) return true;
            return containsPositionOrLast(f.predicate);
        },
        .filter_path => |fp| {
            if (containsPositionOrLast(fp.filter)) return true;
            return stepsContainPositionOrLast(fp.steps);
        },
        .path => |p| return stepsContainPositionOrLast(p.steps),
        .fn_call => |call| {
            if (std.mem.eql(u8, call.name, "position")) return true;
            if (std.mem.eql(u8, call.name, "last")) return true;
            return argsContainPositionOrLast(call.args);
        },
    }
}
|
||||
|
||||
// True when any predicate of any step mentions `position()` or `last()`.
fn stepsContainPositionOrLast(steps: []const ast.Step) bool {
    for (steps) |step| {
        var i: usize = 0;
        while (i < step.predicates.len) : (i += 1) {
            if (containsPositionOrLast(step.predicates[i])) return true;
        }
    }
    return false;
}
|
||||
|
||||
// True when any function argument mentions `position()` or `last()`.
fn argsContainPositionOrLast(args: []const *ast.Expr) bool {
    var i: usize = 0;
    while (i < args.len) : (i += 1) {
        if (containsPositionOrLast(args[i])) return true;
    }
    return false;
}
|
||||
|
||||
// True for a predicate-free `descendant-or-self::node()` step.
fn isDescendantOrSelfNode(s: ast.Step) bool {
    if (s.axis != .descendant_or_self or s.predicates.len != 0) return false;
    switch (s.node_test) {
        .type_test => |kind| return kind == .node,
        .name => return false,
    }
}
|
||||
|
||||
// True for a predicate-free `self::node()` step.
fn isSelfNode(s: ast.Step) bool {
    if (s.axis != .self or s.predicates.len != 0) return false;
    switch (s.node_test) {
        .type_test => |kind| return kind == .node,
        .name => return false,
    }
}
|
||||
|
||||
// Matches `@attr = 'literal'` or `'literal' = @attr` for the given
// attribute name and returns the literal text; null for any other shape.
fn matchAttrEqLiteral(expr: *const ast.Expr, attr_name: []const u8) ?[]const u8 {
    const bo = switch (expr.*) {
        .binop => |b| b,
        else => return null,
    };
    if (bo.op != .eq) return null;

    if (isAttrPath(bo.left, attr_name)) {
        if (bo.right.* == .literal) return bo.right.literal;
    }
    if (isAttrPath(bo.right, attr_name)) {
        if (bo.left.* == .literal) return bo.left.literal;
    }
    return null;
}
|
||||
|
||||
// True when `expr` is exactly the relative one-step path `@attr_name`:
// attribute axis, no predicates, name test equal to `attr_name`
// (byte-for-byte).
fn isAttrPath(expr: *const ast.Expr, attr_name: []const u8) bool {
    const p = switch (expr.*) {
        .path => |path| path,
        else => return false,
    };
    if (p.absolute or p.steps.len != 1) return false;

    const only = p.steps[0];
    if (only.axis != .attribute or only.predicates.len != 0) return false;

    switch (only.node_test) {
        .name => |n| return std.mem.eql(u8, n, attr_name),
        .type_test => return false,
    }
}
|
||||
|
||||
// Evaluates `filter-expr / steps…`. A filter expression that is not a
// node-set is returned unchanged; otherwise the steps are applied in order
// to its nodes.
fn evalFilterPath(self: *Evaluator, fp: ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    const base = try self.evalExpr(fp.filter, ctx, pos, size);
    switch (base) {
        .node_set => |seed| {
            var nodes: []const *Node = seed;
            for (fp.steps) |step| {
                nodes = (try self.evalStep(nodes, step)).node_set;
            }
            return .{ .node_set = nodes };
        },
        else => return base,
    }
}
|
||||
|
||||
// Evaluates `expr[predicate]`. A base value that is not a node-set is
// returned unchanged; otherwise each node is tested with its 1-based
// position in the base set and the set length as context size.
fn evalFilter(self: *Evaluator, f: ast.Filter, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    const base = try self.evalExpr(f.expr, ctx, pos, size);
    const candidates = switch (base) {
        .node_set => |ns| ns,
        else => return base,
    };

    var kept: std.ArrayList(*Node) = .empty;
    var position: usize = 0;
    for (candidates) |candidate| {
        position += 1;
        const verdict = try self.evalExpr(f.predicate, candidate, position, candidates.len);
        if (predicateMatches(verdict, position)) try kept.append(self.arena, candidate);
    }
    return .{ .node_set = kept.items };
}
|
||||
|
||||
// ----- step + axis -----
|
||||
|
||||
// Applies one location step to every node of `ctx_nodes` and returns the
// de-duplicated union of the per-node results, in first-seen order.
//
// Per context node: collect the axis nodes, keep those passing the node
// test, then run each predicate in turn over the survivors. Positions are
// 1-based indexes into the list a predicate receives, so they follow axis
// order.
fn evalStep(self: *Evaluator, ctx_nodes: []const *Node, step: ast.Step) Error!result.Result {
    // Name tests match case-insensitively (decision #2). Lowercasing the
    // test name once per step keeps the case folding out of the per-node
    // comparison.
    const lowered_name: ?[]const u8 = switch (step.node_test) {
        .name => |n| if (std.mem.eql(u8, n, "*")) null else try std.ascii.allocLowerString(self.arena, n),
        .type_test => null,
    };

    var unique: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;

    for (ctx_nodes) |origin| {
        var survivors: std.ArrayList(*Node) = .empty;
        for (try self.axisNodes(origin, step.axis)) |candidate| {
            if (!matchTest(candidate, step.node_test, step.axis, lowered_name)) continue;
            try survivors.append(self.arena, candidate);
        }

        var pool: []const *Node = survivors.items;
        for (step.predicates) |pred| {
            var kept: std.ArrayList(*Node) = .empty;
            var position: usize = 0;
            for (pool) |candidate| {
                position += 1;
                const verdict = try self.evalExpr(pred, candidate, position, pool.len);
                if (predicateMatches(verdict, position)) try kept.append(self.arena, candidate);
            }
            pool = kept.items;
        }

        for (pool) |member| try unique.put(self.arena, member, {});
    }

    return .{ .node_set = unique.keys() };
}
|
||||
|
||||
// Materializes the nodes on `axis` relative to `node`, in axis order:
// forward axes in document order, reverse axes nearest-first. The final
// node-set is sorted into document order later, at the public boundary.
fn axisNodes(self: *Evaluator, node: *Node, axis: ast.Axis) Error![]const *Node {
    var collected: std.ArrayList(*Node) = .empty;
    switch (axis) {
        .self => try collected.append(self.arena, node),
        .child => {
            var children = node.childrenIterator();
            while (children.next()) |child| try collected.append(self.arena, child);
        },
        .parent => {
            if (node.parentNode()) |parent| try collected.append(self.arena, parent);
        },
        .descendant, .descendant_or_self => {
            if (axis == .descendant_or_self) try collected.append(self.arena, node);
            try self.appendDescendants(node, &collected);
        },
        // Reverse axes: proximity order, nearest ancestor first.
        .ancestor, .ancestor_or_self => {
            if (axis == .ancestor_or_self) try collected.append(self.arena, node);
            var cursor = node.parentNode();
            while (cursor) |ancestor| {
                try collected.append(self.arena, ancestor);
                cursor = ancestor.parentNode();
            }
        },
        .following_sibling => {
            var cursor = node.nextSibling();
            while (cursor) |sibling| {
                try collected.append(self.arena, sibling);
                cursor = sibling.nextSibling();
            }
        },
        .preceding_sibling => {
            var cursor = node.previousSibling();
            while (cursor) |sibling| {
                try collected.append(self.arena, sibling);
                cursor = sibling.previousSibling();
            }
        },
        .following => try self.appendFollowing(node, &collected),
        .preceding => try self.appendPreceding(node, &collected),
        .attribute => try self.appendAttributes(node, &collected),
        // decision #3 stubs: these axes select nothing.
        .namespace, .unknown => {},
    }
    return collected.items;
}
|
||||
|
||||
// Appends every descendant of `node` to `out` in pre-order (each child
// before that child's own descendants). `node` itself is not appended.
fn appendDescendants(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) Error!void {
    var children = node.childrenIterator();
    while (children.next()) |child| {
        try out.append(self.arena, child);
        try self.appendDescendants(child, out);
    }
}
|
||||
|
||||
// Appends the `following` axis of `start`: for `start` and then each of
// its ancestors in turn, every later sibling followed by that sibling's
// descendants. Descendants of `start` itself are never visited.
fn appendFollowing(self: *Evaluator, start: *Node, out: *std.ArrayList(*Node)) Error!void {
    var cursor: ?*Node = start;
    while (cursor) |anchor| {
        var sibling = anchor.nextSibling();
        while (sibling) |s| {
            try out.append(self.arena, s);
            try self.appendDescendants(s, out);
            sibling = s.nextSibling();
        }
        cursor = anchor.parentNode();
    }
}
|
||||
|
||||
// Appends the subtree rooted at `n` in reverse document order: children
// from last to first (each expanded recursively), then `n` itself.
fn appendPrecedingSubtree(self: *Evaluator, n: *Node, out: *std.ArrayList(*Node)) Error!void {
    var cursor = n.lastChild();
    while (cursor) |child| {
        try self.appendPrecedingSubtree(child, out);
        cursor = child.previousSibling();
    }
    try out.append(self.arena, n);
}
|
||||
|
||||
// Appends the `preceding` axis of `start`: for `start` and then each of
// its ancestors that still has a parent, the subtrees of all earlier
// siblings in reverse document order. Ancestors themselves are skipped.
fn appendPreceding(self: *Evaluator, start: *Node, out: *std.ArrayList(*Node)) Error!void {
    var anchor: *Node = start;
    while (anchor.parentNode()) |parent| : (anchor = parent) {
        var sibling = anchor.previousSibling();
        while (sibling) |s| {
            try self.appendPrecedingSubtree(s, out);
            sibling = s.previousSibling();
        }
    }
}
|
||||
|
||||
/// Appends one node per attribute of `node` to `out`. Does nothing when `node`
/// is not an Element.
fn appendAttributes(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) Error!void {
    const element = node.is(Element) orelse return;
    const frame = self.frame;

    var attrs = element.attributeIterator();
    while (attrs.next()) |entry| {
        // Attribute objects are memoized in `frame._attribute_lookup`, keyed
        // by the entry's address, so repeated XPath queries (e.g. polling by
        // Capybara/Selenium) hand back the same *Attribute rather than
        // allocating a new page-lifetime object on each call. This mirrors
        // Attribute.List.getAttribute / NamedNodeMap.getAtIndex.
        const slot = try frame._attribute_lookup.getOrPut(frame.arena, @intFromPtr(entry));
        if (!slot.found_existing) {
            slot.value_ptr.* = try entry.toAttribute(element, frame);
        }
        try out.append(self.arena, slot.value_ptr.*._proto);
    }
}
|
||||
|
||||
// ----- node test matching -----
|
||||
|
||||
/// Reports whether `node` satisfies the node test `test_`. Name tests are
/// delegated to `matchNameTest`; type tests are decided from the DOM node type.
fn matchTest(node: *Node, test_: ast.NodeTest, axis: ast.Axis, lowered_name: ?[]const u8) bool {
    switch (test_) {
        .name => |name| return matchNameTest(node, name, axis, lowered_name),
        .type_test => |kind| return switch (kind) {
            .node => true,
            // XPath 1.0 §5.7 has no CDATASection node in its data model (CDATA
            // content belongs to the text node value), so `text()` accepts
            // both DOM Text (3) and CDATASection (4).
            .text => switch (node.getNodeType()) {
                3, 4 => true,
                else => false,
            },
            .comment => node.getNodeType() == 8,
            .processing_instruction => node.getNodeType() == 7,
        },
    }
}
|
||||
|
||||
/// Reports whether `node` matches the name test `name` on `axis`.
///
/// `lowered_name` is non-null iff `name != "*"`. Element tag names
/// (`getTagNameLower`) and html5ever-stored attribute names are already
/// lowercase, so a byte-wise compare against the pre-lowered test name stands
/// in for a per-call case-insensitive comparison.
fn matchNameTest(node: *Node, name: []const u8, axis: ast.Axis, lowered_name: ?[]const u8) bool {
    const is_wildcard = std.mem.eql(u8, name, "*");

    if (axis != .attribute) {
        // Every non-attribute axis only ever matches elements by name.
        const element = node.is(Element) orelse return false;
        return is_wildcard or std.mem.eql(u8, element.getTagNameLower(), lowered_name.?);
    }

    return switch (node._type) {
        .attribute => |attr| is_wildcard or std.mem.eql(u8, attr._name.str(), lowered_name.?),
        else => false,
    };
}
|
||||
|
||||
// ----- binop -----
|
||||
|
||||
/// Evaluates a binary operator node against the context `(ctx, pos, size)`.
///
/// `or` / `and` short-circuit; every other operator evaluates the left operand
/// and then the right operand before combining them.
fn evalBinop(self: *Evaluator, bo: ast.BinOp, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    if (bo.op == .or_ or bo.op == .and_) {
        // `or` is decided by a true left operand, `and` by a false one.
        const decisive = (bo.op == .or_);
        const first = try self.evalExpr(bo.left, ctx, pos, size);
        if (result.toBoolean(first) == decisive) return .{ .boolean = decisive };
        const second = try self.evalExpr(bo.right, ctx, pos, size);
        return .{ .boolean = result.toBoolean(second) };
    }

    const lhs = try self.evalExpr(bo.left, ctx, pos, size);
    const rhs = try self.evalExpr(bo.right, ctx, pos, size);

    switch (bo.op) {
        .or_, .and_ => unreachable, // returned above
        .eq, .neq, .lt, .gt, .lte, .gte => {
            return .{ .boolean = try self.xCmp(lhs, rhs, bo.op) };
        },
        .add, .sub, .mul, .div, .mod => {
            const a = try result.toNumber(self.arena, lhs);
            const b = try result.toNumber(self.arena, rhs);
            const value: f64 = switch (bo.op) {
                .add => a + b,
                .sub => a - b,
                .mul => a * b,
                .div => a / b,
                // JS `%` and Zig `@rem` agree on sign for finite values
                // and propagate NaN (XPath §3.5).
                .mod => @rem(a, b),
                else => unreachable,
            };
            return .{ .number = value };
        },
        .union_ => {
            if (lhs != .node_set or rhs != .node_set) return error.UnionRequiresNodeSets;
            // An insertion-ordered set removes duplicates; the keys are then
            // sorted in place into document order.
            var unique: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;
            for (lhs.node_set) |n| try unique.put(self.arena, n, {});
            for (rhs.node_set) |n| try unique.put(self.arena, n, {});
            const merged = unique.keys();
            sortDocOrder(@constCast(merged));
            return .{ .node_set = merged };
        },
    }
}
|
||||
|
||||
// ----- comparison (XPath spec §3.4) -----
|
||||
|
||||
/// Compares `left` and `right` with `op` following XPath 1.0 §3.4.
///
/// `op` must be one of `.eq`, `.neq`, `.lt`, `.gt`, `.lte`, `.gte`.
///
/// - node-set vs node-set: true if any pair of nodes compares true, on
///   string-values for `=` / `!=` and on their numeric conversion otherwise.
/// - node-set vs boolean: the node-set is converted to a boolean (non-empty
///   is true) and the two booleans are compared.
/// - node-set vs number / string: true if any node compares true.
/// - no node-set: `=` / `!=` compare as boolean, else number, else string
///   (first type present wins); relational operators compare as numbers.
///
/// Operand order is preserved throughout so `<` / `>` stay asymmetric.
/// String-values are allocated from `self.arena`.
fn xCmp(self: *Evaluator, left: result.Result, right: result.Result, op: ast.BinOpKind) Error!bool {
    const is_eq = (op == .eq or op == .neq);
    const l_is_set = (left == .node_set);
    const r_is_set = (right == .node_set);

    if (l_is_set and r_is_set) {
        // Cache right-side string-values once. Without this, each left node
        // would pay |right| allocations — O(N×M) for a set×set comparison
        // (e.g. `//foo = //bar` on a large page).
        const right_strings = try self.arena.alloc([]const u8, right.node_set.len);
        for (right.node_set, 0..) |r, i| {
            right_strings[i] = try result.stringValueOf(self.arena, r);
        }
        for (left.node_set) |l| {
            const lv = try result.stringValueOf(self.arena, l);
            if (is_eq) {
                for (right_strings) |rv| {
                    if (cmpString(lv, rv, op)) return true;
                }
            } else {
                // The left value's numeric conversion does not depend on the
                // right node, so compute it once per left node.
                const lv_num = result.stringToNumber(lv);
                for (right_strings) |rv| {
                    if (cmpNumber(lv_num, result.stringToNumber(rv), op)) return true;
                }
            }
        }
        return false;
    }

    if (l_is_set or r_is_set) {
        const ns = if (l_is_set) left.node_set else right.node_set;
        const other = if (l_is_set) right else left;
        const ns_left = l_is_set;

        if (other == .boolean) {
            const ns_b = ns.len > 0;
            const a, const b = if (ns_left) .{ ns_b, other.boolean } else .{ other.boolean, ns_b };
            if (is_eq) return cmpBool(a, b, op);
            // Once the node-set has become a boolean, neither operand is a
            // node-set, so relational operators compare as numbers
            // (true → 1, false → 0). `cmpBool` only handles `=` / `!=` and
            // must not be reached with `<`, `>`, `<=`, `>=`.
            const a_num: f64 = if (a) 1 else 0;
            const b_num: f64 = if (b) 1 else 0;
            return cmpNumber(a_num, b_num, op);
        }

        for (ns) |n| {
            const sv = try result.stringValueOf(self.arena, n);
            const matched = switch (other) {
                .number => |num| blk: {
                    const sv_num = result.stringToNumber(sv);
                    const a, const b = if (ns_left) .{ sv_num, num } else .{ num, sv_num };
                    break :blk cmpNumber(a, b, op);
                },
                .string => |s| blk: {
                    if (is_eq) {
                        const a, const b = if (ns_left) .{ sv, s } else .{ s, sv };
                        break :blk cmpString(a, b, op);
                    }
                    const sv_num = result.stringToNumber(sv);
                    const s_num = result.stringToNumber(s);
                    const a, const b = if (ns_left) .{ sv_num, s_num } else .{ s_num, sv_num };
                    break :blk cmpNumber(a, b, op);
                },
                .boolean, .node_set => unreachable, // handled above
            };
            if (matched) return true;
        }
        return false;
    }

    // Neither is a node-set.
    if (is_eq) {
        if (left == .boolean or right == .boolean) {
            return cmpBool(result.toBoolean(left), result.toBoolean(right), op);
        }
        if (left == .number or right == .number) {
            const ln = try result.toNumber(self.arena, left);
            const rn = try result.toNumber(self.arena, right);
            return cmpNumber(ln, rn, op);
        }
        const ls = try result.toString(self.arena, left);
        const rs = try result.toString(self.arena, right);
        return cmpString(ls, rs, op);
    }

    // Non-eq with no node-set: both → number.
    const ln = try result.toNumber(self.arena, left);
    const rn = try result.toNumber(self.arena, right);
    return cmpNumber(ln, rn, op);
}
|
||||
|
||||
/// Byte-wise string (in)equality. Only `.eq` and `.neq` are valid: relational
/// operators always coerce their operands to numbers before comparing.
fn cmpString(a: []const u8, b: []const u8, op: ast.BinOpKind) bool {
    const same = std.mem.eql(u8, a, b);
    switch (op) {
        .eq => return same,
        .neq => return !same,
        else => unreachable, // <, > etc. always coerce to number first
    }
}
|
||||
|
||||
/// Applies the comparison operator `op` to two numbers.
///
/// Relies on native f64 comparison for NaN handling: `NaN = x` is false,
/// `NaN != x` is true, and every ordering comparison with NaN is false.
/// Any non-comparison operator is a caller bug.
fn cmpNumber(a: f64, b: f64, op: ast.BinOpKind) bool {
    switch (op) {
        .eq => return a == b,
        .neq => return a != b,
        .lt => return a < b,
        .lte => return a <= b,
        .gt => return a > b,
        .gte => return a >= b,
        else => unreachable,
    }
}
|
||||
|
||||
/// Boolean (in)equality. Only `.eq` and `.neq` are valid operators here; any
/// other operator is a caller bug.
fn cmpBool(a: bool, b: bool, op: ast.BinOpKind) bool {
    const same = (a == b);
    switch (op) {
        .eq => return same,
        .neq => return !same,
        else => unreachable,
    }
}
|
||||
|
||||
// ----- function calls -----
|
||||
|
||||
/// Evaluates a function call node.
///
/// `position()` and `last()` are answered here because they need the
/// `(pos, size)` context that `functions.call` does not receive; handling them
/// inline avoids threading per-call context through that signature. Every
/// other function gets its arguments evaluated eagerly, left to right, and is
/// dispatched to `functions.call`.
fn evalFnCall(self: *Evaluator, fc: ast.FnCall, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    const fn_name = fc.name;
    if (std.mem.eql(u8, fn_name, "position")) return .{ .number = @floatFromInt(pos) };
    if (std.mem.eql(u8, fn_name, "last")) return .{ .number = @floatFromInt(size) };

    // Eager evaluation matches the polyfill; lazy short-circuiting is not
    // needed because `or` / `and` are binops handled in evalBinop.
    const values = try self.arena.alloc(result.Result, fc.args.len);
    for (values, fc.args) |*slot, arg| {
        slot.* = try self.evalExpr(arg, ctx, pos, size);
    }

    return functions.call(self.arena, fn_name, values, ctx, self.frame);
}
|
||||
|
||||
// ----- helpers -----
|
||||
|
||||
/// Decides whether a predicate result `val` keeps the node at 1-based
/// `position`. A number keeps only the node whose position equals it (so a
/// non-integer never matches); any other value is converted to a boolean.
fn predicateMatches(val: result.Result, position: usize) bool {
    if (val == .number) {
        const wanted: f64 = @floatFromInt(position);
        return val.number == wanted;
    }
    return result.toBoolean(val);
}
|
||||
|
||||
/// Sorts `nodes` in place into document order. Slices of zero or one node are
/// left untouched.
pub fn sortDocOrder(nodes: []*Node) void {
    if (nodes.len > 1) {
        std.mem.sort(*Node, nodes, {}, lessThanDocOrder);
    }
}
|
||||
|
||||
/// Sort predicate: true when `a` comes before `b` in document order, i.e.
/// when `a.compareDocumentPosition(b)` reports FOLLOWING for `b`.
fn lessThanDocOrder(_: void, a: *Node, b: *Node) bool {
    // FOLLOWING (0x04) — b comes after a in document order.
    const following_bit = 0x04;
    return a != b and (a.compareDocumentPosition(b) & following_bit) != 0;
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Tests — pure-logic only. DOM-dependent evaluation lands as HTML
|
||||
// fixtures in Phase 9 (tests/xpath/*.html); Lightpanda has no in-Zig
|
||||
// way to construct a Frame + Document tree without the JS runtime.
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
const testing = std.testing;
|
||||
const Tokenizer = @import("Tokenizer.zig");
|
||||
|
||||
test "Evaluator: cmpNumber NaN semantics" {
    const nan = std.math.nan(f64);

    // NaN is unequal to everything, itself included.
    try testing.expect(!cmpNumber(nan, nan, .eq));
    try testing.expect(cmpNumber(nan, nan, .neq));

    // Every ordering comparison involving NaN is false.
    inline for (.{ .lt, .gt, .lte, .gte }) |op| {
        try testing.expect(!cmpNumber(nan, 0, op));
    }

    // Ordinary finite values.
    try testing.expect(cmpNumber(0, 0, .eq));
    try testing.expect(cmpNumber(1, 2, .lt));
    try testing.expect(cmpNumber(2, 1, .gt));
    try testing.expect(cmpNumber(1, 1, .lte));
    try testing.expect(cmpNumber(1, 1, .gte));
}
|
||||
|
||||
test "Evaluator: cmpString" {
    // Identical strings.
    try testing.expect(cmpString("a", "a", .eq));
    try testing.expect(!cmpString("a", "a", .neq));

    // Different strings.
    try testing.expect(cmpString("a", "b", .neq));
    try testing.expect(!cmpString("a", "b", .eq));
}
|
||||
|
||||
test "Evaluator: cmpBool" {
    // Equality holds only for matching operands.
    try testing.expect(cmpBool(true, true, .eq));
    try testing.expect(!cmpBool(true, false, .eq));

    // Inequality holds for differing operands.
    try testing.expect(cmpBool(true, false, .neq));
}
|
||||
|
||||
test "Evaluator: predicateMatches numeric vs boolean" {
    // Numbers select by position; a non-integer can never equal a position.
    try testing.expect(predicateMatches(.{ .number = 1 }, 1));
    try testing.expect(!predicateMatches(.{ .number = 2 }, 1));
    try testing.expect(!predicateMatches(.{ .number = 1.5 }, 1));

    // Booleans pass or fail regardless of position.
    try testing.expect(predicateMatches(.{ .boolean = true }, 7));
    try testing.expect(!predicateMatches(.{ .boolean = false }, 1));

    // Strings are truthy iff non-empty.
    try testing.expect(predicateMatches(.{ .string = "x" }, 99));
    try testing.expect(!predicateMatches(.{ .string = "" }, 1));

    // An empty node-set is falsy.
    try testing.expect(!predicateMatches(.{ .node_set = &.{} }, 1));
}
|
||||
|
||||
test "Evaluator: scalar arithmetic via parsed expressions" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    const Case = struct { src: []const u8, want: f64 };
    const cases = [_]Case{
        .{ .src = "1 + 2", .want = 3 },
        .{ .src = "5 - 3", .want = 2 },
        .{ .src = "4 * 2", .want = 8 },
        .{ .src = "10 div 4", .want = 2.5 },
        .{ .src = "10 mod 3", .want = 1 },
        .{ .src = "-5", .want = -5 },
        .{ .src = "1 + 2 * 3", .want = 7 },
    };

    for (cases) |case| {
        const expr = try Parser.parse(a, case.src);
        // Pure arithmetic never touches the Frame or the context node, so
        // synthetic pointers are enough to drive the binop / number paths.
        // Any expression that did dereference them would crash; none of the
        // inputs above do.
        var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) };
        const ctx_dummy: *Node = @ptrFromInt(0x2000);
        const r = try ev.evalExpr(expr, ctx_dummy, 1, 1);
        try testing.expect(r == .number);
        try testing.expectEqual(case.want, r.number);
    }
}
|
||||
|
||||
test "Evaluator: scalar comparison via parsed expressions" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    const Case = struct { src: []const u8, want: bool };
    const cases = [_]Case{
        .{ .src = "1 = 1", .want = true },
        .{ .src = "1 = 2", .want = false },
        .{ .src = "1 != 2", .want = true },
        .{ .src = "1 < 2", .want = true },
        .{ .src = "2 < 1", .want = false },
        .{ .src = "1 <= 1", .want = true },
        .{ .src = "2 >= 2", .want = true },
        .{ .src = "'abc' = 'abc'", .want = true },
        .{ .src = "'abc' != 'abd'", .want = true },
    };

    for (cases) |case| {
        const expr = try Parser.parse(a, case.src);
        var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) };
        const ctx_dummy: *Node = @ptrFromInt(0x2000);
        const r = try ev.evalExpr(expr, ctx_dummy, 1, 1);
        try testing.expect(r == .boolean);
        try testing.expectEqual(case.want, r.boolean);
    }
}
|
||||
|
||||
test "Evaluator: position() and last() reflect context" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();
    const ctx_dummy: *Node = @ptrFromInt(0x2000);

    // Evaluated at position 3 of a context of size 5.
    const Case = struct { src: []const u8, want: f64 };
    const cases = [_]Case{
        .{ .src = "position()", .want = 3 },
        .{ .src = "last()", .want = 5 },
    };
    for (cases) |case| {
        const expr = try Parser.parse(a, case.src);
        var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) };
        const r = try ev.evalExpr(expr, ctx_dummy, 3, 5);
        try testing.expectEqual(case.want, r.number);
    }

    {
        // Logical short-circuit: last() never evaluates if first
        // operand is true.
        const expr = try Parser.parse(a, "1 = 1 or last() > 0");
        var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) };
        const r = try ev.evalExpr(expr, ctx_dummy, 1, 1);
        try testing.expect(r.boolean);
    }
}
|
||||
|
||||
test "Evaluator: short-circuit and/or" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();
    const ctx_dummy: *Node = @ptrFromInt(0x2000);

    const Case = struct { src: []const u8, want: bool };
    const cases = [_]Case{
        .{ .src = "1 = 2 or 1 = 1", .want = true },
        .{ .src = "1 = 1 and 1 = 2", .want = false },
        .{ .src = "1 = 1 and 2 = 2", .want = true },
        .{ .src = "1 = 2 and 1 = 1", .want = false },
        .{ .src = "1 = 2 or 2 = 1", .want = false },
    };

    for (cases) |case| {
        const expr = try Parser.parse(a, case.src);
        var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) };
        const r = try ev.evalExpr(expr, ctx_dummy, 1, 1);
        try testing.expect(r == .boolean);
        try testing.expectEqual(case.want, r.boolean);
    }
}
|
||||
|
||||
test "Evaluator: unary minus" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    // Negation applies to the whole parenthesized sum.
    const negated_sum = try Parser.parse(a, "-(3 + 2)");

    const ctx_dummy: *Node = @ptrFromInt(0x2000);
    var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) };
    const r = try ev.evalExpr(negated_sum, ctx_dummy, 1, 1);
    try testing.expectEqual(@as(f64, -5), r.number);
}
|
||||
|
||||
test "Evaluator: division by zero produces infinity / NaN per IEEE" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();
    const ctx_dummy: *Node = @ptrFromInt(0x2000);

    {
        // Non-zero over zero is +infinity.
        const quotient = try Parser.parse(a, "1 div 0");
        var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) };
        const r = try ev.evalExpr(quotient, ctx_dummy, 1, 1);
        try testing.expect(std.math.isPositiveInf(r.number));
    }
    {
        // Zero over zero is NaN.
        const quotient = try Parser.parse(a, "0 div 0");
        var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) };
        const r = try ev.evalExpr(quotient, ctx_dummy, 1, 1);
        try testing.expect(std.math.isNan(r.number));
    }
}
|
||||
|
||||
test "Evaluator: searchAll on scalar expression returns empty (decision #3)" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    // The frame and root pointers are synthetic. That is safe only because
    // pure-scalar expressions (binop, literal, true(), comparison) never
    // reach into the Frame or the context node; a DOM-touching expression
    // such as `id('x')` would crash on dereference if added to this list.
    const scalar_exprs = .{ "1 + 2", "'hello'", "true()", "1 = 1" };
    inline for (scalar_exprs) |expr| {
        const found = try searchAll(a, @ptrFromInt(0x2000), expr, @ptrFromInt(0x1000));
        try testing.expectEqual(@as(usize, 0), found.len);
    }
}
|
||||
957
src/browser/xpath/Parser.zig
Normal file
957
src/browser/xpath/Parser.zig
Normal file
@@ -0,0 +1,957 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! XPath 1.0 expression parser.
|
||||
//!
|
||||
//! Recursive descent over a fully-tokenized stream, producing an
|
||||
//! `ast.Expr` tree allocated on the caller's arena. The AST borrows
|
||||
//! string/name slices from `input` and is valid for as long as the
|
||||
//! arena and input outlive it.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const Tokenizer = @import("Tokenizer.zig");
|
||||
const ast = @import("ast.zig");
|
||||
|
||||
const Token = Tokenizer.Token;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const Parser = @This();
|
||||
|
||||
pub const Error = error{
|
||||
OutOfMemory,
|
||||
UnexpectedToken,
|
||||
ExpectedNodeTest,
|
||||
ExpectedPrimaryExpr,
|
||||
MaxDepthExceeded,
|
||||
};
|
||||
|
||||
/// Cap recursive descent to keep adversarial input (e.g. `(((((...)))))`,
|
||||
/// `------5`) from blowing the stack. Real XPath expressions never come
|
||||
/// close to this; browsers typically allow several hundred.
|
||||
const max_depth: u16 = 64;
|
||||
|
||||
arena: Allocator,
|
||||
tokens: []const Token,
|
||||
pos: usize = 0,
|
||||
depth: u16 = 0,
|
||||
|
||||
/// Tokenizes all of `input`, then parses it as one XPath expression.
///
/// The token list and every AST node are allocated from `arena`. Returns
/// `error.UnexpectedToken` if tokens remain after a complete expression.
pub fn parse(arena: Allocator, input: []const u8) Error!*ast.Expr {
    var tokens: std.ArrayList(Token) = .empty;
    // Reserve roughly one token per four input bytes (at least 8) so typical
    // expressions tokenize without regrowing the list.
    try tokens.ensureTotalCapacity(arena, @max(8, input.len / 4));

    var lexer = Tokenizer{ .input = input };
    var tok = lexer.next();
    while (tok != .eof) : (tok = lexer.next()) {
        try tokens.append(arena, tok);
    }
    // The terminating `.eof` token is stored as well.
    try tokens.append(arena, tok);

    var parser = Parser{ .arena = arena, .tokens = tokens.items };
    const root = try parser.parseExpr();
    if (!parser.at(.eof)) return error.UnexpectedToken;
    return root;
}
|
||||
|
||||
// --- token cursor helpers ---
|
||||
|
||||
/// Returns the token under the cursor without consuming it.
fn peek(self: *const Parser) Token {
    const cursor = self.pos;
    return self.tokens[cursor];
}
|
||||
|
||||
/// Returns the token `offset` positions past the cursor, or `.eof` when that
/// position lies beyond the end of the token list.
fn lookahead(self: *const Parser, offset: usize) Token {
    const idx = self.pos + offset;
    return if (idx < self.tokens.len) self.tokens[idx] else .eof;
}
|
||||
|
||||
/// Consumes and returns the token under the cursor.
fn advance(self: *Parser) Token {
    defer self.pos += 1;
    return self.tokens[self.pos];
}
|
||||
|
||||
/// Reports whether the token under the cursor has tag `tag`.
fn at(self: *const Parser, tag: std.meta.Tag(Token)) bool {
    const current = self.peek();
    return current == tag;
}
|
||||
|
||||
/// Consumes the current token if it has tag `tag`. Returns whether a token
/// was consumed.
fn match(self: *Parser, tag: std.meta.Tag(Token)) bool {
    if (!self.at(tag)) return false;
    _ = self.advance();
    return true;
}
|
||||
|
||||
/// Consumes and returns the current token, which must have tag `tag`;
/// otherwise fails with `error.UnexpectedToken` and consumes nothing.
fn expect(self: *Parser, tag: std.meta.Tag(Token)) Error!Token {
    return if (self.at(tag)) self.advance() else error.UnexpectedToken;
}
|
||||
|
||||
/// Consumes the current token if it is a name token spelled exactly
/// `keyword`. Returns whether a token was consumed.
fn matchKeyword(self: *Parser, keyword: []const u8) bool {
    switch (self.peek()) {
        .name => |word| {
            if (!std.mem.eql(u8, word, keyword)) return false;
            _ = self.advance();
            return true;
        },
        else => return false,
    }
}
|
||||
|
||||
/// Allocates an AST node on the parser's arena and initializes it with `value`.
fn makeExpr(self: *Parser, value: ast.Expr) Error!*ast.Expr {
    const node = try self.arena.create(ast.Expr);
    node.* = value;
    return node;
}
|
||||
|
||||
/// Allocates a binary-operator node `left op right` on the parser's arena.
fn makeBinop(self: *Parser, op: ast.BinOpKind, left: *ast.Expr, right: *ast.Expr) Error!*ast.Expr {
    const node: ast.Expr = .{ .binop = .{ .op = op, .left = left, .right = right } };
    return self.makeExpr(node);
}
|
||||
|
||||
// --- operator-precedence chain ---
|
||||
//
|
||||
// Or → And → Equality → Relational → Additive → Mult → Unary → Union → Path
|
||||
|
||||
/// Parses a full expression, starting at the lowest-precedence level.
/// Each nested call counts against `max_depth`; exceeding it fails with
/// `error.MaxDepthExceeded`.
fn parseExpr(self: *Parser) Error!*ast.Expr {
    if (self.depth >= max_depth) return error.MaxDepthExceeded;

    self.depth += 1;
    // Keep the error union unwrapped so depth is restored on both outcomes.
    const parsed = self.parseOrExpr();
    self.depth -= 1;
    return parsed;
}
|
||||
|
||||
/// Parses a left-associative chain of `or` operators.
fn parseOrExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parseAndExpr();
    while (self.matchKeyword("or")) {
        const rhs = try self.parseAndExpr();
        acc = try self.makeBinop(.or_, acc, rhs);
    }
    return acc;
}
|
||||
|
||||
/// Parses a left-associative chain of `and` operators.
fn parseAndExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parseEqualityExpr();
    while (self.matchKeyword("and")) {
        const rhs = try self.parseEqualityExpr();
        acc = try self.makeBinop(.and_, acc, rhs);
    }
    return acc;
}
|
||||
|
||||
/// Parses a left-associative chain of equality operators.
fn parseEqualityExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parseRelationalExpr();
    while (equalityOp(self.peek())) |op| {
        _ = self.advance(); // the operator token
        const rhs = try self.parseRelationalExpr();
        acc = try self.makeBinop(op, acc, rhs);
    }
    return acc;
}
|
||||
|
||||
/// Parses a left-associative chain of relational operators.
fn parseRelationalExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parseAdditiveExpr();
    while (relationalOp(self.peek())) |op| {
        _ = self.advance(); // the operator token
        const rhs = try self.parseAdditiveExpr();
        acc = try self.makeBinop(op, acc, rhs);
    }
    return acc;
}
|
||||
|
||||
/// Parses a left-associative chain of `+` / `-` operators.
fn parseAdditiveExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parseMultExpr();
    while (additiveOp(self.peek())) |op| {
        _ = self.advance(); // the operator token
        const rhs = try self.parseMultExpr();
        acc = try self.makeBinop(op, acc, rhs);
    }
    return acc;
}
|
||||
|
||||
/// Parses a left-associative chain of `*` / `div` / `mod` operators.
///
/// Once a complete unary expression has been read, `*` means multiply, and
/// `div` / `mod` (tokenized as plain names) are operator keywords.
fn parseMultExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parseUnaryExpr();
    while (multOp(self.peek())) |op| {
        _ = self.advance(); // the operator token
        const rhs = try self.parseUnaryExpr();
        acc = try self.makeBinop(op, acc, rhs);
    }
    return acc;
}
|
||||
|
||||
/// Parses any number of leading `-` signs followed by a union expression.
/// Each `-` recurses, so it counts against `max_depth` like a nested
/// expression does.
fn parseUnaryExpr(self: *Parser) Error!*ast.Expr {
    if (!self.match(.minus)) return self.parseUnionExpr();

    if (self.depth >= max_depth) return error.MaxDepthExceeded;
    self.depth += 1;
    defer self.depth -= 1;

    const operand = try self.parseUnaryExpr();
    return self.makeExpr(.{ .neg = operand });
}
|
||||
|
||||
/// Parses a left-associative chain of `|` (union) operators.
fn parseUnionExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parsePathExpr();
    while (self.match(.pipe)) {
        const rhs = try self.parsePathExpr();
        acc = try self.makeBinop(.union_, acc, rhs);
    }
    return acc;
}
|
||||
|
||||
// --- path expressions ---
|
||||
|
||||
/// Parses a path expression: an absolute path, a relative path, or a filter
/// expression (primary expression plus predicates) with an optional trailing
/// relative path.
fn parsePathExpr(self: *Parser) Error!*ast.Expr {
    // Filter-vs-relative-path disambiguation: a primary expression starts
    // with `(`, a string, a number, `$`, or `name(` where the name is not a
    // node-type test (`node`/`text`/`comment`/`processing-instruction`).
    // Anything else that is not an absolute path is a relative location path.
    switch (self.peek()) {
        .slash, .double_slash => return self.parseAbsPath(),
        .lparen, .string, .number, .dollar => {},
        .name => |name| {
            if (self.lookahead(1) != .lparen or isNodeTypeName(name)) return self.parseRelPath();
        },
        else => return self.parseRelPath(),
    }

    var filtered = try self.parsePrimaryExpr();
    while (self.match(.lbracket)) {
        const predicate = try self.parseExpr();
        _ = try self.expect(.rbracket);
        filtered = try self.makeExpr(.{ .filter = .{ .expr = filtered, .predicate = predicate } });
    }

    const separator = self.peek();
    if (separator != .slash and separator != .double_slash) return filtered;
    _ = self.advance();

    var steps: std.ArrayList(ast.Step) = .empty;
    // `//` inserts an implicit descendant-or-self::node() step.
    if (separator == .double_slash) try steps.append(self.arena, descendantOrSelfStep());
    try self.parseRelStepsInto(&steps);
    return self.makeExpr(.{ .filter_path = .{
        .filter = filtered,
        .steps = steps.items,
    } });
}
|
||||
|
||||
/// Parses an absolute location path starting with `/` or `//`.
fn parseAbsPath(self: *Parser) Error!*ast.Expr {
    var steps: std.ArrayList(ast.Step) = .empty;

    const from_descendants = self.match(.double_slash);
    if (from_descendants) {
        try steps.append(self.arena, descendantOrSelfStep());
    } else {
        _ = try self.expect(.slash);
    }

    // After `//` at least one step is mandatory. A bare `/` is the document
    // root on its own, so steps are parsed only if one can start here.
    if (from_descendants or self.canStartStep()) {
        try self.parseRelStepsInto(&steps);
    }

    return self.makeExpr(.{ .path = .{
        .absolute = true,
        .steps = steps.items,
    } });
}
|
||||
|
||||
/// Parses a relative location path (one or more steps) into a non-absolute
/// path node.
fn parseRelPath(self: *Parser) Error!*ast.Expr {
    var collected: std.ArrayList(ast.Step) = .empty;
    try self.parseRelStepsInto(&collected);

    const node: ast.Expr = .{ .path = .{
        .absolute = false,
        .steps = collected.items,
    } };
    return self.makeExpr(node);
}
|
||||
|
||||
/// Parses `step (('/' | '//') step)*` and appends the steps to `steps`.
/// Each `//` contributes an extra descendant-or-self::node() step before the
/// step that follows it.
fn parseRelStepsInto(self: *Parser, steps: *std.ArrayList(ast.Step)) Error!void {
    try steps.append(self.arena, try self.parseStep());

    while (true) {
        const separator = self.peek();
        if (separator != .slash and separator != .double_slash) return;
        _ = self.advance();

        if (separator == .double_slash) {
            try steps.append(self.arena, descendantOrSelfStep());
        }
        try steps.append(self.arena, try self.parseStep());
    }
}
|
||||
|
||||
/// Reports whether the current token can begin a location step: a name, `*`,
/// `.`, `..`, or `@`.
fn canStartStep(self: *const Parser) bool {
    const tok = self.peek();
    return tok == .name or
        tok == .star or
        tok == .dot or
        tok == .double_dot or
        tok == .at;
}
|
||||
|
||||
/// Parses a single location step: an abbreviated `.` / `..`, or an optional
/// axis (`@` or `name::`) followed by a node test and zero or more
/// `[predicate]` clauses. The axis defaults to `child`.
fn parseStep(self: *Parser) Error!ast.Step {
    // Abbreviated steps `.` and `..` carry no axis, node-test, or
    // predicates — predicates after `.` are a parse error per polyfill.
    if (self.match(.dot)) return abbreviatedStep(.self);
    if (self.match(.double_dot)) return abbreviatedStep(.parent);

    const axis: ast.Axis = blk: {
        if (self.match(.at)) break :blk .attribute;
        if (self.peek() != .name or self.lookahead(1) != .double_colon) break :blk .child;
        const axis_name = self.advance().name;
        _ = self.advance(); // `::`
        break :blk parseAxisName(axis_name);
    };

    const node_test = try self.parseNodeTest();

    var predicates: std.ArrayList(*ast.Expr) = .empty;
    while (self.match(.lbracket)) {
        const predicate = try self.parseExpr();
        _ = try self.expect(.rbracket);
        try predicates.append(self.arena, predicate);
    }

    return .{ .axis = axis, .node_test = node_test, .predicates = predicates.items };
}
|
||||
|
||||
/// Parses a node test: `*`, a node-type test such as `text()`, or a name.
/// A node-type keyword not followed by `(` is treated as an ordinary name.
/// Fails with `error.ExpectedNodeTest` when the current token is neither `*`
/// nor a name.
fn parseNodeTest(self: *Parser) Error!ast.NodeTest {
    if (self.match(.star)) return .{ .name = "*" };

    const name = switch (self.peek()) {
        .name => |n| n,
        else => return error.ExpectedNodeTest,
    };

    if (self.lookahead(1) == .lparen) {
        if (typeTestKind(name)) |type_test| {
            _ = self.advance(); // name
            _ = self.advance(); // `(`
            // `processing-instruction("target")` consumes the literal but ignores it (decision #3 stub).
            if (type_test == .processing_instruction and self.peek() == .string) {
                _ = self.advance();
            }
            _ = try self.expect(.rparen);
            return .{ .type_test = type_test };
        }
    }

    _ = self.advance();
    return .{ .name = name };
}
|
||||
|
||||
/// Parses a primary expression: a string or number literal, a `$variable`
/// reference, a parenthesized expression, or a function call `name(args...)`.
/// Any other token fails with `error.ExpectedPrimaryExpr`.
fn parsePrimaryExpr(self: *Parser) Error!*ast.Expr {
    switch (self.peek()) {
        .string => |text| {
            _ = self.advance();
            return self.makeExpr(.{ .literal = text });
        },
        .number => |value| {
            _ = self.advance();
            return self.makeExpr(.{ .number = value });
        },
        .dollar => {
            _ = self.advance();
            const var_tok = try self.expect(.name);
            return self.makeExpr(.{ .var_ref = var_tok.name });
        },
        .lparen => {
            _ = self.advance();
            const inner = try self.parseExpr();
            _ = try self.expect(.rparen);
            return inner;
        },
        .name => |fn_name| {
            _ = self.advance();
            _ = try self.expect(.lparen);

            var args: std.ArrayList(*ast.Expr) = .empty;
            if (!self.at(.rparen)) {
                // One argument, then another after every comma.
                while (true) {
                    try args.append(self.arena, try self.parseExpr());
                    if (!self.match(.comma)) break;
                }
            }
            _ = try self.expect(.rparen);

            return self.makeExpr(.{ .fn_call = .{ .name = fn_name, .args = args.items } });
        },
        else => return error.ExpectedPrimaryExpr,
    }
}
|
||||
|
||||
// --- pure helpers ---
|
||||
|
||||
/// Maps `=` / `!=` tokens to their binary operator kind; any other token
/// yields `null`.
fn equalityOp(t: Token) ?ast.BinOpKind {
    if (t == .eq) return .eq;
    if (t == .neq) return .neq;
    return null;
}
|
||||
|
||||
/// Maps `<`, `>`, `<=`, `>=` tokens to their binary operator kind; any
/// other token yields `null`.
fn relationalOp(t: Token) ?ast.BinOpKind {
    switch (t) {
        .lt => return .lt,
        .gt => return .gt,
        .lte => return .lte,
        .gte => return .gte,
        else => return null,
    }
}
|
||||
|
||||
/// Maps `+` / `-` tokens to `.add` / `.sub`; any other token yields `null`.
fn additiveOp(t: Token) ?ast.BinOpKind {
    if (t == .plus) return .add;
    if (t == .minus) return .sub;
    return null;
}
|
||||
|
||||
/// Maps a multiplicative operator token to its kind: `*` is `.mul`, and
/// the names `div` / `mod` are `.div` / `.mod`. Any other token, including
/// any other name, yields `null`.
fn multOp(t: Token) ?ast.BinOpKind {
    switch (t) {
        .star => return .mul,
        .name => |keyword| {
            if (std.mem.eql(u8, keyword, "div")) return .div;
            if (std.mem.eql(u8, keyword, "mod")) return .mod;
            return null;
        },
        else => return null,
    }
}
|
||||
|
||||
/// Builds the predicate-free `descendant-or-self::node()` step.
fn descendantOrSelfStep() ast.Step {
    // Same shape as any other abbreviated step, fixed to one axis.
    return abbreviatedStep(.descendant_or_self);
}
|
||||
|
||||
/// Builds a predicate-free `axis::node()` step for the given axis.
fn abbreviatedStep(axis: ast.Axis) ast.Step {
    const any_node: ast.NodeTest = .{ .type_test = .node };
    return .{ .axis = axis, .node_test = any_node, .predicates = &.{} };
}
|
||||
|
||||
/// Reports whether `name` is one of the node-type keywords known to
/// `typeTestKind`.
fn isNodeTypeName(name: []const u8) bool {
    return if (typeTestKind(name)) |_| true else false;
}
|
||||
|
||||
/// Comptime table from node-type keyword to its `ast.TypeTest` tag.
const type_test_lookup = blk: {
    const Map = std.StaticStringMap(ast.TypeTest);
    break :blk Map.initComptime(.{
        .{ "node", .node },
        .{ "text", .text },
        .{ "comment", .comment },
        .{ "processing-instruction", .processing_instruction },
    });
};
|
||||
|
||||
/// Looks `name` up in `type_test_lookup`; returns `null` when it is not a
/// node-type keyword.
fn typeTestKind(name: []const u8) ?ast.TypeTest {
    const found = type_test_lookup.get(name);
    return found;
}
|
||||
|
||||
/// Comptime table from axis name (the text before `::`) to its `ast.Axis`
/// tag. Names missing from this table are handled by `parseAxisName`.
const axis_lookup = blk: {
    const Map = std.StaticStringMap(ast.Axis);
    break :blk Map.initComptime(.{
        .{ "child", .child },
        .{ "descendant", .descendant },
        .{ "descendant-or-self", .descendant_or_self },
        .{ "self", .self },
        .{ "parent", .parent },
        .{ "ancestor", .ancestor },
        .{ "ancestor-or-self", .ancestor_or_self },
        .{ "following-sibling", .following_sibling },
        .{ "preceding-sibling", .preceding_sibling },
        .{ "following", .following },
        .{ "preceding", .preceding },
        .{ "attribute", .attribute },
        .{ "namespace", .namespace },
    });
};
|
||||
|
||||
/// Resolves an axis name to its `ast.Axis`. Unrecognized names are not an
/// error here: they map to `.unknown`.
fn parseAxisName(name: []const u8) ast.Axis {
    if (axis_lookup.get(name)) |axis| return axis;
    return .unknown;
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
const testing = std.testing;
|
||||
|
||||
/// Test helper: parses `input` into a fresh arena backed by
/// `testing.allocator` and returns both the arena and the root expression.
/// The caller must `deinit` the returned arena; on parse failure the arena
/// is released here before the error propagates.
fn parseFixture(input: []const u8) !struct { arena: std.heap.ArenaAllocator, expr: *ast.Expr } {
    var scratch = std.heap.ArenaAllocator.init(testing.allocator);
    errdefer scratch.deinit();

    const root = try parse(scratch.allocator(), input);
    return .{ .arena = scratch, .expr = root };
}
|
||||
|
||||
test "XPath.Parser: number literal" {
    var parsed = try parseFixture("42");
    defer parsed.arena.deinit();

    const want: f64 = 42;
    try testing.expectEqual(want, parsed.expr.number);
}
|
||||
|
||||
test "XPath.Parser: string literal" {
    var parsed = try parseFixture("'hello'");
    defer parsed.arena.deinit();

    // The quotes are not part of the literal's value.
    try testing.expectEqualStrings("hello", parsed.expr.literal);
}
|
||||
|
||||
test "XPath.Parser: variable reference strips $" {
    var parsed = try parseFixture("$x");
    defer parsed.arena.deinit();

    // Only the name after `$` is stored.
    try testing.expectEqualStrings("x", parsed.expr.var_ref);
}
|
||||
|
||||
test "XPath.Parser: parenthesized expression unwraps" {
    var parsed = try parseFixture("(42)");
    defer parsed.arena.deinit();

    // The root is the number itself, not a wrapper node.
    const want: f64 = 42;
    try testing.expectEqual(want, parsed.expr.number);
}
|
||||
|
||||
test "XPath.Parser: function call with no args" {
    var parsed = try parseFixture("position()");
    defer parsed.arena.deinit();

    const call = parsed.expr.fn_call;
    try testing.expectEqualStrings("position", call.name);
    try testing.expectEqual(@as(usize, 0), call.args.len);
}
|
||||
|
||||
test "XPath.Parser: function call with args" {
    var parsed = try parseFixture("substring('abc', 2, 1)");
    defer parsed.arena.deinit();

    const call = parsed.expr.fn_call;
    try testing.expectEqualStrings("substring", call.name);
    try testing.expectEqual(@as(usize, 3), call.args.len);

    // First argument is the string; the remaining two are numbers.
    try testing.expectEqualStrings("abc", call.args[0].literal);
    const numeric_args = [_]f64{ 2, 1 };
    for (numeric_args, call.args[1..]) |want, arg| {
        try testing.expectEqual(want, arg.number);
    }
}
|
||||
|
||||
test "XPath.Parser: arithmetic precedence — mul binds tighter than add" {
    var parsed = try parseFixture("1 + 2 * 3");
    defer parsed.arena.deinit();

    // Expected shape: add(1, mul(2, 3)).
    const sum = parsed.expr.binop;
    try testing.expectEqual(ast.BinOpKind.add, sum.op);
    try testing.expectEqual(@as(f64, 1), sum.left.number);

    const product = sum.right.binop;
    try testing.expectEqual(ast.BinOpKind.mul, product.op);
    try testing.expectEqual(@as(f64, 2), product.left.number);
    try testing.expectEqual(@as(f64, 3), product.right.number);
}
|
||||
|
||||
test "XPath.Parser: arithmetic left-associativity" {
    var parsed = try parseFixture("1 - 2 - 3");
    defer parsed.arena.deinit();

    // Expected shape: sub(sub(1, 2), 3).
    const outer = parsed.expr.binop;
    try testing.expectEqual(ast.BinOpKind.sub, outer.op);
    try testing.expectEqual(@as(f64, 3), outer.right.number);

    const nested = outer.left.binop;
    try testing.expectEqual(ast.BinOpKind.sub, nested.op);
    try testing.expectEqual(@as(f64, 1), nested.left.number);
    try testing.expectEqual(@as(f64, 2), nested.right.number);
}
|
||||
|
||||
test "XPath.Parser: div and mod are operator-position keywords" {
    var division = try parseFixture("7 div 2");
    defer division.arena.deinit();
    try testing.expectEqual(ast.BinOpKind.div, division.expr.binop.op);

    var modulo = try parseFixture("7 mod 2");
    defer modulo.arena.deinit();
    try testing.expectEqual(ast.BinOpKind.mod, modulo.expr.binop.op);
}
|
||||
|
||||
test "XPath.Parser: comparison operators" {
    // Each row: source expression, expected top-level operator.
    const cases = .{
        .{ "1 = 2", ast.BinOpKind.eq },
        .{ "1 != 2", ast.BinOpKind.neq },
        .{ "1 < 2", ast.BinOpKind.lt },
        .{ "1 <= 2", ast.BinOpKind.lte },
        .{ "1 > 2", ast.BinOpKind.gt },
        .{ "1 >= 2", ast.BinOpKind.gte },
    };
    inline for (cases) |case| {
        var parsed = try parseFixture(case[0]);
        defer parsed.arena.deinit();
        try testing.expectEqual(case[1], parsed.expr.binop.op);
    }
}
|
||||
|
||||
test "XPath.Parser: logical or/and short-circuit chain" {
    var parsed = try parseFixture("a or b and c");
    defer parsed.arena.deinit();

    // `and` binds tighter, so the shape is or(path(a), and(path(b), path(c))).
    const disjunction = parsed.expr.binop;
    try testing.expectEqual(ast.BinOpKind.or_, disjunction.op);
    try testing.expectEqual(ast.BinOpKind.and_, disjunction.right.binop.op);
}
|
||||
|
||||
test "XPath.Parser: unary minus" {
    var parsed = try parseFixture("-1");
    defer parsed.arena.deinit();

    // The negation node wraps the positive literal.
    const want: f64 = 1;
    try testing.expectEqual(want, parsed.expr.neg.number);
}
|
||||
|
||||
test "XPath.Parser: union" {
    var parsed = try parseFixture("a | b");
    defer parsed.arena.deinit();

    const top = parsed.expr.binop;
    try testing.expectEqual(ast.BinOpKind.union_, top.op);
}
|
||||
|
||||
test "XPath.Parser: absolute path / alone is document root" {
    var parsed = try parseFixture("/");
    defer parsed.arena.deinit();

    // A bare `/` is an absolute path with no steps.
    const root_path = parsed.expr.path;
    try testing.expect(root_path.absolute);
    try testing.expectEqual(@as(usize, 0), root_path.steps.len);
}
|
||||
|
||||
test "XPath.Parser: absolute path /foo" {
    var parsed = try parseFixture("/foo");
    defer parsed.arena.deinit();

    const located = parsed.expr.path;
    try testing.expect(located.absolute);
    try testing.expectEqual(@as(usize, 1), located.steps.len);
    try testing.expectEqualStrings("foo", located.steps[0].node_test.name);
}
|
||||
|
||||
test "XPath.Parser: //foo expands to descendant-or-self::node()/foo" {
    var parsed = try parseFixture("//foo");
    defer parsed.arena.deinit();

    const located = parsed.expr.path;
    try testing.expect(located.absolute);
    try testing.expectEqual(@as(usize, 2), located.steps.len);

    // Step 0 is the implicit descendant-or-self::node() inserted for `//`.
    const implicit = located.steps[0];
    try testing.expectEqual(ast.Axis.descendant_or_self, implicit.axis);
    try testing.expectEqual(ast.TypeTest.node, implicit.node_test.type_test);

    try testing.expectEqualStrings("foo", located.steps[1].node_test.name);
}
|
||||
|
||||
test "XPath.Parser: relative path child::foo/bar" {
    var parsed = try parseFixture("foo/bar");
    defer parsed.arena.deinit();

    const located = parsed.expr.path;
    try testing.expect(!located.absolute);
    try testing.expectEqual(@as(usize, 2), located.steps.len);

    // A step with no explicit axis is on the child axis.
    const first = located.steps[0];
    try testing.expectEqual(ast.Axis.child, first.axis);
    try testing.expectEqualStrings("foo", first.node_test.name);

    try testing.expectEqualStrings("bar", located.steps[1].node_test.name);
}
|
||||
|
||||
test "XPath.Parser: abbreviated steps . and .." {
    var parsed = try parseFixture("./..");
    defer parsed.arena.deinit();

    const steps = parsed.expr.path.steps;
    try testing.expectEqual(@as(usize, 2), steps.len);
    try testing.expectEqual(ast.Axis.self, steps[0].axis); // `.`
    try testing.expectEqual(ast.Axis.parent, steps[1].axis); // `..`
}
|
||||
|
||||
test "XPath.Parser: attribute axis @class" {
    var parsed = try parseFixture("@class");
    defer parsed.arena.deinit();

    const attr_step = parsed.expr.path.steps[0];
    try testing.expectEqual(ast.Axis.attribute, attr_step.axis);
    try testing.expectEqualStrings("class", attr_step.node_test.name);
}
|
||||
|
||||
test "XPath.Parser: all 12 named axes parse correctly" {
    // Each row: source expression, expected axis of its first step.
    // `attribute::` is not in this table.
    const cases = .{
        .{ "child::a", ast.Axis.child },
        .{ "descendant::a", ast.Axis.descendant },
        .{ "descendant-or-self::a", ast.Axis.descendant_or_self },
        .{ "self::a", ast.Axis.self },
        .{ "parent::a", ast.Axis.parent },
        .{ "ancestor::a", ast.Axis.ancestor },
        .{ "ancestor-or-self::a", ast.Axis.ancestor_or_self },
        .{ "following-sibling::a", ast.Axis.following_sibling },
        .{ "preceding-sibling::a", ast.Axis.preceding_sibling },
        .{ "following::a", ast.Axis.following },
        .{ "preceding::a", ast.Axis.preceding },
        .{ "namespace::a", ast.Axis.namespace },
    };
    inline for (cases) |case| {
        var parsed = try parseFixture(case[0]);
        defer parsed.arena.deinit();
        try testing.expectEqual(case[1], parsed.expr.path.steps[0].axis);
    }
}
|
||||
|
||||
test "XPath.Parser: unknown axis name maps to .unknown — polyfill parity" {
    var parsed = try parseFixture("wibble::a");
    defer parsed.arena.deinit();

    // An unrecognized axis name still parses; it is tagged `.unknown`.
    const first_step = parsed.expr.path.steps[0];
    try testing.expectEqual(ast.Axis.unknown, first_step.axis);
}
|
||||
|
||||
test "XPath.Parser: wildcard *" {
    var parsed = try parseFixture("*");
    defer parsed.arena.deinit();

    const first_step = parsed.expr.path.steps[0];
    try testing.expectEqualStrings("*", first_step.node_test.name);
}
|
||||
|
||||
test "XPath.Parser: namespace-prefixed name and wildcard" {
    // The prefix and colon stay part of the node-test name.
    var qualified = try parseFixture("svg:rect");
    defer qualified.arena.deinit();
    try testing.expectEqualStrings("svg:rect", qualified.expr.path.steps[0].node_test.name);

    var prefixed_any = try parseFixture("svg:*");
    defer prefixed_any.arena.deinit();
    try testing.expectEqualStrings("svg:*", prefixed_any.expr.path.steps[0].node_test.name);
}
|
||||
|
||||
test "XPath.Parser: node-type tests" {
    // Each row: source expression, expected type test of its first step.
    const cases = .{
        .{ "node()", ast.TypeTest.node },
        .{ "text()", ast.TypeTest.text },
        .{ "comment()", ast.TypeTest.comment },
        .{ "processing-instruction()", ast.TypeTest.processing_instruction },
    };
    inline for (cases) |case| {
        var parsed = try parseFixture(case[0]);
        defer parsed.arena.deinit();
        try testing.expectEqual(case[1], parsed.expr.path.steps[0].node_test.type_test);
    }
}
|
||||
|
||||
test "XPath.Parser: processing-instruction with literal target — consumed but ignored" {
    var parsed = try parseFixture("processing-instruction('xml-stylesheet')");
    defer parsed.arena.deinit();

    // The literal target is accepted; only the type test is checked here.
    const first_step = parsed.expr.path.steps[0];
    try testing.expectEqual(ast.TypeTest.processing_instruction, first_step.node_test.type_test);
}
|
||||
|
||||
test "XPath.Parser: predicate on step" {
    var parsed = try parseFixture("p[1]");
    defer parsed.arena.deinit();

    const predicates = parsed.expr.path.steps[0].predicates;
    try testing.expectEqual(@as(usize, 1), predicates.len);
    try testing.expectEqual(@as(f64, 1), predicates[0].number);
}
|
||||
|
||||
test "XPath.Parser: multi-predicate step" {
    var parsed = try parseFixture("p[1][@x]");
    defer parsed.arena.deinit();

    // Both bracketed predicates attach to the same step.
    const predicates = parsed.expr.path.steps[0].predicates;
    try testing.expectEqual(@as(usize, 2), predicates.len);
}
|
||||
|
||||
test "XPath.Parser: filter expression with predicate parses as Filter, not Step" {
    var parsed = try parseFixture("(//a)[1]");
    defer parsed.arena.deinit();

    // The root is a filter node: a parenthesized path plus one predicate.
    const filtered = parsed.expr.filter;
    try testing.expectEqual(@as(f64, 1), filtered.predicate.number);
    try testing.expect(filtered.expr.path.absolute);
}
|
||||
|
||||
test "XPath.Parser: filter with multi-predicate nests" {
    var parsed = try parseFixture("(//a)[1][2]");
    defer parsed.arena.deinit();

    // The last predicate is outermost; the first one is nested inside it.
    const last_applied = parsed.expr.filter;
    try testing.expectEqual(@as(f64, 2), last_applied.predicate.number);

    const first_applied = last_applied.expr.filter;
    try testing.expectEqual(@as(f64, 1), first_applied.predicate.number);
}
|
||||
|
||||
test "XPath.Parser: filter with location-path tail (filter_path)" {
    var parsed = try parseFixture("(//a)/b");
    defer parsed.arena.deinit();

    const tail = parsed.expr.filter_path;
    try testing.expect(tail.filter.path.absolute);
    try testing.expectEqual(@as(usize, 1), tail.steps.len);
    try testing.expectEqualStrings("b", tail.steps[0].node_test.name);
}
|
||||
|
||||
test "XPath.Parser: filter with // tail prepends descendant-or-self" {
    var parsed = try parseFixture("(//a)//b");
    defer parsed.arena.deinit();

    // `//` after the filter adds a descendant-or-self step ahead of `b`.
    const tail_steps = parsed.expr.filter_path.steps;
    try testing.expectEqual(@as(usize, 2), tail_steps.len);
    try testing.expectEqual(ast.Axis.descendant_or_self, tail_steps[0].axis);
    try testing.expectEqualStrings("b", tail_steps[1].node_test.name);
}
|
||||
|
||||
test "XPath.Parser: function call followed by predicate" {
    var parsed = try parseFixture("id('x')[1]");
    defer parsed.arena.deinit();

    // The predicate wraps the call in a filter node.
    const filtered = parsed.expr.filter;
    try testing.expectEqual(@as(f64, 1), filtered.predicate.number);
    try testing.expectEqualStrings("id", filtered.expr.fn_call.name);
}
|
||||
|
||||
test "XPath.Parser: complex representative expression" {
    var parsed = try parseFixture("//div[@class='active']/p[position()<=last()-1]");
    defer parsed.arena.deinit();

    const located = parsed.expr.path;
    try testing.expect(located.absolute);

    // Three steps: the implicit `//` step, then `div[...]`, then `p[...]`.
    const steps = located.steps;
    try testing.expectEqual(@as(usize, 3), steps.len);
    try testing.expectEqual(ast.Axis.descendant_or_self, steps[0].axis);

    try testing.expectEqualStrings("div", steps[1].node_test.name);
    try testing.expectEqual(@as(usize, 1), steps[1].predicates.len);

    try testing.expectEqualStrings("p", steps[2].node_test.name);
    try testing.expectEqual(@as(usize, 1), steps[2].predicates.len);
}
|
||||
|
||||
/// Test helper: asserts that parsing `input` fails with exactly `expected`.
/// Uses a throwaway arena so nothing leaks whichever way the parse goes.
fn expectParseError(input: []const u8, expected: anyerror) !void {
    var scratch = std.heap.ArenaAllocator.init(testing.allocator);
    defer scratch.deinit();

    const outcome = parse(scratch.allocator(), input);
    try testing.expectError(expected, outcome);
}
|
||||
|
||||
test "XPath.Parser: error on unbalanced paren" {
    // The `(` is never closed.
    const input = "(1";
    try expectParseError(input, error.UnexpectedToken);
}
|
||||
|
||||
test "XPath.Parser: error on unbalanced bracket" {
    // The predicate's `[` is never closed.
    const input = "p[1";
    try expectParseError(input, error.UnexpectedToken);
}
|
||||
|
||||
test "XPath.Parser: error on missing node test" {
    // An axis with nothing after `::`.
    const input = "child::";
    try expectParseError(input, error.ExpectedNodeTest);
}
|
||||
|
||||
test "XPath.Parser: bare `+` falls through to step and reports missing node test" {
    // Polyfill parity: `+` cannot start a path or a primary expression, so
    // parsing lands in parseStep, which finds no name to use as node test.
    const input = "+";
    try expectParseError(input, error.ExpectedNodeTest);
}
|
||||
|
||||
test "XPath.Parser: error on trailing tokens" {
    // A complete expression followed by a leftover token.
    const input = "1 2";
    try expectParseError(input, error.UnexpectedToken);
}
|
||||
|
||||
test "XPath.Parser: empty string falls through to step and reports missing node test" {
    const input = "";
    try expectParseError(input, error.ExpectedNodeTest);
}
|
||||
|
||||
test "XPath.Parser: 91-case battery — every expression parses" {
    // XPath 1.0 conformance battery: 91 expressions covering the expression
    // shapes the public API surface accepts. The only requirement is that
    // each one parses without error; the resulting AST is not inspected.
    const battery = [_][]const u8{
        "/html",
        "/html/body",
        "/",
        "//h1",
        "//ul/li",
        "//ul//li",
        ".",
        ".//li",
        "//section/*",
        "//*[@id='heading']",
        "//li[1]/following-sibling::li",
        "//li[5]/preceding-sibling::li",
        "//li/parent::ul",
        "//li/ancestor::body",
        "//li/ancestor-or-self::body",
        "//li[3]/preceding::li",
        "//li[1]/following::li",
        "//ul/descendant::li",
        "//ul/descendant-or-self::li",
        "//section[1]/child::span",
        "//*[@id='heading']/self::h1",
        "//a[1]/attribute::href",
        "//a[1]/@*",
        "//li[1]",
        "//li[last()]",
        "//li[last() - 1]",
        "//li[position() = 1]",
        "//li[position() > 2]",
        "//li[position() mod 2 = 1]",
        "(//li)[1]",
        "(//section)[2]",
        "//li[3]/preceding-sibling::li[1]",
        "//li[5]/ancestor::*[1]",
        "//li[contains(concat(' ', @class, ' '), ' even ')][2]",
        "//*[@id='heading' and @class='primary']",
        "//*[@id='heading' or @id='p1']",
        "//section[a]",
        "//section[count(span) = 2]",
        "//ul[count(li) = 5]",
        "//tr[td[1]]",
        "//tr[td/text() = 'Bob']",
        "//*[starts-with(@id, 'link')]",
        "//*[normalize-space() = 'Hello World']",
        "//*[normalize-space(.) = 'Item 1']",
        "//*[concat(@id, '-x') = 'heading-x']",
        "//*[substring(@id, 1, 1) = 'p']",
        "//*[substring(@id, 2, 1) = '1' and starts-with(@id, 'p')]",
        "//p[translate(@id, 'p', 'q') = 'q1']",
        "//*[substring-before(@id, '1') = 'p']",
        "//*[substring-after(@id, 'lin') = 'k1']",
        "//tr[number(td[2]) > 28]",
        "//tr[floor(number(td[2]) div 10) = 3]",
        "//tr[ceiling(number(td[2]) div 10) = 3]",
        "//tr[round(number(td[2]) div 10) = 3]",
        "//ul[sum(li/@data-len) = 0]",
        "//p[boolean(@lang)]",
        "//*[false()]",
        "//*[name() = 'h1']",
        "//*[local-name() = 'h1']",
        "id('heading')",
        "id('heading p1')",
        "id(//em/parent::p/@id)",
        "//h1 | //title",
        "//h1 | //*[@id='p1']",
        "//*[@id='heading'] | //*[@id='heading']",
        "//li[position() + 1 = 3]",
        "//li[position() - 1 = 0]",
        "//li[position() * 2 = 4]",
        "//li[position() div 2 = 1]",
        "//li[(position() mod 2) = 0]",
        "//tr[number(td[2]) = 30]",
        "//tr[number(td[2]) != 30]",
        "//tr[number(td[2]) < 30]",
        "//tr[number(td[2]) <= 30]",
        "//tr[number(td[2]) > 30]",
        "//tr[number(td[2]) >= 30]",
        "//tr[td[2] = 30]",
        "//tr[td[2] = '30']",
        "//comment()",
        ".//a[contains(normalize-space(string(.)), 'Click me')]",
        ".//input[(./@type = 'text')]",
        ".//*[@id='heading']",
        ".//li[contains(concat(' ', @class, ' '), ' even ')]",
        "//*[@id='heading']/text()",
        "//em/parent::p",
        "//p[em]",
        "//p[not(em)]",
        "//section[a/@href = '/foo']",
        "//ul/li[last()][position() = last()]",
        "//ul[string(count(li)) = '5']",
        "//body[count(//*[contains(@class, 'item')]) = 5]",
    };
    // Guard against entries being added or dropped without updating the name.
    try testing.expectEqual(@as(usize, 91), battery.len);

    for (battery) |source| {
        // A fresh arena per expression keeps cases independent.
        var scratch = std.heap.ArenaAllocator.init(testing.allocator);
        defer scratch.deinit();

        _ = parse(scratch.allocator(), source) catch |err| {
            // Name the offending expression before failing the test.
            std.debug.print("\n failed to parse: {s}\n error: {s}\n", .{ source, @errorName(err) });
            return err;
        };
    }
}
|
||||
|
||||
test "XPath.Parser: deep parenthesization rejected past max_depth" {
    var scratch = std.heap.ArenaAllocator.init(testing.allocator);
    defer scratch.deinit();

    // Build `(((...1...)))` nested one level beyond the limit.
    const nesting = max_depth + 1;
    var source: std.ArrayList(u8) = .empty;
    defer source.deinit(testing.allocator);
    try source.appendNTimes(testing.allocator, '(', nesting);
    try source.append(testing.allocator, '1');
    try source.appendNTimes(testing.allocator, ')', nesting);

    const outcome = parse(scratch.allocator(), source.items);
    try testing.expectError(error.MaxDepthExceeded, outcome);
}
|
||||
|
||||
test "XPath.Parser: deep unary minus rejected past max_depth" {
    var scratch = std.heap.ArenaAllocator.init(testing.allocator);
    defer scratch.deinit();

    // Build `---...-1` with one more minus sign than the limit.
    const nesting = max_depth + 1;
    var source: std.ArrayList(u8) = .empty;
    defer source.deinit(testing.allocator);
    try source.appendNTimes(testing.allocator, '-', nesting);
    try source.append(testing.allocator, '1');

    const outcome = parse(scratch.allocator(), source.items);
    try testing.expectError(error.MaxDepthExceeded, outcome);
}
|
||||
464
src/browser/xpath/Tokenizer.zig
Normal file
464
src/browser/xpath/Tokenizer.zig
Normal file
@@ -0,0 +1,464 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! XPath 1.0 expression tokenizer.
|
||||
//!
|
||||
//! HTML-pragmatic behavior: lenient whitespace, case-preserving names,
|
||||
//! no escape processing in string literals (use the other quote type
|
||||
//! to embed), unknown characters silently skipped.
|
||||
//!
|
||||
//! The tokenizer borrows from the input slice and never allocates.
|
||||
//! `next()` always returns a token; `.eof` is terminal and idempotent.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const Tokenizer = @This();
|
||||
|
||||
/// A single lexical token. Payload slices borrow from the tokenizer input.
pub const Token = union(enum) {
    /// String literal, written `'foo'` or `"foo"`. The surrounding quotes
    /// are removed and the contents are taken raw: no escape sequences are
    /// interpreted (polyfill behavior).
    string: []const u8,

    /// Numeric literal such as `123`, `1.5`, `.5` or `5.`, stored as f64
    /// to match the runtime number type.
    number: f64,

    /// Bare identifier: an element, function or axis name, one of the
    /// `or` / `and` / `div` / `mod` keywords, or a namespace-prefixed name
    /// (`prefix:local`, `prefix:*`). The colon and optional wildcard are
    /// kept verbatim so the parser can split them.
    name: []const u8,

    slash, // `/`
    double_slash, // `//`
    dot, // `.`
    double_dot, // `..`
    at, // `@`
    lparen, // `(`
    rparen, // `)`
    lbracket, // `[`
    rbracket, // `]`
    comma, // `,`
    pipe, // `|`
    eq, // `=`
    neq, // `!=`
    lt, // `<`
    lte, // `<=`
    gt, // `>`
    gte, // `>=`
    plus, // `+`
    minus, // `-`
    star, // `*`
    dollar, // `$`
    double_colon, // `::`
    /// End of input.
    eof,
};
|
||||
|
||||
input: []const u8,
|
||||
position: usize = 0,
|
||||
|
||||
/// True once the cursor has reached (or passed) the end of the input.
fn isEof(self: *const Tokenizer) bool {
    return self.input.len <= self.position;
}
|
||||
|
||||
// Bounds check for lookahead: true iff the byte `n` positions past the
// cursor exists, i.e. `byteAt(n)` stays inside the input.
fn hasAtLeast(self: *const Tokenizer, n: usize) bool {
    const probe = self.position + n;
    return probe < self.input.len;
}
|
||||
|
||||
/// Returns the input byte `offset` positions past the cursor. It does no
/// bounds check of its own, so callers guard with `isEof` / `hasAtLeast`.
fn byteAt(self: *const Tokenizer, offset: usize) u8 {
    const index = self.position + offset;
    return self.input[index];
}
|
||||
|
||||
/// Advances the cursor past any run of space, tab, newline or carriage
/// return. Stops at the first other byte or at the end of the input.
fn skipWhitespace(self: *Tokenizer) void {
    while (self.position < self.input.len) : (self.position += 1) {
        switch (self.input[self.position]) {
            ' ', '\t', '\n', '\r' => {},
            // Leaves the loop without running the `+= 1` step.
            else => break,
        }
    }
}
|
||||
|
||||
/// True for bytes that may begin a name: ASCII letters and `_`.
fn isNameStart(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '_' => true,
        else => false,
    };
}
|
||||
|
||||
/// True for bytes that may appear after the first byte of a name:
/// everything `isNameStart` accepts, plus ASCII digits, `-` and `.`.
fn isNameContinue(c: u8) bool {
    return switch (c) {
        '0'...'9', '-', '.' => true,
        else => isNameStart(c),
    };
}
|
||||
|
||||
/// Consumes a string literal whose opening quote is at the cursor and
/// returns its contents without the quotes. `quote` is the delimiter byte
/// to look for; no escape processing is done.
///
/// If the closing quote is missing, everything up to the end of the input
/// is returned and the cursor stops at the end (polyfill parity).
fn consumeString(self: *Tokenizer, quote: u8) Token {
    const body_start = self.position + 1; // step over the opening quote
    const remainder = self.input[body_start..];

    if (std.mem.indexOfScalar(u8, remainder, quote)) |body_len| {
        // Land just past the closing quote.
        self.position = body_start + body_len + 1;
        return .{ .string = remainder[0..body_len] };
    }

    // Unterminated: emit what we have.
    self.position = self.input.len;
    return .{ .string = remainder };
}
|
||||
|
||||
/// Consumes a numeric literal starting at the cursor: a run of digits,
/// then optionally a `.` and another run of digits.
fn consumeNumber(self: *Tokenizer) Token {
    const src = self.input;
    var end = self.position;

    // Integer part (empty for the `.5` form).
    while (end < src.len and std.ascii.isDigit(src[end])) : (end += 1) {}

    // Optional fraction: the dot, then any digits after it.
    if (end < src.len and src[end] == '.') {
        end += 1;
        while (end < src.len and std.ascii.isDigit(src[end])) : (end += 1) {}
    }

    const text = src[self.position..end];
    self.position = end;

    // The caller only gets here on a digit or on `.digit`, so `text` is
    // always `\d+(\.\d*)?` or `\.\d+` — both accepted by parseFloat
    // (verified against Zig 0.15.2).
    const value = std.fmt.parseFloat(f64, text) catch unreachable;
    return .{ .number = value };
}
|
||||
|
||||
/// Consumes a name starting at the cursor, including an optional
/// namespace part (`prefix:local` or `prefix:*`), and returns the whole
/// span verbatim as a `.name` token.
fn consumeName(self: *Tokenizer) Token {
    const src = self.input;
    const start = self.position;
    var cursor = start;

    while (cursor < src.len and isNameContinue(src[cursor])) : (cursor += 1) {}

    // A single `:` introduces a namespace-qualified name. A `::` is the
    // axis separator and belongs to the next token, so only a colon that
    // is NOT followed by another colon is taken here.
    const lone_colon = cursor < src.len and src[cursor] == ':' and
        (cursor + 1 >= src.len or src[cursor + 1] != ':');
    if (lone_colon) {
        cursor += 1; // `:`
        if (cursor < src.len and src[cursor] == '*') {
            cursor += 1; // `prefix:*`
        } else {
            while (cursor < src.len and isNameContinue(src[cursor])) : (cursor += 1) {}
        }
    }

    self.position = cursor;
    return .{ .name = src[start..cursor] };
}
|
||||
|
||||
/// Returns the next token, skipping leading whitespace. Never fails: bytes
/// that start no token are silently dropped, and `.eof` is returned once
/// the input is exhausted (and on every call after that).
pub fn next(self: *Tokenizer) Token {
    while (true) {
        self.skipWhitespace();
        if (self.isEof()) return .eof;

        const first = self.byteAt(0);
        // One byte of lookahead, or null when `first` is the last byte.
        const second: ?u8 = if (self.hasAtLeast(1)) self.byteAt(1) else null;

        // Literals. `.5` is a number, so a dot followed by a digit must be
        // caught before the `.` / `..` operators below.
        switch (first) {
            '"', '\'' => return self.consumeString(first),
            '0'...'9' => return self.consumeNumber(),
            '.' => if (second) |peeked| {
                if (std.ascii.isDigit(peeked)) return self.consumeNumber();
            },
            else => {},
        }

        // Two-byte operators take priority over their one-byte prefixes.
        if (second) |peeked| {
            const pair: ?Token = switch (first) {
                '/' => if (peeked == '/') Token.double_slash else null,
                ':' => if (peeked == ':') Token.double_colon else null,
                '!' => if (peeked == '=') Token.neq else null,
                '<' => if (peeked == '=') Token.lte else null,
                '>' => if (peeked == '=') Token.gte else null,
                '.' => if (peeked == '.') Token.double_dot else null,
                else => null,
            };
            if (pair) |tok| {
                self.position += 2;
                return tok;
            }
        }

        const single: Token = switch (first) {
            '(' => .lparen,
            ')' => .rparen,
            '[' => .lbracket,
            ']' => .rbracket,
            ',' => .comma,
            '|' => .pipe,
            '=' => .eq,
            '<' => .lt,
            '>' => .gt,
            '+' => .plus,
            '-' => .minus,
            '*' => .star,
            '$' => .dollar,
            '/' => .slash,
            '@' => .at,
            '.' => .dot,
            else => {
                if (isNameStart(first)) return self.consumeName();

                // Polyfill parity (decision #2): unknown characters are
                // silently skipped, never an error.
                self.position += 1;
                continue;
            },
        };
        self.position += 1;
        return single;
    }
}
|
||||
|
||||
const testing = std.testing;
|
||||
|
||||
/// Test helper: tokenizes `input` and checks that the first
/// `expected.len` tokens match `expected`, in order. Tokens beyond that
/// are not inspected, so callers end `expected` with `.eof` to pin the
/// full stream.
fn expectTokens(input: []const u8, expected: []const Token) !void {
    var lexer = Tokenizer{ .input = input };
    for (expected) |want| {
        try testing.expectEqualDeep(want, lexer.next());
    }
}
|
||||
|
||||
test "XPath.Tokenizer: empty input emits EOF" {
    const want = [_]Token{.eof};
    try expectTokens("", &want);
}
|
||||
|
||||
test "XPath.Tokenizer: only whitespace emits EOF" {
    // Every whitespace byte the tokenizer recognizes, and nothing else.
    const want = [_]Token{.eof};
    try expectTokens(" \t\n\r ", &want);
}
|
||||
|
||||
test "XPath.Tokenizer: EOF idempotent past end" {
    var lexer = Tokenizer{ .input = "" };

    // Calling next() repeatedly after the end keeps yielding `.eof`.
    var calls: usize = 0;
    while (calls < 3) : (calls += 1) {
        try testing.expectEqual(Token.eof, lexer.next());
    }
}
|
||||
|
||||
test "XPath.Tokenizer: single-char operators" {
    // One token per input byte, in the same left-to-right order.
    const expected = [_]Token{
        .lparen, .rparen, .lbracket, .rbracket,
        .comma,  .pipe,   .eq,       .lt,
        .gt,     .plus,   .minus,    .star,
        .dollar, .slash,  .at,       .dot,
        .eof,
    };
    try expectTokens("()[],|=<>+-*$/@.", &expected);
}
|
||||
|
||||
test "XPath.Tokenizer: two-char operators" {
    // Each space-separated pair must come out as a single token.
    const expected = [_]Token{
        .double_slash,
        .double_colon,
        .neq,
        .lte,
        .gte,
        .double_dot,
        .eof,
    };
    try expectTokens("// :: != <= >= ..", &expected);
}
|
||||
|
||||
test "XPath.Tokenizer: two-char vs single-char disambiguation" {
    // A lone `/` stays a slash ...
    const single_slashes = [_]Token{
        .slash, .{ .name = "a" }, .slash, .{ .name = "b" }, .eof,
    };
    try expectTokens("/a/b", &single_slashes);

    // ... while `//` fuses into one token.
    const double_slash = [_]Token{ .double_slash, .{ .name = "a" }, .eof };
    try expectTokens("//a", &double_slash);

    // `<` alone versus `<=`.
    const less_than = [_]Token{ .{ .name = "a" }, .lt, .{ .name = "b" }, .eof };
    try expectTokens("a<b", &less_than);

    const less_or_equal = [_]Token{ .{ .name = "a" }, .lte, .{ .name = "b" }, .eof };
    try expectTokens("a<=b", &less_or_equal);
}
|
||||
|
||||
test "XPath.Tokenizer: string literal double quote" {
    // The token payload excludes the surrounding quotes.
    const expected = [_]Token{ .{ .string = "hello world" }, .eof };
    try expectTokens("\"hello world\"", &expected);
}
|
||||
|
||||
test "XPath.Tokenizer: string literal single quote" {
    // Same as the double-quoted form, with `'` as the delimiter.
    const expected = [_]Token{ .{ .string = "hello world" }, .eof };
    try expectTokens("'hello world'", &expected);
}
|
||||
|
||||
test "XPath.Tokenizer: string embeds the other quote type" {
    // A quote character that is not the opening delimiter is ordinary content.
    const apostrophe_inside = [_]Token{ .{ .string = "it's" }, .eof };
    try expectTokens("\"it's\"", &apostrophe_inside);

    const double_quotes_inside = [_]Token{ .{ .string = "say \"hi\"" }, .eof };
    try expectTokens("'say \"hi\"'", &double_quotes_inside);
}
|
||||
|
||||
test "XPath.Tokenizer: empty string literal" {
    // Both delimiter styles yield a zero-length string token.
    const expected = [_]Token{ .{ .string = "" }, .eof };
    const inputs = [_][]const u8{ "''", "\"\"" };
    for (inputs) |src| {
        try expectTokens(src, &expected);
    }
}
|
||||
|
||||
test "XPath.Tokenizer: unterminated string emits partial — polyfill parity" {
    // A missing closing quote is not an error: everything up to the end
    // of input becomes the string payload.
    const Case = struct { src: []const u8, text: []const u8 };
    const cases = [_]Case{
        .{ .src = "'unterminated", .text = "unterminated" },
        .{ .src = "\"oops", .text = "oops" },
    };
    for (cases) |case| {
        const expected = [_]Token{ .{ .string = case.text }, .eof };
        try expectTokens(case.src, &expected);
    }
}
|
||||
|
||||
test "XPath.Tokenizer: integer literals" {
    // Plain digit runs become a single number token.
    const Case = struct { src: []const u8, value: f64 };
    const cases = [_]Case{
        .{ .src = "0", .value = 0 },
        .{ .src = "42", .value = 42 },
        .{ .src = "12345", .value = 12345 },
    };
    for (cases) |case| {
        const expected = [_]Token{ .{ .number = case.value }, .eof };
        try expectTokens(case.src, &expected);
    }
}
|
||||
|
||||
test "XPath.Tokenizer: float literals" {
    // Digits, a dot, then more digits form one number token.
    const Case = struct { src: []const u8, value: f64 };
    const cases = [_]Case{
        .{ .src = "3.14", .value = 3.14 },
        .{ .src = "0.5", .value = 0.5 },
    };
    for (cases) |case| {
        const expected = [_]Token{ .{ .number = case.value }, .eof };
        try expectTokens(case.src, &expected);
    }
}
|
||||
|
||||
test "XPath.Tokenizer: leading-dot float (.5)" {
    // A dot directly followed by digits is a number, not a `.dot` token.
    const Case = struct { src: []const u8, value: f64 };
    const cases = [_]Case{
        .{ .src = ".5", .value = 0.5 },
        .{ .src = ".25", .value = 0.25 },
    };
    for (cases) |case| {
        const expected = [_]Token{ .{ .number = case.value }, .eof };
        try expectTokens(case.src, &expected);
    }
}
|
||||
|
||||
test "XPath.Tokenizer: trailing-dot float (5.)" {
    // The trailing dot is absorbed into the number; no `.dot` follows.
    const expected = [_]Token{ .{ .number = 5 }, .eof };
    try expectTokens("5.", &expected);
}
|
||||
|
||||
test "XPath.Tokenizer: leading zeros are decimal, not octal" {
    // Leading zeros do not switch the radix.
    const Case = struct { src: []const u8, value: f64 };
    const cases = [_]Case{
        .{ .src = "007", .value = 7 },
        .{ .src = "0042", .value = 42 },
    };
    for (cases) |case| {
        const expected = [_]Token{ .{ .number = case.value }, .eof };
        try expectTokens(case.src, &expected);
    }
}
|
||||
|
||||
test "XPath.Tokenizer: multi-digit fraction parses with parseFloat precision" {
    // Guards that the literal's text goes through parseFloat (the
    // polyfill calls Number()). An earlier hand-rolled `place *= 0.1`
    // accumulator drifted on long fractions.
    const long_fraction = [_]Token{ .{ .number = 0.123456789 }, .eof };
    try expectTokens("0.123456789", &long_fraction);

    const mixed = [_]Token{ .{ .number = 123.456 }, .eof };
    try expectTokens("123.456", &mixed);
}
|
||||
|
||||
test "XPath.Tokenizer: dot followed by non-digit emits dot token" {
    // Dot then a name start.
    const dot_then_name = [_]Token{ .dot, .{ .name = "x" }, .eof };
    try expectTokens(".x", &dot_then_name);

    // Dot at end of input.
    const lone_dot = [_]Token{ .dot, .eof };
    try expectTokens(".", &lone_dot);

    // Whitespace between the dot and the digit keeps them separate.
    const dot_then_number = [_]Token{ .dot, .{ .number = 3 }, .eof };
    try expectTokens(". 3", &dot_then_number);
}
|
||||
|
||||
test "XPath.Tokenizer: bare identifier" {
    // Each input is a single name token whose payload is the input itself.
    const idents = [_][]const u8{ "foo", "_x", "MixedCase" };
    for (idents) |ident| {
        const expected = [_]Token{ .{ .name = ident }, .eof };
        try expectTokens(ident, &expected);
    }
}
|
||||
|
||||
test "XPath.Tokenizer: identifier with digits, dashes, dots" {
    // `-`, `.` and digits are accepted inside a name once it has started.
    const idents = [_][]const u8{ "foo-bar", "foo.bar", "a1b2" };
    for (idents) |ident| {
        const expected = [_]Token{ .{ .name = ident }, .eof };
        try expectTokens(ident, &expected);
    }
}
|
||||
|
||||
test "XPath.Tokenizer: namespace-prefixed name" {
    // `prefix:local` and `prefix:*` are each kept as one name token.
    const idents = [_][]const u8{ "xhtml:div", "svg:*" };
    for (idents) |ident| {
        const expected = [_]Token{ .{ .name = ident }, .eof };
        try expectTokens(ident, &expected);
    }
}
|
||||
|
||||
test "XPath.Tokenizer: name followed by `::` keeps the colon for the axis token" {
    // The name stops before `::` so the axis separator is its own token.
    const expected = [_]Token{
        .{ .name = "child" },
        .double_colon,
        .{ .name = "node" },
        .eof,
    };
    try expectTokens("child::node", &expected);
}
|
||||
|
||||
test "XPath.Tokenizer: name immediately followed by `(` is two tokens" {
    // The tokenizer does not recognise function calls; that is left to
    // the parser.
    const expected = [_]Token{
        .{ .name = "count" },
        .lparen,
        .rparen,
        .eof,
    };
    try expectTokens("count()", &expected);
}
|
||||
|
||||
test "XPath.Tokenizer: keywords or/and/div/mod tokenize as plain names" {
    // Operator words get no dedicated token; they come out as names.
    const with_or = [_]Token{
        .{ .name = "a" },
        .{ .name = "or" },
        .{ .name = "b" },
        .eof,
    };
    try expectTokens("a or b", &with_or);

    const with_div = [_]Token{
        .{ .number = 3 },
        .{ .name = "div" },
        .{ .number = 4 },
        .eof,
    };
    try expectTokens("3 div 4", &with_div);
}
|
||||
|
||||
test "XPath.Tokenizer: unknown character silently skipped" {
    // A leading `?` is dropped without producing a token.
    const leading = [_]Token{ .{ .name = "foo" }, .eof };
    try expectTokens("?foo", &leading);

    // In the middle of input it also ends the name before it.
    const middle = [_]Token{ .{ .name = "foo" }, .{ .name = "bar" }, .eof };
    try expectTokens("foo?bar", &middle);
}
|
||||
|
||||
test "XPath.Tokenizer: representative path expression" {
    // End-to-end sample mixing `//`, predicates, an attribute test, a
    // string literal and a numeric predicate.
    const expected = [_]Token{
        .double_slash, .{ .name = "div" },
        .lbracket,     .at,
        .{ .name = "class" }, .eq,
        .{ .string = "x" },   .rbracket,
        .slash,        .{ .name = "p" },
        .lbracket,     .{ .number = 2 },
        .rbracket,     .eof,
    };
    try expectTokens("//div[@class='x']/p[2]", &expected);
}
|
||||
|
||||
test "XPath.Tokenizer: representative axis + predicate expression" {
    // End-to-end sample: dashed axis names, `::`, a wildcard step and a
    // predicate with function calls, `<=` and subtraction.
    const source = "ancestor-or-self::section/following-sibling::*[position()<=last()-1]";
    const expected = [_]Token{
        .{ .name = "ancestor-or-self" }, .double_colon, .{ .name = "section" },
        .slash,
        .{ .name = "following-sibling" }, .double_colon, .star,
        .lbracket,
        .{ .name = "position" }, .lparen, .rparen,
        .lte,
        .{ .name = "last" }, .lparen, .rparen,
        .minus,
        .{ .number = 1 },
        .rbracket,
        .eof,
    };
    try expectTokens(source, &expected);
}
|
||||
|
||||
test "XPath.Tokenizer: parent-axis abbreviation" {
    // `..` is one token and the following `/` is separate.
    const expected = [_]Token{
        .double_dot,
        .slash,
        .{ .name = "foo" },
        .eof,
    };
    try expectTokens("../foo", &expected);
}
|
||||
|
||||
test "XPath.Tokenizer: filter expression with predicate" {
    // Parenthesised path followed by a numeric predicate.
    const expected = [_]Token{
        .lparen,
        .double_slash,
        .{ .name = "a" },
        .rparen,
        .lbracket,
        .{ .number = 1 },
        .rbracket,
        .eof,
    };
    try expectTokens("(//a)[1]", &expected);
}
|
||||
|
||||
test "XPath.Tokenizer: variable reference" {
    // `$` is its own token; the variable name follows as a plain name.
    const expected = [_]Token{
        .dollar,
        .{ .name = "x" },
        .plus,
        .{ .number = 1 },
        .eof,
    };
    try expectTokens("$x + 1", &expected);
}
|
||||
133
src/browser/xpath/ast.zig
Normal file
133
src/browser/xpath/ast.zig
Normal file
@@ -0,0 +1,133 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! XPath 1.0 AST.
|
||||
//!
|
||||
//! Slices and pointers are arena-owned by the Parser; the AST has no
|
||||
//! destructor.
|
||||
|
||||
/// Root node type of the XPath expression tree. Child expressions are
/// held by pointer.
pub const Expr = union(enum) {
    /// Location path, absolute or relative: `/foo/bar`, `//x`, `foo/bar`.
    path: Path,
    /// A filter expression continued by location steps:
    /// `(//a)/b`, `(expr)//c`.
    filter_path: FilterPath,
    /// A filter expression carrying exactly one predicate: `(expr)[n]`.
    /// Several predicates are expressed by nesting:
    /// `(e)[1][2]` → filter(filter(e,1),2).
    filter: Filter,
    /// Binary operation; see `BinOpKind` for the operator list.
    binop: BinOp,
    /// Unary minus. There is no unary `+` (the polyfill has none).
    neg: *Expr,
    /// String literal with the surrounding quotes removed.
    literal: []const u8,
    /// Numeric literal, already parsed to f64.
    number: f64,
    /// Variable reference without the leading `$`. Per decision #3 the
    /// evaluator always yields the empty string for it.
    var_ref: []const u8,
    /// Function call: name plus argument expressions.
    fn_call: FnCall,
};
|
||||
|
||||
/// Payload of `Expr.path`: a sequence of location steps.
pub const Path = struct {
    /// Whether the path is absolute rather than relative.
    absolute: bool,
    /// The steps of the path.
    steps: []const Step,
};
|
||||
|
||||
/// Payload of `Expr.filter_path`: a filter expression followed by a
/// location-path tail.
pub const FilterPath = struct {
    /// The leading filter expression.
    filter: *Expr,
    /// The location steps that follow it.
    steps: []const Step,
};
|
||||
|
||||
/// Payload of `Expr.filter`: one expression with one predicate.
pub const Filter = struct {
    /// The expression being filtered.
    expr: *Expr,
    /// The predicate applied to it.
    predicate: *Expr,
};
|
||||
|
||||
/// Payload of `Expr.binop`: an operator and its two operands.
pub const BinOp = struct {
    /// Which operator this node applies.
    op: BinOpKind,
    /// Left-hand operand.
    left: *Expr,
    /// Right-hand operand.
    right: *Expr,
};
|
||||
|
||||
/// Operators a `BinOp` node can carry. The trailing underscore on
/// `or_`, `and_` and `union_` avoids clashing with Zig keywords.
pub const BinOpKind = enum {
    // Logical.
    or_,
    and_,
    // Equality and relational.
    eq,
    neq,
    lt,
    gt,
    lte,
    gte,
    // Arithmetic.
    add,
    sub,
    mul,
    div,
    mod,
    // Union of operands.
    union_,
};
|
||||
|
||||
/// Payload of `Expr.fn_call`.
pub const FnCall = struct {
    /// Name of the function being called.
    name: []const u8,
    /// Argument expressions.
    args: []const *Expr,
};
|
||||
|
||||
/// One step of a location path: an axis, a node test and any number of
/// predicates.
pub const Step = struct {
    /// Axis of the step.
    axis: Axis,
    /// Node test of the step.
    node_test: NodeTest,
    /// Predicate expressions attached to the step.
    predicates: []const *Expr,
};
|
||||
|
||||
/// Axes a `Step` can use, plus an `unknown` catch-all.
pub const Axis = enum {
    child,
    descendant,
    descendant_or_self,
    self,
    parent,
    ancestor,
    ancestor_or_self,
    following_sibling,
    preceding_sibling,
    following,
    preceding,
    attribute,
    namespace,
    /// Polyfill parity (decision #2): an axis name that is not
    /// recognised parses to this variant, and the evaluator returns an
    /// empty node-set for it.
    unknown,
};
|
||||
|
||||
/// The node test of a `Step`: either a name or a node-type test.
pub const NodeTest = union(enum) {
    /// Element or attribute name; `"*"` is the wildcard. Namespaced
    /// forms (`prefix:*`, `prefix:local`) are kept verbatim. The
    /// evaluator does not split them, so they end up in a literal
    /// `mem.eql` against the node name (in line with the `namespace::`
    /// axis stub from decision #3).
    /// TODO: real namespace support if the polyfill ever drops the stub.
    name: []const u8,
    /// `node()`, `text()`, `comment()`, `processing-instruction()`.
    /// The optional target literal in `processing-instruction("foo")`
    /// is consumed but not stored (decision #3 stub).
    type_test: TypeTest,
};
|
||||
|
||||
/// Node-type tests usable in `NodeTest.type_test`.
pub const TypeTest = enum {
    /// `node()`
    node,
    /// `text()`
    text,
    /// `comment()`
    comment,
    /// `processing-instruction()`
    processing_instruction,
};
|
||||
630
src/browser/xpath/functions.zig
Normal file
630
src/browser/xpath/functions.zig
Normal file
@@ -0,0 +1,630 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! XPath 1.0 core function library — 25 functions covering the spec's
|
||||
//! core function set. `position()` and `last()` live in
|
||||
//! `Evaluator.evalFnCall` because they need the `(pos, size)` closure
|
||||
//! that this module never sees.
|
||||
//!
|
||||
//! Args are pre-evaluated by the caller (`Evaluator.evalFnCall`). Eager
|
||||
//! evaluation is fine here — short-circuit operators (`or`/`and`) are
|
||||
//! binops, not function calls, so laziness isn't required. The
|
||||
//! pre-evaluation contract also keeps functions.zig free of a circular
|
||||
//! import on Evaluator.zig.
|
||||
//!
|
||||
//! Stubs per decision #3:
|
||||
//! - `lang(string)` → always false
|
||||
//! - `namespace-uri(...)` → always ""
|
||||
//! - `name`/`local-name` → lowercased (HTML pragmatism)
|
||||
//!
|
||||
//! Allocations land in the caller's per-evaluation arena.
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
|
||||
const Node = @import("../webapi/Node.zig");
|
||||
|
||||
const result = @import("result.zig");
|
||||
|
||||
const Frame = lp.Frame;
|
||||
const Element = Node.Element;
|
||||
const Document = Node.Document;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
/// Errors the function library can return.
pub const Error = error{
    /// Allocation failed.
    OutOfMemory,
    /// A write into an output buffer failed.
    WriteFailed,
    // NOTE(review): not raised directly in this file; presumably
    // surfaced by the `result` conversions. Confirm against result.zig.
    StringTooLarge,
    /// `call` was given a name that is not a core-library function.
    UnknownFunction,
};
|
||||
|
||||
/// Look up `name` in the core function library and invoke it with the
/// already-evaluated `args`.
///
/// `ctx` is the context node used by functions that default to it when
/// called without arguments; `frame` is forwarded to `id()`. All
/// allocations go through `arena`.
///
/// Returns `error.UnknownFunction` when `name` is not one of the
/// functions handled here. The Evaluator deals with `position()` and
/// `last()` itself before calling in, so this is the final lookup.
pub fn call(
    arena: Allocator,
    name: []const u8,
    args: []const result.Result,
    ctx: *Node,
    frame: *Frame,
) Error!result.Result {
    // Field names are the exact, case-sensitive XPath function names.
    const Builtin = enum {
        // Node-set
        count,
        id,
        @"local-name",
        name,
        @"namespace-uri",
        // String
        string,
        concat,
        @"starts-with",
        contains,
        @"substring-before",
        @"substring-after",
        substring,
        @"string-length",
        @"normalize-space",
        translate,
        // Boolean
        boolean,
        not,
        @"true",
        @"false",
        lang,
        // Number
        number,
        sum,
        floor,
        ceiling,
        round,
    };

    const builtin = std.meta.stringToEnum(Builtin, name) orelse return error.UnknownFunction;

    switch (builtin) {
        // -- Node-set --
        .count => return .{ .number = countFn(args) },
        .id => return idFn(arena, args, ctx, frame),
        .@"local-name" => return .{ .string = try localNameFn(arena, args, ctx) },
        .name => return .{ .string = try nameFn(arena, args, ctx) },
        // Stub: always the empty string.
        .@"namespace-uri" => return .{ .string = "" },

        // -- String --
        .string => return .{ .string = try stringFn(arena, args, ctx) },
        .concat => return .{ .string = try concatFn(arena, args) },
        .@"starts-with" => return .{ .boolean = try startsWithFn(arena, args) },
        .contains => return .{ .boolean = try containsFn(arena, args) },
        .@"substring-before" => return .{ .string = try substringBeforeFn(arena, args) },
        .@"substring-after" => return .{ .string = try substringAfterFn(arena, args) },
        .substring => return .{ .string = try substringFn(arena, args) },
        .@"string-length" => return .{ .number = try stringLengthFn(arena, args, ctx) },
        .@"normalize-space" => return .{ .string = try normalizeSpaceFn(arena, args, ctx) },
        .translate => return .{ .string = try translateFn(arena, args) },

        // -- Boolean --
        .boolean => return .{ .boolean = if (args.len == 0) false else result.toBoolean(args[0]) },
        .not => return .{ .boolean = if (args.len == 0) true else !result.toBoolean(args[0]) },
        .@"true" => return .{ .boolean = true },
        .@"false" => return .{ .boolean = false },
        // Stub: always false.
        .lang => return .{ .boolean = false },

        // -- Number --
        .number => return .{ .number = try numberFn(arena, args, ctx) },
        .sum => return .{ .number = try sumFn(arena, args) },
        // A missing argument yields NaN for the three rounding functions.
        .floor => return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.floor(try result.toNumber(arena, args[0])) },
        .ceiling => return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.ceil(try result.toNumber(arena, args[0])) },
        .round => return .{ .number = if (args.len == 0) std.math.nan(f64) else roundHalfToPosInf(try result.toNumber(arena, args[0])) },
    }
}
|
||||
|
||||
/// Byte-wise equality of two strings; shorthand for
/// `std.mem.eql(u8, ...)`.
inline fn eql(a: []const u8, b: []const u8) bool {
    const same = std.mem.eql(u8, a, b);
    return same;
}
|
||||
|
||||
// ----- node-set fns -----
|
||||
|
||||
/// `count(node-set)`: number of nodes in the first argument. Returns 0
/// when there is no argument or the argument is not a node-set.
fn countFn(args: []const result.Result) f64 {
    if (args.len == 0) return 0;
    return switch (args[0]) {
        .node_set => |nodes| @floatFromInt(nodes.len),
        else => 0,
    };
}
|
||||
|
||||
/// `id(object)`: look up elements by ID.
///
/// The argument is turned into one string: for a node-set, the string
/// values of its nodes joined by a single space; otherwise the
/// argument's string conversion. That string is split on ASCII
/// whitespace and each token is passed to `getElementById` on the
/// document. Matches are collected without duplicates, in lookup order.
///
/// Returns an empty node-set when called without arguments or when no
/// document can be found for `ctx`.
fn idFn(arena: Allocator, args: []const result.Result, ctx: *Node, frame: *Frame) Error!result.Result {
    if (args.len == 0) return .{ .node_set = &.{} };

    const ids: []const u8 = switch (args[0]) {
        .node_set => |nodes| joined: {
            var out = std.Io.Writer.Allocating.init(arena);
            for (nodes, 0..) |member, pos| {
                if (pos > 0) try out.writer.writeByte(' ');
                const text = try result.stringValueOf(arena, member);
                try out.writer.writeAll(text);
            }
            break :joined out.written();
        },
        else => try result.toString(arena, args[0]),
    };

    // Mirrors `ctx.ownerDocument || ctx`: when `ctx` has no owner
    // document, `ctx` itself is used if it is a Document.
    const doc = if (ctx.ownerDocument(frame)) |owner|
        owner
    else if (ctx.is(Document)) |self_doc|
        self_doc
    else
        return .{ .node_set = &.{} };

    // An insertion-ordered set doubles as de-duplication.
    var found: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;
    var tokens = std.mem.tokenizeAny(u8, ids, &std.ascii.whitespace);
    while (tokens.next()) |wanted| {
        const el = doc.getElementById(wanted, frame) orelse continue;
        try found.put(arena, el.asNode(), {});
    }
    return .{ .node_set = found.keys() };
}
|
||||
|
||||
/// `local-name(node-set?)`: local name of the first node of the
/// argument, or of `ctx` when called without arguments. Returns "" when
/// the argument is not a node-set or is empty.
fn localNameFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
    const target = firstNodeOrCtx(args, ctx) orelse return "";

    if (node_is_element: {
        break :node_is_element target.is(Element);
    }) |element| {
        // `getLocalName` hands back a slice into `_tag_name` (lowercase,
        // prefix stripped) that outlives the per-evaluation arena, so
        // it is returned as-is instead of being copied.
        return element.getLocalName();
    }

    // Every other node kind: lowercase copy of the node name.
    var scratch: [256]u8 = undefined;
    const raw = target.getNodeName(&scratch);
    return std.ascii.allocLowerString(arena, raw);
}
|
||||
|
||||
/// `name(node-set?)`: qualified name of the first node of the argument,
/// or of `ctx` when called without arguments. Returns "" when the
/// argument is not a node-set or is empty.
///
/// The only difference from `local-name` is on namespaced elements:
/// `name` keeps the prefix (`ns:foo`) where `local-name` drops it (`foo`).
fn nameFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
    const target = firstNodeOrCtx(args, ctx) orelse return "";

    const element = target.is(Element) orelse {
        // Non-element nodes: lowercase copy of the node name.
        var scratch: [256]u8 = undefined;
        const raw = target.getNodeName(&scratch);
        return std.ascii.allocLowerString(arena, raw);
    };
    return element.getTagNameLower();
}
|
||||
|
||||
/// Pick the node a node-name function operates on: `ctx` when no
/// argument was given, otherwise the first node of the first argument.
/// Returns null when that argument is not a node-set or is empty.
fn firstNodeOrCtx(args: []const result.Result, ctx: *Node) ?*Node {
    if (args.len == 0) return ctx;
    return switch (args[0]) {
        .node_set => |nodes| if (nodes.len == 0) null else nodes[0],
        else => null,
    };
}
|
||||
|
||||
// ----- string fns -----
|
||||
|
||||
/// `string(object?)`: string conversion of the first argument, or the
/// string value of `ctx` when called without arguments.
fn stringFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
    const text = if (args.len == 0)
        try result.stringValueOf(arena, ctx)
    else
        try result.toString(arena, args[0]);
    return text;
}
|
||||
|
||||
/// `concat(...)`: string conversions of all arguments, appended in
/// order. With no arguments the result is empty.
fn concatFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    var out = std.Io.Writer.Allocating.init(arena);
    var idx: usize = 0;
    while (idx < args.len) : (idx += 1) {
        const piece = try result.toString(arena, args[idx]);
        try out.writer.writeAll(piece);
    }
    return out.written();
}
|
||||
|
||||
/// `starts-with(s1, s2)`: whether the string form of the first argument
/// begins with the string form of the second. Fewer than two arguments
/// yields false.
fn startsWithFn(arena: Allocator, args: []const result.Result) Error!bool {
    if (args.len >= 2) {
        const haystack = try result.toString(arena, args[0]);
        const prefix = try result.toString(arena, args[1]);
        return std.mem.startsWith(u8, haystack, prefix);
    }
    return false;
}
|
||||
|
||||
/// `contains(s1, s2)`: whether the string form of the second argument
/// occurs anywhere in the string form of the first. Fewer than two
/// arguments yields false.
fn containsFn(arena: Allocator, args: []const result.Result) Error!bool {
    if (args.len >= 2) {
        const haystack = try result.toString(arena, args[0]);
        const needle = try result.toString(arena, args[1]);
        return if (std.mem.indexOf(u8, haystack, needle)) |_| true else false;
    }
    return false;
}
|
||||
|
||||
/// `substring-before(s1, s2)`: the part of `s1` in front of the first
/// occurrence of `s2`. Returns "" when `s2` does not occur or fewer
/// than two arguments were given. The result is a slice of the
/// converted first argument, not a copy.
fn substringBeforeFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    if (args.len < 2) return "";
    const haystack = try result.toString(arena, args[0]);
    const needle = try result.toString(arena, args[1]);
    const at = std.mem.indexOf(u8, haystack, needle) orelse return "";
    return haystack[0..at];
}
|
||||
|
||||
/// `substring-after(s1, s2)`: the part of `s1` behind the first
/// occurrence of `s2`. Returns "" when `s2` does not occur or fewer
/// than two arguments were given. The result is a slice of the
/// converted first argument, not a copy.
fn substringAfterFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    if (args.len < 2) return "";
    const haystack = try result.toString(arena, args[0]);
    const needle = try result.toString(arena, args[1]);
    const at = std.mem.indexOf(u8, haystack, needle) orelse return "";
    const resume_at = at + needle.len;
    return haystack[resume_at..];
}
|
||||
|
||||
/// `substring(s, start, len?)` with XPath's 1-based, rounded positions.
///
/// `start` and `len` are rounded with `roundHalfToPosInf`. The selected
/// range is `[start - 1, start - 1 + len)` in 0-based byte offsets,
/// clamped to `[0, s.len]`; without `len` it runs to the end of `s`.
/// Returns "" when fewer than two arguments are given, when `start` or
/// `len` is NaN, or when the clamped range is empty. The result is a
/// slice of the converted first argument, not a copy.
fn substringFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    if (args.len < 2) return "";

    const text = try result.toString(arena, args[0]);
    const start_arg = try result.toNumber(arena, args[1]);
    if (std.math.isNan(start_arg)) return "";

    const first_pos = roundHalfToPosInf(start_arg);
    const text_len: f64 = @floatFromInt(text.len);

    // Lower bound is identical for the 2- and 3-argument forms.
    const begin_f = @max(first_pos - 1, 0);

    const end_f: f64 = if (args.len >= 3) limit: {
        const len_arg = try result.toNumber(arena, args[2]);
        if (std.math.isNan(len_arg)) return "";
        const span_end = first_pos - 1 + roundHalfToPosInf(len_arg);
        // -inf + inf is NaN, and converting NaN to an integer is
        // illegal behavior, so bail out first.
        if (std.math.isNan(span_end)) return "";
        break :limit @min(span_end, text_len);
    } else text_len;

    if (begin_f >= end_f) return "";

    const begin: usize = @intFromFloat(begin_f);
    const end: usize = @intFromFloat(end_f);
    return text[begin..end];
}
|
||||
|
||||
/// `string-length(string?)`: length of the string form of the first
/// argument, or of the string value of `ctx` when called without
/// arguments.
///
/// The length is counted in UTF-8 bytes, whereas the polyfill counts
/// UTF-16 code units. The two agree for ASCII input (the gem's 91-case
/// battery is ASCII-only); the rationale for the divergence is recorded
/// in .claude/skills/xpath-port/NOTES.md.
fn stringLengthFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error!f64 {
    const text = switch (args.len) {
        0 => try result.stringValueOf(arena, ctx),
        else => try result.toString(arena, args[0]),
    };
    return @floatFromInt(text.len);
}
|
||||
|
||||
/// `normalize-space(string?)`: strip leading and trailing ASCII
/// whitespace and collapse every inner run of whitespace to one space.
/// Operates on the string form of the first argument, or on the string
/// value of `ctx` when called without arguments.
fn normalizeSpaceFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
    const source = if (args.len == 0)
        try result.stringValueOf(arena, ctx)
    else
        try result.toString(arena, args[0]);

    // Splitting on whitespace and re-joining with a single space does
    // the trim and the collapse in one pass.
    var words = std.mem.tokenizeAny(u8, source, &std.ascii.whitespace);
    const first = words.next() orelse return "";

    var out = std.Io.Writer.Allocating.init(arena);
    try out.writer.writeAll(first);
    while (words.next()) |word| {
        try out.writer.writeByte(' ');
        try out.writer.writeAll(word);
    }
    return out.written();
}
|
||||
|
||||
/// `translate(s, from, to)`: replace each byte of `s` found in `from`
/// with the byte at the same index in `to`. A byte whose index in
/// `from` has no counterpart in `to` is dropped; bytes not in `from`
/// are copied unchanged. The first occurrence in `from` decides.
/// Fewer than three arguments yields "".
fn translateFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    if (args.len < 3) return "";

    const source = try result.toString(arena, args[0]);
    const from = try result.toString(arena, args[1]);
    const to = try result.toString(arena, args[2]);

    var out = std.Io.Writer.Allocating.init(arena);
    for (source) |byte| {
        const at = std.mem.indexOfScalar(u8, from, byte) orelse {
            // Not mentioned in `from`: keep as-is.
            try out.writer.writeByte(byte);
            continue;
        };
        // Mapped when `to` is long enough, otherwise deleted.
        if (at < to.len) try out.writer.writeByte(to[at]);
    }
    return out.written();
}
|
||||
|
||||
// ----- number fns -----
|
||||
|
||||
/// `number(object?)`: numeric conversion of the first argument. Without
/// arguments, the string value of `ctx` is converted instead.
fn numberFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error!f64 {
    if (args.len != 0) return try result.toNumber(arena, args[0]);

    const ctx_text = try result.stringValueOf(arena, ctx);
    return result.stringToNumber(ctx_text);
}
|
||||
|
||||
/// `sum(node-set)`: add up the numeric value of each node's string
/// value. Returns NaN when there is no argument or the argument is not
/// a node-set; an empty node-set sums to 0.
fn sumFn(arena: Allocator, args: []const result.Result) Error!f64 {
    if (args.len == 0) return std.math.nan(f64);
    const nodes = switch (args[0]) {
        .node_set => |set| set,
        else => return std.math.nan(f64),
    };

    var acc: f64 = 0;
    for (nodes) |member| {
        const text = try result.stringValueOf(arena, member);
        acc += result.stringToNumber(text);
    }
    return acc;
}
|
||||
|
||||
/// Round to the nearest integer, with ties going toward positive
/// infinity. Matches JS `Math.round` (the polyfill calls it for both
/// `round()` and `substring()`):
///   round(0.5) = 1    round(-0.5) = 0
///   round(1.5) = 2    round(-1.5) = -1
/// Diverges from Zig's `@round` (away from zero): `@round(-0.5) = -1`.
///
/// NaN and ±infinity are returned unchanged. A zero result is always
/// +0, including for inputs in [-0.5, 0) and for -0.
///
/// The fractional part is compared against 0.5 instead of computing
/// `floor(n + 0.5)`, because `n + 0.5` is not always representable:
/// `0.49999999999999994 + 0.5` rounds up to exactly 1.0, and for odd
/// integers at or above 2^52 the sum rounds to the next even integer.
fn roundHalfToPosInf(n: f64) f64 {
    // `isFinite` is false for NaN as well as for ±inf.
    if (!std.math.isFinite(n)) return n;

    const lower = std.math.floor(n);
    // `n - lower` is exact: it lies in [0, 1) and is made of low-order
    // bits of `n`, so no rounding happens in the subtraction.
    const rounded = if (n - lower >= 0.5) lower + 1 else lower;

    // Normalise -0 (from floor(-0)) to +0.
    return if (rounded == 0) 0 else rounded;
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Tests — pure-logic only. Functions that need a real DOM (id, name,
|
||||
// local-name, string with element ctx, sum, count of node-set, etc.)
|
||||
// are exercised via Phase 9 HTML fixtures in tests/xpath/.
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
const testing = std.testing;
|
||||
const Tokenizer = @import("Tokenizer.zig");
|
||||
const Parser = @import("Parser.zig");
|
||||
const Evaluator = @import("Evaluator.zig");
|
||||
|
||||
/// Test helper: parse `src` and evaluate it without a real DOM.
///
/// The last two arguments to `Evaluator.evaluate` are fabricated
/// pointers. The public `evaluate` entry only touches the Frame for
/// path/axis evaluation, so pure-scalar expressions (arithmetic,
/// function calls returning scalars) never dereference them. Do not
/// use this helper for expressions that walk the document.
fn evalScalar(a: Allocator, src: []const u8) !result.Result {
    const ast_root = try Parser.parse(a, src);
    return Evaluator.evaluate(
        a,
        ast_root,
        @ptrFromInt(0x2000),
        @ptrFromInt(0x1000),
    );
}
|
||||
|
||||
test "Functions: count() of non-node-set returns 0" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();

    // A string argument is not a node-set, so the count is 0.
    const got = try evalScalar(arena_state.allocator(), "count('hello')");
    try testing.expect(got == .number);
    try testing.expectEqual(@as(f64, 0), got.number);
}
|
||||
|
||||
test "Functions: string() on scalar coerces" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected string }
    const cases = .{
        .{ "string(42)", "42" },
        .{ "string(3.14)", "3.14" },
        .{ "string(true())", "true" },
        .{ "string(false())", "false" },
        .{ "string('hello')", "hello" },
        .{ "string(0)", "0" },
        .{ "string(-1)", "-1" },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(case[1], got.string);
    }
}
|
||||
|
||||
test "Functions: concat() variadic" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected string }; the last case mixes in a number.
    const cases = .{
        .{ "concat('a', 'b')", "ab" },
        .{ "concat('a', 'b', 'c')", "abc" },
        .{ "concat('foo', '-', 'bar', '-', 'baz')", "foo-bar-baz" },
        .{ "concat('x', 1, 'y')", "x1y" },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(case[1], got.string);
    }
}
|
||||
|
||||
test "Functions: starts-with / contains" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected boolean }; an empty second argument
    // always matches.
    const cases = .{
        .{ "starts-with('hello', 'he')", true },
        .{ "starts-with('hello', 'el')", false },
        .{ "starts-with('hello', '')", true },
        .{ "contains('hello world', 'wor')", true },
        .{ "contains('hello', 'xyz')", false },
        .{ "contains('hello', '')", true },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .boolean);
        try testing.expectEqual(case[1], got.boolean);
    }
}
|
||||
|
||||
test "Functions: substring-before / substring-after" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected string }; a separator that does not occur
    // gives the empty string.
    const cases = .{
        .{ "substring-before('1999/04/01', '/')", "1999" },
        .{ "substring-before('hello', 'xyz')", "" },
        .{ "substring-after('1999/04/01', '/')", "04/01" },
        .{ "substring-after('hello', 'xyz')", "" },
        .{ "substring-after('hello', '')", "hello" },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(case[1], got.string);
    }
}
|
||||
|
||||
test "Functions: substring() — XPath 1-based, rounding, NaN handling" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected string }
    const cases = .{
        .{ "substring('12345', 2, 3)", "234" },
        .{ "substring('12345', 2)", "2345" },
        // Example from the XPath spec: start 1.5 rounds to 2 and
        // length 2.6 rounds to 3.
        .{ "substring('12345', 1.5, 2.6)", "234" },
        // start = 0: begin = max(-1, 0) = 0, end = min(0 - 1 + 3, len) = 2.
        .{ "substring('12345', 0, 3)", "12" },
        // A negative start is clamped to the beginning.
        .{ "substring('12345', -3, 7)", "123" },
        // Start is NaN.
        .{ "substring('12345', 'foo')", "" },
        // Length is NaN.
        .{ "substring('12345', 1, 'foo')", "" },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(case[1], got.string);
    }
}
|
||||
|
||||
test "Functions: string-length on scalar arg" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected length }; spaces count like any other byte.
    const cases = .{
        .{ "string-length('hello')", 5 },
        .{ "string-length('')", 0 },
        .{ "string-length('a b c')", 5 },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .number);
        try testing.expectEqual(@as(f64, case[1]), got.number);
    }
}
|
||||
|
||||
test "Functions: normalize-space" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected string }; tabs and newlines count as
    // whitespace too.
    const cases = .{
        .{ "normalize-space(' hello world ')", "hello world" },
        .{ "normalize-space('hello')", "hello" },
        .{ "normalize-space('')", "" },
        .{ "normalize-space(' ')", "" },
        .{ "normalize-space('a\tb\nc')", "a b c" },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(case[1], got.string);
    }
}
|
||||
|
||||
test "Functions: translate" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    // Each entry is { expression, expected translated string }.
    const cases = .{
        // Example taken from the XPath specification.
        .{ "translate('bar', 'abc', 'ABC')", "BAr" },
        // `-` sits in `from` beyond the end of `to`, so it is removed.
        .{ "translate('--aaa--', 'abc-', 'ABC')", "AAA" },
        .{ "translate('hello', '', '')", "hello" },
        // Mapping every character to itself changes nothing.
        .{ "translate('abc', 'abc', 'abc')", "abc" },
    };

    inline for (cases) |case| {
        const expr = case[0];
        const expected = case[1];

        const r = try evalScalar(a, expr);
        try testing.expect(r == .string);
        try testing.expectEqualStrings(expected, r.string);
    }
}
|
||||
|
||||
test "Functions: boolean / not / true / false / lang" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    // Each entry is { expression, expected boolean result }.
    const cases = .{
        .{ "true()", true },
        .{ "false()", false },
        .{ "not(true())", false },
        .{ "not(false())", true },
        .{ "boolean(1)", true },
        .{ "boolean(0)", false },
        .{ "boolean('')", false },
        .{ "boolean('x')", true },
        // lang() is only stubbed out and is expected to answer false.
        .{ "lang('en')", false },
    };

    inline for (cases) |case| {
        const expected: bool = case[1];

        const r = try evalScalar(a, case[0]);
        try testing.expect(r == .boolean);
        try testing.expectEqual(expected, r.boolean);
    }
}
|
||||
|
||||
test "Functions: number() on scalar arg" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    // Numeric string parses to its value.
    const from_digits = try evalScalar(a, "number('42')");
    try testing.expectEqual(@as(f64, 42), from_digits.number);

    // Booleans map to 1 and 0.
    const from_true = try evalScalar(a, "number(true())");
    try testing.expectEqual(@as(f64, 1), from_true.number);

    const from_false = try evalScalar(a, "number(false())");
    try testing.expectEqual(@as(f64, 0), from_false.number);

    // A non-numeric string yields NaN.
    const from_word = try evalScalar(a, "number('foo')");
    try testing.expect(std.math.isNan(from_word.number));
}
|
||||
|
||||
test "Functions: floor / ceiling / round" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    // Each entry is { expression, expected numeric result }.
    const cases = .{
        .{ "floor(1.5)", 1 },
        .{ "floor(-1.5)", -2 },
        .{ "floor(0)", 0 },
        .{ "ceiling(1.5)", 2 },
        .{ "ceiling(-1.5)", -1 },
        .{ "ceiling(0)", 0 },
        // round() resolves ties toward positive infinity, the same way
        // JavaScript's Math.round does.
        .{ "round(0.5)", 1 },
        .{ "round(-0.5)", 0 },
        .{ "round(1.5)", 2 },
        .{ "round(-1.5)", -1 },
        .{ "round(2.5)", 3 },
    };

    inline for (cases) |case| {
        const expected: f64 = case[1];

        const r = try evalScalar(a, case[0]);
        try testing.expect(r == .number);
        try testing.expectEqual(expected, r.number);
    }
}
|
||||
|
||||
test "Functions: round/floor/ceiling propagate NaN and Infinity" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    // `1 div 0` is +Infinity and must survive rounding unchanged.
    const rounded_inf = try evalScalar(a, "round(1 div 0)");
    try testing.expect(std.math.isPositiveInf(rounded_inf.number));

    // `0 div 0` is NaN; every rounding function must hand it back as NaN.
    inline for (.{
        "round(0 div 0)",
        "floor(0 div 0)",
        "ceiling(0 div 0)",
    }) |expr| {
        const r = try evalScalar(a, expr);
        try testing.expect(std.math.isNan(r.number));
    }
}
|
||||
|
||||
test "Functions: sum / count on non-node-set defaults" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    // sum() over a string argument is expected to give NaN.
    const summed = try evalScalar(a, "sum('hello')");
    try testing.expect(std.math.isNan(summed.number));

    // count() over a string argument is expected to give 0.
    const counted = try evalScalar(a, "count('hello')");
    try testing.expectEqual(@as(f64, 0), counted.number);
}
|
||||
|
||||
test "Functions: roundHalfToPosInf" {
    // Each entry is { input, expected }; ties resolve toward +Infinity.
    inline for (.{
        .{ 0.5, 1 },
        .{ -0.5, 0 },
        .{ 1.5, 2 },
        .{ -1.5, -1 },
        .{ 2.5, 3 },
    }) |case| {
        const expected: f64 = case[1];
        try testing.expectEqual(expected, roundHalfToPosInf(case[0]));
    }

    // Non-finite inputs pass through with their class (and sign) intact.
    const inf = std.math.inf(f64);
    try testing.expect(std.math.isNan(roundHalfToPosInf(std.math.nan(f64))));
    try testing.expect(std.math.isPositiveInf(roundHalfToPosInf(inf)));
    try testing.expect(std.math.isNegativeInf(roundHalfToPosInf(-inf)));
}
|
||||
199
src/browser/xpath/result.zig
Normal file
199
src/browser/xpath/result.zig
Normal file
@@ -0,0 +1,199 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! XPath 1.0 runtime values.
|
||||
//!
|
||||
//! Tagged union over the four XPath value types: node-set, number,
|
||||
//! string, boolean. Type coercion (`toString`, `toNumber`, `toBoolean`)
|
||||
//! follows XPath 1.0 spec §3, with HTML-pragmatic shortcuts (decision
|
||||
//! #2).
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const Node = @import("../webapi/Node.zig");
|
||||
|
||||
const CData = Node.CData;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
/// A value produced while evaluating an XPath 1.0 expression: exactly one
/// of the four XPath value types.
pub const Result = union(enum) {
    /// Set of matched nodes. The slice is owned by the evaluator's arena.
    /// Its order only matters at the public boundary, where the evaluator
    /// sorts it into document order.
    node_set: []const *Node,
    /// Numeric value; may be NaN or infinite.
    number: f64,
    /// String value as a byte slice.
    string: []const u8,
    /// Boolean value.
    boolean: bool,
};
|
||||
|
||||
/// Returns the XPath string-value (spec §5) of `node`, chosen by node type:
///
/// - attribute: the attribute's value
/// - cdata (character-data nodes): the node's data
/// - element / document: whatever `Node.getTextContent` writes for the node
/// - document type / document fragment: the empty string
///
/// For attribute and cdata nodes the result is whatever `str()` hands back
/// from the node (presumably a borrowed view — confirm against its
/// definition). For element and document nodes the text is collected into a
/// buffer allocated from `arena`. `error.WriteFailed` is returned when
/// writing into that buffer fails.
pub fn stringValueOf(arena: Allocator, node: *Node) error{WriteFailed}![]const u8 {
    switch (node._type) {
        .attribute => |attr| return attr._value.str(),
        .cdata => |cd| return cd._data.str(),
        .element, .document => {
            // Gather the text content into an arena-backed buffer.
            var collected = std.Io.Writer.Allocating.init(arena);
            try node.getTextContent(&collected.writer);
            return collected.written();
        },
        .document_type, .document_fragment => return "",
    }
}
|
||||
|
||||
/// Boolean coercion of an XPath value.
///
/// - boolean: returned unchanged
/// - number: false for zero and NaN, true otherwise
/// - string: true when non-empty
/// - node-set: true when it holds at least one node
pub fn toBoolean(val: Result) bool {
    switch (val) {
        .boolean => |b| return b,
        // NaN compares unequal to zero, so it needs its own check.
        .number => |n| return !(n == 0 or std.math.isNan(n)),
        .string => |s| return s.len != 0,
        .node_set => |ns| return ns.len != 0,
    }
}
|
||||
|
||||
/// Numeric coercion of an XPath value.
///
/// - number: returned unchanged
/// - boolean: 1 for true, 0 for false
/// - string: parsed by `stringToNumber`, so empty and whitespace-only
///   strings give NaN (XPath spec §4.4; unlike JS `Number(' ')`, which is 0)
/// - node-set: NaN when empty, otherwise the string-value of the first
///   node parsed by `stringToNumber`
///
/// `arena` is only used to build the string-value of a node.
pub fn toNumber(arena: Allocator, val: Result) error{WriteFailed}!f64 {
    switch (val) {
        .number => |n| return n,
        .boolean => |b| return if (b) 1 else 0,
        .string => |s| return stringToNumber(s),
        .node_set => |ns| {
            if (ns.len == 0) return std.math.nan(f64);
            const first_value = try stringValueOf(arena, ns[0]);
            return stringToNumber(first_value);
        },
    }
}
|
||||
|
||||
/// Converts a string to a number the way XPath's `number()` does.
///
/// Leading and trailing ASCII whitespace is ignored. An empty or
/// whitespace-only string, or any string that is not a decimal number,
/// gives NaN.
///
/// `std.fmt.parseFloat` on its own also understands Zig literal syntax:
/// `_` digit separators ("1_000"), case-insensitive "inf" / "infinity",
/// and hexadecimal floats ("0x1p4"). None of these are numbers in XPath,
/// so the input is restricted to decimal characters before parsing. An
/// exponent and a leading `+` remain accepted, as before.
pub fn stringToNumber(s: []const u8) f64 {
    const trimmed = std.mem.trim(u8, s, &std.ascii.whitespace);
    if (trimmed.len == 0) return std.math.nan(f64);

    // Only digits, a decimal point, signs and an exponent marker may appear.
    for (trimmed) |c| {
        switch (c) {
            '0'...'9', '.', '+', '-', 'e', 'E' => {},
            else => return std.math.nan(f64),
        }
    }

    // Malformed combinations of those characters (e.g. "1-2", ".") are
    // rejected by the parser itself.
    return std.fmt.parseFloat(f64, trimmed) catch std.math.nan(f64);
}
|
||||
|
||||
/// String coercion of an XPath value.
///
/// - string: returned unchanged (no copy is made)
/// - boolean: the static string "true" or "false"
/// - number: formatted by `numberToString`, which may allocate from `arena`
/// - node-set: "" when empty, otherwise the string-value of the first node
///   (see `stringValueOf`), which may allocate from `arena`
pub fn toString(arena: Allocator, val: Result) error{ OutOfMemory, WriteFailed }![]const u8 {
    switch (val) {
        .string => |s| return s,
        .boolean => |b| return if (b) "true" else "false",
        .number => |n| return try numberToString(arena, n),
        .node_set => |ns| {
            if (ns.len == 0) return "";
            return try stringValueOf(arena, ns[0]);
        },
    }
}
|
||||
|
||||
/// Formats a number using the spellings of XPath spec §4.2.
///
/// - NaN gives "NaN"; the infinities give "Infinity" and "-Infinity"
/// - both zeros give "0"
/// - integer-valued numbers of magnitude up to 9.007199254740992e15 are
///   printed as integers, without a trailing `.0`
/// - everything else is printed with Zig's `{d}` float formatting
///
/// The special spellings are static strings; every other result is
/// allocated from `arena`.
pub fn numberToString(arena: Allocator, n: f64) error{OutOfMemory}![]const u8 {
    if (std.math.isNan(n)) return "NaN";
    if (std.math.isInf(n)) return if (n > 0) "Infinity" else "-Infinity";
    // Positive and negative zero compare equal, so one check covers both.
    if (n == 0) return "0";

    // Bound within which the value is converted to i64 for printing.
    const integer_limit: f64 = 9.007199254740992e15;
    const is_integral = @trunc(n) == n;
    if (is_integral and @abs(n) <= integer_limit) {
        const as_int: i64 = @intFromFloat(n);
        return std.fmt.allocPrint(arena, "{d}", .{as_int});
    }

    return std.fmt.allocPrint(arena, "{d}", .{n});
}
|
||||
|
||||
const testing = std.testing;
|
||||
|
||||
test "Result: toBoolean" {
    // Values that must coerce to true.
    const truthy = [_]Result{
        .{ .boolean = true },
        .{ .number = 1 },
        .{ .string = "x" },
    };
    for (truthy) |value| try testing.expect(toBoolean(value));

    // Values that must coerce to false: false, zero, NaN, "" and the empty
    // node-set.
    const falsy = [_]Result{
        .{ .boolean = false },
        .{ .number = 0 },
        .{ .number = std.math.nan(f64) },
        .{ .string = "" },
        .{ .node_set = &.{} },
    };
    for (falsy) |value| try testing.expect(!toBoolean(value));
}
|
||||
|
||||
test "Result: stringToNumber" {
    // Each entry is { input, expected value }; the last one checks that
    // surrounding whitespace is ignored.
    inline for (.{
        .{ "42", 42 },
        .{ "3.14", 3.14 },
        .{ "-1", -1 },
        .{ " 5 ", 5 },
    }) |case| {
        const expected: f64 = case[1];
        try testing.expectEqual(expected, stringToNumber(case[0]));
    }

    // Empty, whitespace-only and non-numeric input all give NaN.
    const not_numbers = [_][]const u8{ "", " ", "abc" };
    for (not_numbers) |input| {
        try testing.expect(std.math.isNan(stringToNumber(input)));
    }
}
|
||||
|
||||
test "Result: numberToString — integers print without decimal" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    // Each entry is { input, expected text }; both zeros print as "0".
    inline for (.{
        .{ 5, "5" },
        .{ 0, "0" },
        .{ -0.0, "0" },
        .{ -1, "-1" },
        .{ 42.0, "42" },
    }) |case| {
        const text = try numberToString(a, case[0]);
        try testing.expectEqualStrings(case[1], text);
    }
}
|
||||
|
||||
test "Result: numberToString — special values" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    const inf = std.math.inf(f64);

    const nan_text = try numberToString(a, std.math.nan(f64));
    try testing.expectEqualStrings("NaN", nan_text);

    const pos_inf_text = try numberToString(a, inf);
    try testing.expectEqualStrings("Infinity", pos_inf_text);

    const neg_inf_text = try numberToString(a, -inf);
    try testing.expectEqualStrings("-Infinity", neg_inf_text);
}
|
||||
|
||||
test "Result: numberToString — floats" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    // Non-integral values keep their fractional digits.
    inline for (.{
        .{ 3.14, "3.14" },
        .{ 0.5, "0.5" },
    }) |case| {
        const text = try numberToString(a, case[0]);
        try testing.expectEqualStrings(case[1], text);
    }
}
|
||||
|
||||
test "Result: toString — boolean returns static string" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    const true_text = try toString(a, .{ .boolean = true });
    try testing.expectEqualStrings("true", true_text);

    const false_text = try toString(a, .{ .boolean = false });
    try testing.expectEqualStrings("false", false_text);
}
|
||||
|
||||
test "Result: toString — node-set with empty arr is empty" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    // A node-set without nodes has the empty string as its string form.
    const no_nodes: Result = .{ .node_set = &.{} };
    const text = try toString(a, no_nodes);
    try testing.expectEqualStrings("", text);
}
|
||||
|
||||
test "Result: toNumber — empty node-set is NaN" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    // A node-set without nodes has no first node to read a number from.
    const no_nodes: Result = .{ .node_set = &.{} };
    const value = try toNumber(a, no_nodes);
    try testing.expect(std.math.isNan(value));
}
|
||||
|
||||
test "Result: toNumber — boolean coerces to 0/1" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    const from_true = try toNumber(a, .{ .boolean = true });
    try testing.expectEqual(@as(f64, 1), from_true);

    const from_false = try toNumber(a, .{ .boolean = false });
    try testing.expectEqual(@as(f64, 0), from_false);
}
|
||||
@@ -27,6 +27,7 @@ const dump = @import("../../browser/dump.zig");
|
||||
const js = @import("../../browser/js/js.zig");
|
||||
const DOMNode = @import("../../browser/webapi/Node.zig");
|
||||
const Selector = @import("../../browser/webapi/selector/Selector.zig");
|
||||
const xpath = @import("../../browser/xpath/Evaluator.zig");
|
||||
|
||||
const log = lp.log;
|
||||
const Allocator = std.mem.Allocator;
|
||||
@@ -91,6 +92,56 @@ fn getDocument(cmd: *CDP.Command) !void {
|
||||
return cmd.sendResult(.{ .root = bc.nodeWriter(node, .{ .depth = params.depth }) }, .{});
|
||||
}
|
||||
|
||||
// The thirteen named axes of XPath 1.0, used as a membership set.
// `isXPathQuery` looks up the word in front of a `::` here, so that CSS
// pseudo-elements such as `a::before` or `div::first-line` are not taken
// for XPath merely because an identifier precedes the `::`.
const xpath_axis_names = std.StaticStringMap(void).initComptime(.{
    // Axes that stay on the context node or move down / forward.
    .{ "self", {} },
    .{ "child", {} },
    .{ "descendant", {} },
    .{ "descendant-or-self", {} },
    .{ "following", {} },
    .{ "following-sibling", {} },
    // Axes that move up / backward.
    .{ "parent", {} },
    .{ "ancestor", {} },
    .{ "ancestor-or-self", {} },
    .{ "preceding", {} },
    .{ "preceding-sibling", {} },
    // Axes that select attributes and namespaces.
    .{ "attribute", {} },
    .{ "namespace", {} },
});
|
||||
|
||||
// Heuristic (decision #2/#9) that decides whether a search query should go
// to the XPath evaluator instead of the CSS selector engine. A query counts
// as XPath when
//   - it starts with `/` or `./`, optionally behind one opening paren, or
//   - it contains `::` directly preceded by one of the named XPath axes.
// Anything else, including the empty query, is treated as CSS.
fn isXPathQuery(q: []const u8) bool {
    // Look past a single leading `(` so `(//foo)[1]` is handled like `//foo`.
    const head = if (std.mem.startsWith(u8, q, "(")) q[1..] else q;
    if (std.mem.startsWith(u8, head, "/")) return true;
    if (std.mem.startsWith(u8, head, "./")) return true;

    // Inspect every `::`. It only counts as an axis separator when the run
    // of [a-zA-Z-] characters directly in front of it is a known axis name.
    var search_from: usize = 0;
    while (std.mem.indexOfPos(u8, q, search_from, "::")) |sep| {
        search_from = sep + 1;

        // Walk left from the separator to find where that run begins.
        var word_start = sep;
        while (word_start > 0) : (word_start -= 1) {
            const is_axis_char = switch (q[word_start - 1]) {
                'a'...'z', 'A'...'Z', '-' => true,
                else => false,
            };
            if (!is_axis_char) break;
        }

        // An empty run means nothing identifier-like precedes the `::`.
        if (word_start == sep) continue;
        if (xpath_axis_names.has(q[word_start..sep])) return true;
    }

    return false;
}
|
||||
|
||||
// https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-performSearch
|
||||
fn performSearch(cmd: *CDP.Command) !void {
|
||||
const params = (try cmd.params(struct {
|
||||
@@ -100,15 +151,23 @@ fn performSearch(cmd: *CDP.Command) !void {
|
||||
|
||||
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
|
||||
const frame = bc.session.currentFrame() orelse return error.FrameNotLoaded;
|
||||
const list = try Selector.querySelectorAll(frame.window._document.asNode(), params.query, frame);
|
||||
const root = frame.window._document.asNode();
|
||||
|
||||
if (isXPathQuery(params.query)) {
|
||||
const arena = try frame.getArena(.medium, "DOM.performSearch");
|
||||
defer frame.releaseArena(arena);
|
||||
const nodes = try xpath.searchAll(arena, root, params.query, frame);
|
||||
return finishSearch(cmd, bc, nodes);
|
||||
}
|
||||
|
||||
const list = try Selector.querySelectorAll(root, params.query, frame);
|
||||
defer list.deinit(frame._page);
|
||||
return finishSearch(cmd, bc, list._nodes);
|
||||
}
|
||||
|
||||
const search = try bc.node_search_list.create(list._nodes);
|
||||
|
||||
// dispatch setChildNodesEvents to inform the client of the subpart of node
|
||||
// tree covering the results.
|
||||
try dispatchSetChildNodes(cmd, list._nodes);
|
||||
|
||||
fn finishSearch(cmd: *CDP.Command, bc: *CDP.BrowserContext, nodes: []const *DOMNode) !void {
|
||||
const search = try bc.node_search_list.create(nodes);
|
||||
try dispatchSetChildNodes(cmd, nodes);
|
||||
return cmd.sendResult(.{
|
||||
.searchId = search.name,
|
||||
.resultCount = @as(u32, @intCast(search.node_ids.len)),
|
||||
@@ -616,6 +675,78 @@ test "cdp.dom: search flow" {
|
||||
try ctx.expectSentError(-31998, "SearchResultNotFound", .{ .id = 17 });
|
||||
}
|
||||
|
||||
test "cdp.dom: performSearch with XPath" {
    var ctx = try testing.context();
    defer ctx.deinit();

    _ = try ctx.loadBrowserContext(.{ .id = "BID-A", .url = "cdp/perform_search_xpath.html" });

    // Each entry is { message id, query, expected searchId, expected
    // resultCount }. The first three queries are XPath-shaped; the last two
    // are plain CSS selectors sent through the same method. The searches
    // run in order and the expected ids count up from "0".
    inline for (.{
        .{ 20, "//p", "0", 3 },
        .{ 21, "descendant::p", "1", 3 },
        .{ 22, "//*[@id='outer']", "2", 1 },
        .{ 23, "p", "3", 3 },
        .{ 24, "div p", "4", 2 },
    }) |case| {
        try ctx.processMessage(.{
            .id = case[0],
            .method = "DOM.performSearch",
            .params = .{ .query = case[1] },
        });
        try ctx.expectSentResult(
            .{ .searchId = case[2], .resultCount = case[3] },
            .{ .id = case[0] },
        );
    }
}
|
||||
|
||||
test "cdp.dom: isXPathQuery heuristic" {
    // Queries that must be recognised as XPath; between them they exercise
    // each branch of the heuristic.
    const xpath_like = [_][]const u8{
        "/html",
        "//p",
        ".//foo",
        "(//foo)[1]",
        "(./bar)[2]",
        "descendant::p",
        "ancestor-or-self::*",
        "//*[@id='x']",
    };
    for (xpath_like) |query| {
        try std.testing.expect(isXPathQuery(query));
    }

    // Queries that must be left to the CSS path.
    const css_like = [_][]const u8{
        "",
        "p",
        "div p",
        "#main",
        ".cls",
        "[data-x]",
        // Parentheses without a path operator inside.
        "(p)",
        // Leading dot that is not followed by `/`.
        ".x",
        // CSS pseudo-elements: the word before `::` is not an axis name.
        "a::before",
        "div::after",
        "p::first-line",
        "input::placeholder",
        // `::` inside an attribute value, with no axis name in front of it.
        "[data-x=\"x::y\"]",
    };
    for (css_like) |query| {
        try std.testing.expect(!isXPathQuery(query));
    }
}
|
||||
|
||||
test "cdp.dom: querySelector unknown search id" {
|
||||
var ctx = try testing.context();
|
||||
defer ctx.deinit();
|
||||
|
||||
@@ -58,6 +58,7 @@ pub const FetchOpts = struct {
|
||||
wait_ms: u32 = 5000,
|
||||
wait_until: ?Config.WaitUntil = null,
|
||||
wait_script: ?[:0]const u8 = null,
|
||||
inject_script: std.ArrayList([]const u8) = .{},
|
||||
wait_selector: ?[:0]const u8 = null,
|
||||
dump: dump.Opts,
|
||||
dump_mode: ?Config.DumpFormat = null,
|
||||
@@ -79,6 +80,9 @@ pub fn fetch(app: *App, browser: *Browser, url: [:0]const u8, opts: FetchOpts) !
|
||||
}
|
||||
}
|
||||
|
||||
// Stash scripts user want to inject.
|
||||
session.inject_scripts = opts.inject_script.items;
|
||||
|
||||
const frame = try session.createPage();
|
||||
|
||||
// // Comment this out to get a profile of the JS code in v8/profile.json.
|
||||
|
||||
@@ -128,6 +128,7 @@ fn run(allocator: Allocator, main_arena: Allocator) !void {
|
||||
.wait_ms = opts.wait_ms,
|
||||
.wait_until = opts.wait_until,
|
||||
.wait_script = opts.wait_script,
|
||||
.inject_script = opts.inject_script,
|
||||
.wait_selector = opts.wait_selector,
|
||||
.dump_mode = opts.dump,
|
||||
.dump = .{
|
||||
|
||||
@@ -338,12 +338,21 @@ pub var test_notification: *Notification = undefined;
|
||||
pub var test_session: *Session = undefined;
|
||||
|
||||
const WEB_API_TEST_ROOT = "src/browser/tests/";
|
||||
const HtmlRunnerOpts = struct {};
|
||||
const HtmlRunnerOpts = struct {
|
||||
timeout_ms: u32 = 2000,
|
||||
inject_script: ?[]const u8 = null,
|
||||
};
|
||||
|
||||
pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void {
|
||||
_ = opts;
|
||||
defer reset();
|
||||
|
||||
var inject_scripts: [1][]const u8 = undefined;
|
||||
if (opts.inject_script) |script| {
|
||||
inject_scripts[0] = script;
|
||||
test_session.inject_scripts = inject_scripts[0..1];
|
||||
}
|
||||
defer test_session.inject_scripts = &.{};
|
||||
|
||||
const root = try std.fs.path.joinZ(arena_allocator, &.{ WEB_API_TEST_ROOT, path });
|
||||
const stat = std.fs.cwd().statFile(root) catch |err| {
|
||||
std.debug.print("Failed to stat file: '{s}'", .{root});
|
||||
@@ -356,7 +365,7 @@ pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void {
|
||||
return;
|
||||
}
|
||||
try @import("root").subtest(root);
|
||||
try runWebApiTest(root);
|
||||
try runWebApiTest(root, opts.timeout_ms);
|
||||
},
|
||||
.directory => {
|
||||
var dir = try std.fs.cwd().openDir(root, .{
|
||||
@@ -382,7 +391,7 @@ pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void {
|
||||
|
||||
const full_path = try std.fs.path.joinZ(arena_allocator, &.{ root, entry.name });
|
||||
try @import("root").subtest(entry.name);
|
||||
try runWebApiTest(full_path);
|
||||
try runWebApiTest(full_path, opts.timeout_ms);
|
||||
}
|
||||
},
|
||||
else => |kind| {
|
||||
@@ -392,7 +401,7 @@ pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void {
|
||||
}
|
||||
}
|
||||
|
||||
fn runWebApiTest(test_file: [:0]const u8) !void {
|
||||
fn runWebApiTest(test_file: [:0]const u8, timeout_ms: u32) !void {
|
||||
const frame = try test_session.createPage();
|
||||
defer test_session.removePage();
|
||||
|
||||
@@ -418,7 +427,7 @@ fn runWebApiTest(test_file: [:0]const u8) !void {
|
||||
var runner = try test_session.runner(.{});
|
||||
try runner.wait(.{ .ms = 2000, .until = .load });
|
||||
|
||||
var wait_ms: u32 = 2000;
|
||||
var wait_ms: u32 = timeout_ms;
|
||||
var timer = try std.time.Timer.start();
|
||||
while (true) {
|
||||
var try_catch: js.TryCatch = undefined;
|
||||
|
||||
Reference in New Issue
Block a user