From 290fc7a9df7119f21cbd31ce198daad65dcf556e Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Tue, 28 Apr 2026 19:20:09 +0200 Subject: [PATCH 01/12] xpath: implement XPath 1.0 evaluator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ports the capybara-lightpanda XPath 1.0 polyfill into Lightpanda. Exposes the WHATWG Document.evaluate / XPathResult / XPathEvaluator / XPathExpression surface and routes CDP DOM.performSearch XPath queries through the new evaluator. The capybara-lightpanda gem can drop its ~700-line JS polyfill in the next release. New module src/browser/xpath/ (Tokenizer, Parser, Ast, Evaluator, Functions, Result). New webapi types XPathResult, XPathExpression, XPathEvaluator. Coverage and stubs match the polyfill 1:1 — see capybara-lightpanda/XPATH_COMPLIANCE.md for the full spec. Tests: 91-case conformance + result-API + evaluator-API + CDP fixtures, plus the engine's Zig unit suite (601/601 pass). --- src/browser/js/bridge.zig | 3 + .../tests/cdp/perform_search_xpath.html | 8 + .../tests/xpath/document_evaluate.html | 123 +++ .../tests/xpath/xpath_conformance.html | 202 ++++ src/browser/tests/xpath/xpath_evaluator.html | 103 ++ src/browser/tests/xpath/xpath_result.html | 193 ++++ src/browser/webapi/Document.zig | 43 + src/browser/webapi/XPathEvaluator.zig | 97 ++ src/browser/webapi/XPathExpression.zig | 100 ++ src/browser/webapi/XPathResult.zig | 277 ++++++ src/browser/xpath/Ast.zig | 134 +++ src/browser/xpath/Evaluator.zig | 725 ++++++++++++++ src/browser/xpath/Functions.zig | 630 ++++++++++++ src/browser/xpath/Parser.zig | 923 ++++++++++++++++++ src/browser/xpath/Result.zig | 200 ++++ src/browser/xpath/Tokenizer.zig | 466 +++++++++ src/cdp/domains/dom.zig | 102 +- 17 files changed, 4322 insertions(+), 7 deletions(-) create mode 100644 src/browser/tests/cdp/perform_search_xpath.html create mode 100644 src/browser/tests/xpath/document_evaluate.html create mode 100644 src/browser/tests/xpath/xpath_conformance.html create mode 100644 src/browser/tests/xpath/xpath_evaluator.html create mode 100644 src/browser/tests/xpath/xpath_result.html create mode 100644 src/browser/webapi/XPathEvaluator.zig create mode 100644 src/browser/webapi/XPathExpression.zig create mode 100644 src/browser/webapi/XPathResult.zig create mode 100644 src/browser/xpath/Ast.zig create mode 100644 src/browser/xpath/Evaluator.zig create mode 100644 src/browser/xpath/Functions.zig create mode 100644 src/browser/xpath/Parser.zig create mode 100644 src/browser/xpath/Result.zig create mode 100644 src/browser/xpath/Tokenizer.zig diff --git a/src/browser/js/bridge.zig b/src/browser/js/bridge.zig index 366f83af..9761540b 100644 --- a/src/browser/js/bridge.zig +++ b/src/browser/js/bridge.zig @@ -935,6 +935,9 @@ pub const PageJsApis = flattenTypes(&.{ @import("../webapi/CryptoKey.zig"), @import("../webapi/Selection.zig"), @import("../webapi/ImageData.zig"), + @import("../webapi/XPathResult.zig"), + @import("../webapi/XPathExpression.zig"), + @import("../webapi/XPathEvaluator.zig"), }); // APIs available on Worker context globals (constructors like URL, Headers, etc.) diff --git a/src/browser/tests/cdp/perform_search_xpath.html b/src/browser/tests/cdp/perform_search_xpath.html new file mode 100644 index 00000000..e30ca1c1 --- /dev/null +++ b/src/browser/tests/cdp/perform_search_xpath.html @@ -0,0 +1,8 @@ + + +
+

1

+

2

+
+

3

+ diff --git a/src/browser/tests/xpath/document_evaluate.html b/src/browser/tests/xpath/document_evaluate.html new file mode 100644 index 00000000..2c4fdc58 --- /dev/null +++ b/src/browser/tests/xpath/document_evaluate.html @@ -0,0 +1,123 @@ + + + +

Hello

+
+

First

+

Second

+

Third

+
+ x + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/browser/tests/xpath/xpath_conformance.html b/src/browser/tests/xpath/xpath_conformance.html new file mode 100644 index 00000000..f200ecbb --- /dev/null +++ b/src/browser/tests/xpath/xpath_conformance.html @@ -0,0 +1,202 @@ + + + + XPath conformance + + + +

Hello World

+

First paragraph with emphasis.

+

Second paragraph.

+ + + + + + + + +
NameAge
Alice30
Bob25
Carol40
+
+
AB
+
Click me
+
Other link
+
+
+ + + + + + +
+ +
+
+

One

+

Two

+

Three

+
+ + + + diff --git a/src/browser/tests/xpath/xpath_evaluator.html b/src/browser/tests/xpath/xpath_evaluator.html new file mode 100644 index 00000000..6cb6a886 --- /dev/null +++ b/src/browser/tests/xpath/xpath_evaluator.html @@ -0,0 +1,103 @@ + + + +

Hello

+

One

+

Two

+ + + + + + + + + + + + + + + + + + + diff --git a/src/browser/tests/xpath/xpath_result.html b/src/browser/tests/xpath/xpath_result.html new file mode 100644 index 00000000..f7674e7b --- /dev/null +++ b/src/browser/tests/xpath/xpath_result.html @@ -0,0 +1,193 @@ + + + +

Hello

+

One

+

Two

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig index 747e08c2..095fa48f 100644 --- a/src/browser/webapi/Document.zig +++ b/src/browser/webapi/Document.zig @@ -35,6 +35,8 @@ const DOMImplementation = @import("DOMImplementation.zig"); const StyleSheetList = @import("css/StyleSheetList.zig"); const FontFaceSet = @import("css/FontFaceSet.zig"); const Selection = @import("Selection.zig"); +const XPathResult = @import("XPathResult.zig"); +const XPathExpression = @import("XPathExpression.zig"); pub const XMLDocument = @import("XMLDocument.zig"); pub const HTMLDocument = @import("HTMLDocument.zig"); @@ -412,6 +414,40 @@ pub fn createNodeIterator(_: *const Document, root: *Node, what_to_show: ?js.Val return DOMNodeIterator.init(root, try whatToShow(what_to_show), filter, frame); } +pub fn evaluate( + self: *Document, + expression: []const u8, + context_node: ?*Node, + resolver: ?js.Function, + result_type: u16, + result: ?*XPathResult, + frame: *Frame, +) !*XPathResult { + // resolver/result are no-ops in HTML mode (decision #2). + _ = resolver; + _ = result; + return XPathResult.fromExpression( + expression, + context_node orelse self.asNode(), + result_type, + frame, + ); +} + +pub fn createExpression( + _: *const Document, + expression: []const u8, + resolver: ?js.Function, + frame: *Frame, +) !*XPathExpression { + _ = resolver; + return XPathExpression.init(expression, frame); +} + +pub fn createNSResolver(_: *const Document, node: *Node) ?*Node { + return node; +} + fn whatToShow(value_: ?js.Value) !u32 { const value = value_ orelse return 4294967295; // show all when undefined if (value.isUndefined()) { @@ -1053,6 +1089,9 @@ pub const JsApi = struct { pub const createEvent = bridge.function(Document.createEvent, .{ .dom_exception = true }); pub const createTreeWalker = bridge.function(Document.createTreeWalker, .{}); pub const createNodeIterator = bridge.function(Document.createNodeIterator, .{}); + pub const evaluate = bridge.function(Document.evaluate, .{ .dom_exception = true }); + pub const createExpression = bridge.function(Document.createExpression, .{ .dom_exception = true }); + pub const createNSResolver = bridge.function(Document.createNSResolver, .{}); pub const getElementById = bridge.function(_getElementById, .{}); fn _getElementById(self: *Document, value_: ?js.Value, frame: *Frame) !?*Element { const value = value_ orelse return null; @@ -1113,3 +1152,7 @@ const testing = @import("../../testing.zig"); test "WebApi: Document" { try testing.htmlRunner("document", .{}); } + +test "WebApi: Document.evaluate" { + try testing.htmlRunner("xpath/document_evaluate.html", .{}); +} diff --git a/src/browser/webapi/XPathEvaluator.zig b/src/browser/webapi/XPathEvaluator.zig new file mode 100644 index 00000000..ec651de0 --- /dev/null +++ b/src/browser/webapi/XPathEvaluator.zig @@ -0,0 +1,97 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! WHATWG `XPathEvaluator` — a stateless factory for XPath evaluation. +//! Mirrors `Document.evaluate` / `Document.createExpression` / +//! `Document.createNSResolver` so an explicit +//! `new XPathEvaluator()` instance can be used in place of the +//! document. + +const std = @import("std"); + +const js = @import("../js/js.zig"); +const Frame = @import("../Frame.zig"); + +const Node = @import("Node.zig"); +const XPathResult = @import("XPathResult.zig"); +const XPathExpression = @import("XPathExpression.zig"); + +const XPathEvaluator = @This(); + +// Padding to avoid zero-size struct identity_map collisions (matches +// the convention in ResizeObserver.zig). +_pad: bool = false, + +pub fn init() XPathEvaluator { + return .{}; +} + +pub fn evaluate( + _: *const XPathEvaluator, + expression: []const u8, + context_node: *Node, + resolver: ?js.Function, + requested_type: u16, + result: ?*XPathResult, + frame: *Frame, +) !*XPathResult { + // Namespace resolver is accepted-and-ignored (HTML mode — decision #2). + // Result reuse is also a no-op; XPathResult.fromExpression always + // allocates a fresh instance. + _ = resolver; + _ = result; + return XPathResult.fromExpression(expression, context_node, requested_type, frame); +} + +pub fn createExpression( + _: *const XPathEvaluator, + expression: []const u8, + resolver: ?js.Function, + frame: *Frame, +) !*XPathExpression { + _ = resolver; + return XPathExpression.init(expression, frame); +} + +pub fn createNSResolver(_: *const XPathEvaluator, node: *Node) ?*Node { + // HTML-mode passthrough — the WHATWG IDL accepts a Node and returns + // an `XPathNSResolver`, but in practice the input node is reused. + return node; +} + +pub const JsApi = struct { + pub const bridge = js.Bridge(XPathEvaluator); + + pub const Meta = struct { + pub const name = "XPathEvaluator"; + pub const prototype_chain = bridge.prototypeChain(); + pub var class_id: bridge.ClassId = undefined; + pub const empty_with_no_proto = true; + }; + + pub const constructor = bridge.constructor(XPathEvaluator.init, .{}); + pub const evaluate = bridge.function(XPathEvaluator.evaluate, .{ .dom_exception = true }); + pub const createExpression = bridge.function(XPathEvaluator.createExpression, .{ .dom_exception = true }); + pub const createNSResolver = bridge.function(XPathEvaluator.createNSResolver, .{}); +}; + +const testing = @import("../../testing.zig"); + +test "WebApi: XPathEvaluator + XPathExpression" { + try testing.htmlRunner("xpath/xpath_evaluator.html", .{}); +} diff --git a/src/browser/webapi/XPathExpression.zig b/src/browser/webapi/XPathExpression.zig new file mode 100644 index 00000000..6dba00fb --- /dev/null +++ b/src/browser/webapi/XPathExpression.zig @@ -0,0 +1,100 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! WHATWG `XPathExpression` — a parsed XPath expression cached for +//! repeated evaluation. The parsed AST lives in this object's per- +//! instance arena (long-lived); each `evaluate()` call gets a fresh +//! arena for its own result data so multiple evaluations don't grow +//! the AST arena. + +const std = @import("std"); +const lp = @import("lightpanda"); + +const js = @import("../js/js.zig"); +const Page = @import("../Page.zig"); +const Frame = @import("../Frame.zig"); + +const Node = @import("Node.zig"); +const XPathResult = @import("XPathResult.zig"); + +const xpath = struct { + const Ast = @import("../xpath/Ast.zig"); + const Parser = @import("../xpath/Parser.zig"); + const Evaluator = @import("../xpath/Evaluator.zig"); +}; + +const Allocator = std.mem.Allocator; + +const XPathExpression = @This(); + +_rc: lp.RC(u8) = .{}, +_arena: Allocator, +_expr: *const xpath.Ast.Expr, + +pub fn init(expression: []const u8, frame: *Frame) !*XPathExpression { + const arena = try frame.getArena(.tiny, "XPathExpression"); + errdefer frame.releaseArena(arena); + + const expr = try xpath.Parser.parse(arena, expression); + const xe = try arena.create(XPathExpression); + xe.* = .{ ._arena = arena, ._expr = expr }; + return xe; +} + +pub fn evaluate( + self: *XPathExpression, + context_node: *Node, + requested_type: u16, + result: ?*XPathResult, + frame: *Frame, +) !*XPathResult { + // The `result` reuse parameter (WHATWG: optional XPathResult to + // populate) is accepted-and-ignored: we always allocate fresh, + // which matches every modern browser's effective behavior. + _ = result; + + const arena = try frame.getArena(.medium, "XPathResult"); + errdefer frame.releaseArena(arena); + + const eval_result = try xpath.Evaluator.evaluate(arena, frame, self._expr, context_node); + return XPathResult.fromResult(arena, requested_type, eval_result); +} + +pub fn deinit(self: *XPathExpression, page: *Page) void { + page.releaseArena(self._arena); +} + +pub fn acquireRef(self: *XPathExpression) void { + self._rc.acquire(); +} + +pub fn releaseRef(self: *XPathExpression, page: *Page) void { + self._rc.release(self, page); +} + +pub const JsApi = struct { + pub const bridge = js.Bridge(XPathExpression); + + pub const Meta = struct { + pub const name = "XPathExpression"; + pub const prototype_chain = bridge.prototypeChain(); + pub var class_id: bridge.ClassId = undefined; + }; + + pub const evaluate = bridge.function(XPathExpression.evaluate, .{ .dom_exception = true }); +}; diff --git a/src/browser/webapi/XPathResult.zig b/src/browser/webapi/XPathResult.zig new file mode 100644 index 00000000..6bf5095b --- /dev/null +++ b/src/browser/webapi/XPathResult.zig @@ -0,0 +1,277 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! WHATWG `XPathResult` (full surface, all 10 type constants — decision +//! #4). Wraps the evaluator's `Result.Result` for JS consumption: +//! coerces to the requested result type at construction, exposes the +//! type-tagged accessors, and serves the iterator/snapshot APIs. +//! +//! Lifetime model: each `XPathResult` owns a per-instance arena +//! (`getArena(.medium, ...)`) that holds both the struct and the result +//! data (node-set slice, formatted strings). The arena is released in +//! `deinit` once the JS wrapper's refcount hits zero. +//! +//! Type-mismatch accessor calls return `error.InvalidStateError` — +//! translated to a `DOMException` by `bridge.function(.., .{ +//! .dom_exception = true })`. The WHATWG IDL technically specifies +//! `TypeError` for type mismatches, but `InvalidStateError` is what +//! decision #4 captures and what most legacy XPath consumers expect. + +const std = @import("std"); +const lp = @import("lightpanda"); + +const js = @import("../js/js.zig"); +const Page = @import("../Page.zig"); +const Frame = @import("../Frame.zig"); + +const Node = @import("Node.zig"); + +// XPath runtime helpers. Aliased to keep the cross-directory imports +// readable when both modules expose a `Result` type. +const xpath = struct { + const Parser = @import("../xpath/Parser.zig"); + const Evaluator = @import("../xpath/Evaluator.zig"); + const Result = @import("../xpath/Result.zig"); +}; + +const Allocator = std.mem.Allocator; + +const XPathResult = @This(); + +// WHATWG type constants. ANY_TYPE is a request flag — at construction +// it resolves to one of the four concrete categories (NUMBER, STRING, +// BOOLEAN, UNORDERED_NODE_ITERATOR) depending on what the expression +// produced. +pub const ANY_TYPE: u16 = 0; +pub const NUMBER_TYPE: u16 = 1; +pub const STRING_TYPE: u16 = 2; +pub const BOOLEAN_TYPE: u16 = 3; +pub const UNORDERED_NODE_ITERATOR_TYPE: u16 = 4; +pub const ORDERED_NODE_ITERATOR_TYPE: u16 = 5; +pub const UNORDERED_NODE_SNAPSHOT_TYPE: u16 = 6; +pub const ORDERED_NODE_SNAPSHOT_TYPE: u16 = 7; +pub const ANY_UNORDERED_NODE_TYPE: u16 = 8; +pub const FIRST_ORDERED_NODE_TYPE: u16 = 9; + +const Value = union(enum) { + number: f64, + string: []const u8, + boolean: bool, + nodes: []const *Node, +}; + +_rc: lp.RC(u8) = .{}, +_arena: Allocator, +_type: u16, +_value: Value, +_iter_pos: usize = 0, + +// ----- constructors ----- + +/// One-shot: parse + evaluate + wrap. Used by `Document.evaluate` and +/// `XPathEvaluator.evaluate`. Allocates a per-instance arena for the +/// AST + result data + the struct itself. +pub fn fromExpression( + expression: []const u8, + context_node: *Node, + requested_type: u16, + frame: *Frame, +) !*XPathResult { + const arena = try frame.getArena(.medium, "XPathResult"); + errdefer frame.releaseArena(arena); + + const expr = try xpath.Parser.parse(arena, expression); + const result = try xpath.Evaluator.evaluate(arena, frame, expr, context_node); + return fromResult(arena, requested_type, result); +} + +/// Wrap an already-evaluated `Result.Result` into an XPathResult. The +/// caller hands over ownership of `arena` — the XPathResult will release +/// it on deinit. Used by `XPathExpression.evaluate` (which has its own +/// AST cache and only allocates a fresh result arena). +pub fn fromResult( + arena: Allocator, + requested_type: u16, + result: xpath.Result.Result, +) !*XPathResult { + const value: Value = switch (requested_type) { + ANY_TYPE => switch (result) { + .number => |n| .{ .number = n }, + .string => |s| .{ .string = s }, + .boolean => |b| .{ .boolean = b }, + .node_set => |ns| .{ .nodes = ns }, + }, + NUMBER_TYPE => .{ .number = try xpath.Result.toNumber(arena, result) }, + STRING_TYPE => .{ .string = try xpath.Result.toString(arena, result) }, + BOOLEAN_TYPE => .{ .boolean = xpath.Result.toBoolean(result) }, + UNORDERED_NODE_ITERATOR_TYPE, + ORDERED_NODE_ITERATOR_TYPE, + UNORDERED_NODE_SNAPSHOT_TYPE, + ORDERED_NODE_SNAPSHOT_TYPE, + ANY_UNORDERED_NODE_TYPE, + FIRST_ORDERED_NODE_TYPE, + => switch (result) { + .node_set => |ns| .{ .nodes = ns }, + // Requesting a node-set type for a non-node-set expression. + // WHATWG specifies TypeError, but DOMException.fromError has + // no TypeError mapping (would surface as a plain JS Error); + // unify on InvalidStateError per the project plan. + else => return error.InvalidStateError, + }, + else => return error.InvalidStateError, + }; + + const final_type: u16 = if (requested_type == ANY_TYPE) switch (value) { + .number => NUMBER_TYPE, + .string => STRING_TYPE, + .boolean => BOOLEAN_TYPE, + .nodes => UNORDERED_NODE_ITERATOR_TYPE, + } else requested_type; + + const xr = try arena.create(XPathResult); + xr.* = .{ + ._arena = arena, + ._type = final_type, + ._value = value, + }; + return xr; +} + +// ----- lifecycle ----- + +pub fn deinit(self: *XPathResult, page: *Page) void { + page.releaseArena(self._arena); +} + +pub fn acquireRef(self: *XPathResult) void { + self._rc.acquire(); +} + +pub fn releaseRef(self: *XPathResult, page: *Page) void { + self._rc.release(self, page); +} + +// ----- accessors ----- + +fn getResultType(self: *const XPathResult) u16 { + return self._type; +} + +fn getNumberValue(self: *const XPathResult) !f64 { + if (self._type != NUMBER_TYPE) return error.InvalidStateError; + return self._value.number; +} + +fn getStringValue(self: *const XPathResult) ![]const u8 { + if (self._type != STRING_TYPE) return error.InvalidStateError; + return self._value.string; +} + +fn getBooleanValue(self: *const XPathResult) !bool { + if (self._type != BOOLEAN_TYPE) return error.InvalidStateError; + return self._value.boolean; +} + +fn getSingleNodeValue(self: *const XPathResult) !?*Node { + if (self._type != ANY_UNORDERED_NODE_TYPE and self._type != FIRST_ORDERED_NODE_TYPE) { + return error.InvalidStateError; + } + return if (self._value.nodes.len == 0) null else self._value.nodes[0]; +} + +fn getSnapshotLength(self: *const XPathResult) !u32 { + if (self._type != UNORDERED_NODE_SNAPSHOT_TYPE and self._type != ORDERED_NODE_SNAPSHOT_TYPE) { + return error.InvalidStateError; + } + return @intCast(self._value.nodes.len); +} + +/// Live mutation tracking on the iterator isn't implemented — we hold a +/// frozen pointer slice, so the iterator is never "invalidated" by DOM +/// edits during traversal. Always returns false; matches the polyfill, +/// which is snapshot-only. +fn getInvalidIteratorState(_: *const XPathResult) bool { + return false; +} + +// ----- methods ----- + +pub fn iterateNext(self: *XPathResult) !?*Node { + if (self._type != UNORDERED_NODE_ITERATOR_TYPE and self._type != ORDERED_NODE_ITERATOR_TYPE) { + return error.InvalidStateError; + } + if (self._iter_pos >= self._value.nodes.len) return null; + const node = self._value.nodes[self._iter_pos]; + self._iter_pos += 1; + return node; +} + +pub fn snapshotItem(self: *const XPathResult, index: u32) !?*Node { + if (self._type != UNORDERED_NODE_SNAPSHOT_TYPE and self._type != ORDERED_NODE_SNAPSHOT_TYPE) { + return error.InvalidStateError; + } + if (index >= self._value.nodes.len) return null; + return self._value.nodes[index]; +} + +// ----- JS bridge ----- + +pub const JsApi = struct { + pub const bridge = js.Bridge(XPathResult); + + pub const Meta = struct { + pub const name = "XPathResult"; + pub const prototype_chain = bridge.prototypeChain(); + pub var class_id: bridge.ClassId = undefined; + }; + + // Type constants — both static (on the constructor) and instance + // properties per the WHATWG IDL. `template = true` makes them + // class-level so `XPathResult.ORDERED_NODE_SNAPSHOT_TYPE` works. + pub const ANY_TYPE = bridge.property(XPathResult.ANY_TYPE, .{ .template = true }); + pub const NUMBER_TYPE = bridge.property(XPathResult.NUMBER_TYPE, .{ .template = true }); + pub const STRING_TYPE = bridge.property(XPathResult.STRING_TYPE, .{ .template = true }); + pub const BOOLEAN_TYPE = bridge.property(XPathResult.BOOLEAN_TYPE, .{ .template = true }); + pub const UNORDERED_NODE_ITERATOR_TYPE = bridge.property(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, .{ .template = true }); + pub const ORDERED_NODE_ITERATOR_TYPE = bridge.property(XPathResult.ORDERED_NODE_ITERATOR_TYPE, .{ .template = true }); + pub const UNORDERED_NODE_SNAPSHOT_TYPE = bridge.property(XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, .{ .template = true }); + pub const ORDERED_NODE_SNAPSHOT_TYPE = bridge.property(XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, .{ .template = true }); + pub const ANY_UNORDERED_NODE_TYPE = bridge.property(XPathResult.ANY_UNORDERED_NODE_TYPE, .{ .template = true }); + pub const FIRST_ORDERED_NODE_TYPE = bridge.property(XPathResult.FIRST_ORDERED_NODE_TYPE, .{ .template = true }); + + pub const resultType = bridge.accessor(XPathResult.getResultType, null, .{}); + pub const numberValue = bridge.accessor(XPathResult.getNumberValue, null, .{ .dom_exception = true }); + pub const stringValue = bridge.accessor(XPathResult.getStringValue, null, .{ .dom_exception = true }); + pub const booleanValue = bridge.accessor(XPathResult.getBooleanValue, null, .{ .dom_exception = true }); + pub const singleNodeValue = bridge.accessor(XPathResult.getSingleNodeValue, null, .{ .dom_exception = true }); + pub const snapshotLength = bridge.accessor(XPathResult.getSnapshotLength, null, .{ .dom_exception = true }); + pub const invalidIteratorState = bridge.accessor(XPathResult.getInvalidIteratorState, null, .{}); + + pub const iterateNext = bridge.function(XPathResult.iterateNext, .{ .dom_exception = true }); + pub const snapshotItem = bridge.function(XPathResult.snapshotItem, .{ .dom_exception = true }); +}; + +const testing = @import("../../testing.zig"); + +test "WebApi: XPathResult" { + try testing.htmlRunner("xpath/xpath_result.html", .{}); +} + +test "WebApi: XPath conformance" { + try testing.htmlRunner("xpath/xpath_conformance.html", .{}); +} diff --git a/src/browser/xpath/Ast.zig b/src/browser/xpath/Ast.zig new file mode 100644 index 00000000..00125e33 --- /dev/null +++ b/src/browser/xpath/Ast.zig @@ -0,0 +1,134 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! XPath 1.0 AST. +//! +//! Mirrors the polyfill AST in capybara-lightpanda +//! (lib/capybara/lightpanda/javascripts/index.js, the `op:`-tagged +//! object literals built by Parser.prototype.parse*). Slices and +//! pointers are arena-owned by the Parser; the AST has no destructor. + +pub const Expr = union(enum) { + /// Absolute or relative location path: `/foo/bar`, `//x`, `foo/bar`. + path: Path, + /// Filter expression followed by a location-path tail: + /// `(//a)/b`, `(expr)//c`. + filter_path: FilterPath, + /// Filter expression with a single predicate: `(expr)[n]`. + /// Multi-predicate filters nest: `(e)[1][2]` → filter(filter(e,1),2). + filter: Filter, + binop: BinOp, + /// Unary minus. The polyfill has no unary `+`. + neg: *Expr, + /// String literal, quotes stripped. + literal: []const u8, + /// Numeric literal, parsed to f64. + number: f64, + /// Variable reference. The leading `$` is stripped; per decision #3 + /// the evaluator always returns the empty string. + var_ref: []const u8, + fn_call: FnCall, +}; + +pub const Path = struct { + absolute: bool, + steps: []const Step, +}; + +pub const FilterPath = struct { + filter: *Expr, + steps: []const Step, +}; + +pub const Filter = struct { + expr: *Expr, + predicate: *Expr, +}; + +pub const BinOp = struct { + op: BinOpKind, + left: *Expr, + right: *Expr, +}; + +pub const BinOpKind = enum { + or_, + and_, + eq, + neq, + lt, + gt, + lte, + gte, + add, + sub, + mul, + div, + mod, + union_, +}; + +pub const FnCall = struct { + name: []const u8, + args: []const *Expr, +}; + +pub const Step = struct { + axis: Axis, + node_test: NodeTest, + predicates: []const *Expr, +}; + +pub const Axis = enum { + child, + descendant, + descendant_or_self, + self, + parent, + ancestor, + ancestor_or_self, + following_sibling, + preceding_sibling, + following, + preceding, + attribute, + namespace, + /// Polyfill parity (decision #2): unknown axis names parse to + /// this variant; the evaluator returns an empty node-set. + unknown, +}; + +pub const NodeTest = union(enum) { + /// Element / attribute name. Special values: + /// - "*" → wildcard + /// - "prefix:*" → namespace wildcard + /// - "prefix:local" → namespace-prefixed name + /// The evaluator splits these. + name: []const u8, + /// `node()`, `text()`, `comment()`, `processing-instruction()`. + /// The optional target literal of `processing-instruction("foo")` + /// is consumed but not stored (decision #3 stub). + type_test: TypeTest, +}; + +pub const TypeTest = enum { + node, + text, + comment, + processing_instruction, +}; diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig new file mode 100644 index 00000000..a16d7b37 --- /dev/null +++ b/src/browser/xpath/Evaluator.zig @@ -0,0 +1,725 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! XPath 1.0 evaluator — runs an `Ast.Expr` against a context node and +//! produces a `Result`. Mirrors the polyfill's `evaluate()` and +//! `evalStep()` (lib/capybara/lightpanda/javascripts/index.js, lines +//! 344–644). The evaluator allocates intermediate values (node-set +//! slices, formatted numbers, materialized attribute nodes) into the +//! caller's arena. The context `Frame` is needed for `getElementById` +//! and to materialize attributes (the attribute axis returns full +//! `Attribute` nodes so the result is `*Node`-uniform). +//! +//! Document-order sort happens once at the public boundary +//! (`evaluate()`); intermediate step results stay in axis order so +//! reverse-axis positional predicates evaluate against proximity. + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const lp = @import("lightpanda"); + +const Ast = @import("Ast.zig"); +const Parser = @import("Parser.zig"); +const Result = @import("Result.zig"); +const Functions = @import("Functions.zig"); +const Node = @import("../webapi/Node.zig"); +const Element = Node.Element; +const Document = Node.Document; +const Frame = lp.Frame; + +const Evaluator = @This(); + +pub const Error = error{ + OutOfMemory, + WriteFailed, + // Surfaces from Attribute materialization (`Entry.toAttribute` → + // `String.dupe` enforces a length limit). The polyfill never hits + // this since JS strings are unbounded, but Lightpanda's `String` + // type caps at u32::MAX bytes — propagate so callers can surface + // a DOM exception. + StringTooLarge, + UnknownFunction, + UnionRequiresNodeSets, +}; + +arena: Allocator, +frame: *Frame, + +/// Public entry. Returns the AST's value; node-sets are sorted into +/// document order before return per XPath spec §3.3. +pub fn evaluate(arena: Allocator, frame: *Frame, expr: *const Ast.Expr, context_node: *Node) Error!Result.Result { + var ev = Evaluator{ .arena = arena, .frame = frame }; + const result = try ev.evalExpr(expr, context_node, 1, 1); + if (result == .node_set) { + sortDocOrder(@constCast(result.node_set)); + } + return result; +} + +pub const SearchError = Error || Parser.Error; + +/// Convenience for `DOM.performSearch` and capybara `xpathFind`: parse + +/// evaluate and unwrap the node-set. Top-level scalar expressions yield +/// an empty slice (decision #3 — these APIs are for finding nodes, not +/// arbitrary computation). +pub fn searchAll(arena: Allocator, frame: *Frame, root: *Node, expression: []const u8) SearchError![]const *Node { + const expr = try Parser.parse(arena, expression); + return switch (try evaluate(arena, frame, expr, root)) { + .node_set => |ns| ns, + else => &.{}, + }; +} + +// ----- AST evaluation ----- + +fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, size: usize) Error!Result.Result { + return switch (expr.*) { + .number => |n| .{ .number = n }, + .literal => |s| .{ .string = s }, + .var_ref => .{ .string = "" }, // decision #3 stub + .neg => |inner| blk: { + const v = try self.evalExpr(inner, ctx, pos, size); + const n = try Result.toNumber(self.arena, v); + break :blk .{ .number = -n }; + }, + .binop => |bo| try self.evalBinop(bo, ctx, pos, size), + .path => |p| try self.evalPath(p, ctx), + .filter_path => |fp| try self.evalFilterPath(fp, ctx, pos, size), + .filter => |f| try self.evalFilter(f, ctx, pos, size), + .fn_call => |fc| try self.evalFnCall(fc, ctx, pos, size), + }; +} + +fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { + const start: *Node = if (path.absolute) blk: { + if (ctx._type == .document) break :blk ctx; + const owner = ctx.ownerDocument(self.frame) orelse break :blk ctx; + break :blk owner.asNode(); + } else ctx; + + var current = try self.arena.alloc(*Node, 1); + current[0] = start; + var current_set: []const *Node = current; + + for (path.steps) |step| { + const r = try self.evalStep(current_set, step); + current_set = r.node_set; + } + return .{ .node_set = current_set }; +} + +fn evalFilterPath(self: *Evaluator, fp: Ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!Result.Result { + const base = try self.evalExpr(fp.filter, ctx, pos, size); + if (base != .node_set) return base; + + var current: []const *Node = base.node_set; + for (fp.steps) |step| { + const r = try self.evalStep(current, step); + current = r.node_set; + } + return .{ .node_set = current }; +} + +fn evalFilter(self: *Evaluator, f: Ast.Filter, ctx: *Node, pos: usize, size: usize) Error!Result.Result { + const base = try self.evalExpr(f.expr, ctx, pos, size); + if (base != .node_set) return base; + + var out: std.ArrayList(*Node) = .empty; + const sz = base.node_set.len; + for (base.node_set, 0..) |n, idx| { + const k = idx + 1; + const val = try self.evalExpr(f.predicate, n, k, sz); + if (predicateMatches(val, k)) try out.append(self.arena, n); + } + return .{ .node_set = out.items }; +} + +// ----- step + axis ----- + +fn evalStep(self: *Evaluator, ctx_nodes: []const *Node, step: Ast.Step) Error!Result.Result { + var dedup: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty; + + // Pre-lowercase the name test once per step. matchNameTest does + // case-insensitive matching (decision #2); without this hoist, every + // axis node would pay the per-byte case-fold inside `eqlIgnoreCase`. + const lowered_name: ?[]const u8 = switch (step.node_test) { + .name => |n| if (std.mem.eql(u8, n, "*")) null else try std.ascii.allocLowerString(self.arena, n), + .type_test => null, + }; + + for (ctx_nodes) |ctx| { + const axis_nodes = try self.axisNodes(ctx, step.axis); + + var filtered: std.ArrayList(*Node) = .empty; + for (axis_nodes) |n| { + if (matchTest(n, step.node_test, step.axis, lowered_name)) { + try filtered.append(self.arena, n); + } + } + + var current: []const *Node = filtered.items; + for (step.predicates) |pred| { + var next: std.ArrayList(*Node) = .empty; + const sz = current.len; + for (current, 0..) |n, idx| { + const k = idx + 1; + const val = try self.evalExpr(pred, n, k, sz); + if (predicateMatches(val, k)) try next.append(self.arena, n); + } + current = next.items; + } + + for (current) |n| try dedup.put(self.arena, n, {}); + } + + return .{ .node_set = dedup.keys() }; +} + +fn axisNodes(self: *Evaluator, node: *Node, axis: Ast.Axis) Error![]const *Node { + var out: std.ArrayList(*Node) = .empty; + switch (axis) { + .child => { + var it = node.childrenIterator(); + while (it.next()) |c| try out.append(self.arena, c); + }, + .descendant => try self.appendDescendants(node, &out), + .descendant_or_self => { + try out.append(self.arena, node); + try self.appendDescendants(node, &out); + }, + .self => try out.append(self.arena, node), + .parent => { + if (node.parentNode()) |p| try out.append(self.arena, p); + }, + // Reverse axes — proximity order (nearest first). Final node-set + // is sorted to document order at the public boundary. + .ancestor => { + var p = node.parentNode(); + while (p) |n| : (p = n.parentNode()) try out.append(self.arena, n); + }, + .ancestor_or_self => { + try out.append(self.arena, node); + var p = node.parentNode(); + while (p) |n| : (p = n.parentNode()) try out.append(self.arena, n); + }, + .following_sibling => { + var s = node.nextSibling(); + while (s) |n| : (s = n.nextSibling()) try out.append(self.arena, n); + }, + .preceding_sibling => { + var s = node.previousSibling(); + while (s) |n| : (s = n.previousSibling()) try out.append(self.arena, n); + }, + .following => try self.appendFollowing(node, &out), + .preceding => try self.appendPreceding(node, &out), + .attribute => try self.appendAttributes(node, &out), + .namespace, .unknown => {}, // decision #3 stubs + } + return out.items; +} + +fn appendDescendants(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) Error!void { + var it = node.childrenIterator(); + while (it.next()) |c| { + try out.append(self.arena, c); + try self.appendDescendants(c, out); + } +} + +fn appendFollowing(self: *Evaluator, start: *Node, out: *std.ArrayList(*Node)) Error!void { + var n: ?*Node = start; + while (n) |cur| : (n = cur.parentNode()) { + var s = cur.nextSibling(); + while (s) |sn| : (s = sn.nextSibling()) { + try out.append(self.arena, sn); + try self.appendDescendants(sn, out); + } + } +} + +fn appendPrecedingSubtree(self: *Evaluator, n: *Node, out: *std.ArrayList(*Node)) Error!void { + // Reverse document order: deepest-last children first, then self. + var c = n.lastChild(); + while (c) |child| : (c = child.previousSibling()) { + try self.appendPrecedingSubtree(child, out); + } + try out.append(self.arena, n); +} + +fn appendPreceding(self: *Evaluator, start: *Node, out: *std.ArrayList(*Node)) Error!void { + var n: ?*Node = start; + while (n) |cur| { + const parent = cur.parentNode() orelse break; + var s = cur.previousSibling(); + while (s) |sn| : (s = sn.previousSibling()) { + try self.appendPrecedingSubtree(sn, out); + } + n = parent; + } +} + +fn appendAttributes(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) Error!void { + const el = node.is(Element) orelse return; + var it = el.attributeIterator(); + while (it.next()) |entry| { + // Materialize as full Attribute so the result is *Node-uniform. + // Allocates from frame.arena (long-lived); attribute axis is + // typically leaf, so churn is bounded. + const attr = try entry.toAttribute(el, self.frame); + try out.append(self.arena, attr._proto); + } +} + +// ----- node test matching ----- + +fn matchTest(node: *Node, test_: Ast.NodeTest, axis: Ast.Axis, lowered_name: ?[]const u8) bool { + return switch (test_) { + .type_test => |kind| switch (kind) { + .node => true, + .text => node.getNodeType() == 3, + .comment => node.getNodeType() == 8, + .processing_instruction => node.getNodeType() == 7, + }, + .name => |name| matchNameTest(node, name, axis, lowered_name), + }; +} + +fn matchNameTest(node: *Node, name: []const u8, axis: Ast.Axis, lowered_name: ?[]const u8) bool { + // `lowered_name` is non-null iff `name != "*"`. Element tag names + // (`getTagNameLower`) and html5ever-stored attribute names are already + // lowercase, so a plain `mem.eql` against the pre-lowered test name + // replaces the per-call `eqlIgnoreCase`. + if (axis == .attribute) { + if (std.mem.eql(u8, name, "*")) return node._type == .attribute; + const attr = switch (node._type) { + .attribute => |a| a, + else => return false, + }; + return std.mem.eql(u8, attr._name.str(), lowered_name.?); + } + const el = node.is(Element) orelse return false; + if (std.mem.eql(u8, name, "*")) return true; + return std.mem.eql(u8, el.getTagNameLower(), lowered_name.?); +} + +// ----- binop ----- + +fn evalBinop(self: *Evaluator, bo: Ast.BinOp, ctx: *Node, pos: usize, size: usize) Error!Result.Result { + switch (bo.op) { + .or_ => { + const l = try self.evalExpr(bo.left, ctx, pos, size); + if (Result.toBoolean(l)) return .{ .boolean = true }; + const r = try self.evalExpr(bo.right, ctx, pos, size); + return .{ .boolean = Result.toBoolean(r) }; + }, + .and_ => { + const l = try self.evalExpr(bo.left, ctx, pos, size); + if (!Result.toBoolean(l)) return .{ .boolean = false }; + const r = try self.evalExpr(bo.right, ctx, pos, size); + return .{ .boolean = Result.toBoolean(r) }; + }, + .eq, .neq, .lt, .gt, .lte, .gte => { + const l = try self.evalExpr(bo.left, ctx, pos, size); + const r = try self.evalExpr(bo.right, ctx, pos, size); + return .{ .boolean = try self.xCmp(l, r, bo.op) }; + }, + .add, .sub, .mul, .div, .mod => { + const l = try self.evalExpr(bo.left, ctx, pos, size); + const r = try self.evalExpr(bo.right, ctx, pos, size); + const ln = try Result.toNumber(self.arena, l); + const rn = try Result.toNumber(self.arena, r); + const v: f64 = switch (bo.op) { + .add => ln + rn, + .sub => ln - rn, + .mul => ln * rn, + .div => ln / rn, + // JS `%` and Zig `@rem` agree on sign for finite values + // and propagate NaN (XPath §3.5). + .mod => @rem(ln, rn), + else => unreachable, + }; + return .{ .number = v }; + }, + .union_ => { + const l = try self.evalExpr(bo.left, ctx, pos, size); + const r = try self.evalExpr(bo.right, ctx, pos, size); + if (l != .node_set or r != .node_set) return error.UnionRequiresNodeSets; + var seen: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty; + for (l.node_set) |n| try seen.put(self.arena, n, {}); + for (r.node_set) |n| try seen.put(self.arena, n, {}); + const nodes = seen.keys(); + sortDocOrder(@constCast(nodes)); + return .{ .node_set = nodes }; + }, + } +} + +// ----- comparison (XPath spec §3.4) ----- + +fn xCmp(self: *Evaluator, left: Result.Result, right: Result.Result, op: Ast.BinOpKind) Error!bool { + const is_eq = (op == .eq or op == .neq); + const l_is_set = (left == .node_set); + const r_is_set = (right == .node_set); + + if (l_is_set and r_is_set) { + // Cache right-side string-values once. Without this, each left node + // would pay |right| allocations — O(N×M) for a set×set comparison + // (e.g. `//foo = //bar` on a large page). + const right_strings = try self.arena.alloc([]const u8, right.node_set.len); + for (right.node_set, 0..) |r, i| { + right_strings[i] = try Result.stringValueOf(self.arena, r); + } + for (left.node_set) |l| { + const lv = try Result.stringValueOf(self.arena, l); + for (right_strings) |rv| { + const matched = if (is_eq) + cmpString(lv, rv, op) + else + cmpNumber(Result.stringToNumber(lv), Result.stringToNumber(rv), op); + if (matched) return true; + } + } + return false; + } + + if (l_is_set or r_is_set) { + const ns = if (l_is_set) left.node_set else right.node_set; + const other = if (l_is_set) right else left; + const ns_left = l_is_set; + + if (other == .boolean) { + const ns_b = ns.len > 0; + const a, const b = if (ns_left) .{ ns_b, other.boolean } else .{ other.boolean, ns_b }; + return cmpBool(a, b, op); + } + + for (ns) |n| { + const sv = try Result.stringValueOf(self.arena, n); + const matched = switch (other) { + .number => |num| blk: { + const sv_num = Result.stringToNumber(sv); + const a, const b = if (ns_left) .{ sv_num, num } else .{ num, sv_num }; + break :blk cmpNumber(a, b, op); + }, + .string => |s| blk: { + if (is_eq) { + const a, const b = if (ns_left) .{ sv, s } else .{ s, sv }; + break :blk cmpString(a, b, op); + } + const sv_num = Result.stringToNumber(sv); + const s_num = Result.stringToNumber(s); + const a, const b = if (ns_left) .{ sv_num, s_num } else .{ s_num, sv_num }; + break :blk cmpNumber(a, b, op); + }, + .boolean, .node_set => unreachable, // handled above + }; + if (matched) return true; + } + return false; + } + + // Neither is a node-set. + if (is_eq) { + if (left == .boolean or right == .boolean) { + return cmpBool(Result.toBoolean(left), Result.toBoolean(right), op); + } + if (left == .number or right == .number) { + const ln = try Result.toNumber(self.arena, left); + const rn = try Result.toNumber(self.arena, right); + return cmpNumber(ln, rn, op); + } + const ls = try Result.toString(self.arena, left); + const rs = try Result.toString(self.arena, right); + return cmpString(ls, rs, op); + } + // Non-eq with no node-set: both → number. + const ln = try Result.toNumber(self.arena, left); + const rn = try Result.toNumber(self.arena, right); + return cmpNumber(ln, rn, op); +} + +fn cmpString(a: []const u8, b: []const u8, op: Ast.BinOpKind) bool { + const equal = std.mem.eql(u8, a, b); + return switch (op) { + .eq => equal, + .neq => !equal, + else => unreachable, // <, > etc. always coerce to number first + }; +} + +fn cmpNumber(a: f64, b: f64, op: Ast.BinOpKind) bool { + // Native f64 comparison gives correct NaN semantics: + // NaN == X is false, NaN != X is true, NaN < X (etc.) is false. + return switch (op) { + .eq => a == b, + .neq => a != b, + .lt => a < b, + .gt => a > b, + .lte => a <= b, + .gte => a >= b, + else => unreachable, + }; +} + +fn cmpBool(a: bool, b: bool, op: Ast.BinOpKind) bool { + return switch (op) { + .eq => a == b, + .neq => a != b, + else => unreachable, + }; +} + +// ----- function calls ----- + +fn evalFnCall(self: *Evaluator, fc: Ast.FnCall, ctx: *Node, pos: usize, size: usize) Error!Result.Result { + // position()/last() stay here — they need the (pos, size) closure + // that Functions.call doesn't see. Keeping them inline avoids + // pushing per-call context through Functions' signature. + if (std.mem.eql(u8, fc.name, "position")) return .{ .number = @floatFromInt(pos) }; + if (std.mem.eql(u8, fc.name, "last")) return .{ .number = @floatFromInt(size) }; + + // Eagerly evaluate args. Matches the polyfill's `evaluate(args[i], ...)` + // pattern; lazy short-circuit isn't needed because `or`/`and` are + // binops handled in evalBinop, not function calls. + const eval_args = try self.arena.alloc(Result.Result, fc.args.len); + for (fc.args, 0..) |a, i| eval_args[i] = try self.evalExpr(a, ctx, pos, size); + + return Functions.call(self.arena, self.frame, fc.name, eval_args, ctx); +} + +// ----- helpers ----- + +fn predicateMatches(val: Result.Result, position: usize) bool { + return switch (val) { + // Numeric predicate value selects only the node at that position + // (1-based). Non-integer numbers never match. + .number => |n| n == @as(f64, @floatFromInt(position)), + else => Result.toBoolean(val), + }; +} + +pub fn sortDocOrder(nodes: []*Node) void { + if (nodes.len <= 1) return; + std.mem.sort(*Node, nodes, {}, lessThanDocOrder); +} + +fn lessThanDocOrder(_: void, a: *Node, b: *Node) bool { + if (a == b) return false; + const pos = a.compareDocumentPosition(b); + // FOLLOWING (0x04) — b comes after a in document order. + return (pos & 0x04) != 0; +} + +// --------------------------------------------------------------------- +// Tests — pure-logic only. DOM-dependent evaluation lands as HTML +// fixtures in Phase 9 (tests/xpath/*.html); Lightpanda has no in-Zig +// way to construct a Frame + Document tree without the JS runtime. +// --------------------------------------------------------------------- + +const testing = std.testing; +const Tokenizer = @import("Tokenizer.zig"); + +test "Evaluator: cmpNumber NaN semantics" { + const nan = std.math.nan(f64); + try testing.expect(!cmpNumber(nan, nan, .eq)); + try testing.expect(cmpNumber(nan, nan, .neq)); + try testing.expect(!cmpNumber(nan, 0, .lt)); + try testing.expect(!cmpNumber(nan, 0, .gt)); + try testing.expect(!cmpNumber(nan, 0, .lte)); + try testing.expect(!cmpNumber(nan, 0, .gte)); + try testing.expect(cmpNumber(0, 0, .eq)); + try testing.expect(cmpNumber(1, 2, .lt)); + try testing.expect(cmpNumber(2, 1, .gt)); + try testing.expect(cmpNumber(1, 1, .lte)); + try testing.expect(cmpNumber(1, 1, .gte)); +} + +test "Evaluator: cmpString" { + try testing.expect(cmpString("a", "a", .eq)); + try testing.expect(!cmpString("a", "b", .eq)); + try testing.expect(cmpString("a", "b", .neq)); + try testing.expect(!cmpString("a", "a", .neq)); +} + +test "Evaluator: cmpBool" { + try testing.expect(cmpBool(true, true, .eq)); + try testing.expect(!cmpBool(true, false, .eq)); + try testing.expect(cmpBool(true, false, .neq)); +} + +test "Evaluator: predicateMatches numeric vs boolean" { + try testing.expect(predicateMatches(.{ .number = 1 }, 1)); + try testing.expect(!predicateMatches(.{ .number = 2 }, 1)); + // Non-integer never matches. + try testing.expect(!predicateMatches(.{ .number = 1.5 }, 1)); + // Boolean: any truthy value passes regardless of position. + try testing.expect(predicateMatches(.{ .boolean = true }, 7)); + try testing.expect(!predicateMatches(.{ .boolean = false }, 1)); + // String: nonempty truthy. + try testing.expect(predicateMatches(.{ .string = "x" }, 99)); + try testing.expect(!predicateMatches(.{ .string = "" }, 1)); + // Empty node-set: falsy. + try testing.expect(!predicateMatches(.{ .node_set = &.{} }, 1)); +} + +test "Evaluator: scalar arithmetic via parsed expressions" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "1 + 2", 3 }, + .{ "5 - 3", 2 }, + .{ "4 * 2", 8 }, + .{ "10 div 4", 2.5 }, + .{ "10 mod 3", 1 }, + .{ "-5", -5 }, + .{ "1 + 2 * 3", 7 }, + }) |case| { + const expr = try Parser.parse(a, case[0]); + // Frame is unused for pure-arithmetic AST. The unsafe cast lets + // us exercise binop / number paths without a real DOM. Any path + // accessing the Frame would crash; the inputs above never do. + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const ctx_dummy: *Node = @ptrFromInt(0x2000); + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expect(r == .number); + try testing.expectEqual(@as(f64, case[1]), r.number); + } +} + +test "Evaluator: scalar comparison via parsed expressions" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "1 = 1", true }, + .{ "1 = 2", false }, + .{ "1 != 2", true }, + .{ "1 < 2", true }, + .{ "2 < 1", false }, + .{ "1 <= 1", true }, + .{ "2 >= 2", true }, + .{ "'abc' = 'abc'", true }, + .{ "'abc' != 'abd'", true }, + }) |case| { + const expr = try Parser.parse(a, case[0]); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const ctx_dummy: *Node = @ptrFromInt(0x2000); + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expect(r == .boolean); + try testing.expectEqual(case[1], r.boolean); + } +} + +test "Evaluator: position() and last() reflect context" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + const ctx_dummy: *Node = @ptrFromInt(0x2000); + + { + const expr = try Parser.parse(a, "position()"); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 3, 5); + try testing.expectEqual(@as(f64, 3), r.number); + } + { + const expr = try Parser.parse(a, "last()"); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 3, 5); + try testing.expectEqual(@as(f64, 5), r.number); + } + { + // Logical short-circuit: last() never evaluates if first + // operand is true. + const expr = try Parser.parse(a, "1 = 1 or last() > 0"); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expect(r.boolean); + } +} + +test "Evaluator: short-circuit and/or" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + const ctx_dummy: *Node = @ptrFromInt(0x2000); + + inline for (.{ + .{ "1 = 2 or 1 = 1", true }, + .{ "1 = 1 and 1 = 2", false }, + .{ "1 = 1 and 2 = 2", true }, + .{ "1 = 2 and 1 = 1", false }, + .{ "1 = 2 or 2 = 1", false }, + }) |case| { + const expr = try Parser.parse(a, case[0]); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expect(r == .boolean); + try testing.expectEqual(case[1], r.boolean); + } +} + +test "Evaluator: unary minus" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + const ctx_dummy: *Node = @ptrFromInt(0x2000); + + const expr = try Parser.parse(a, "-(3 + 2)"); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expectEqual(@as(f64, -5), r.number); +} + +test "Evaluator: division by zero produces infinity / NaN per IEEE" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + const ctx_dummy: *Node = @ptrFromInt(0x2000); + + { + const expr = try Parser.parse(a, "1 div 0"); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expect(std.math.isPositiveInf(r.number)); + } + { + const expr = try Parser.parse(a, "0 div 0"); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expect(std.math.isNan(r.number)); + } +} + +test "Evaluator: searchAll on scalar expression returns empty (decision #3)" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + // Synthetic frame/root pointers are safe here because pure-scalar + // expressions (binop, literal, true(), comparison) never reach into + // the Frame or the context node. Adding a DOM-touching expression + // (e.g. `id('x')`) to this list would crash on dereference. + inline for (.{ "1 + 2", "'hello'", "true()", "1 = 1" }) |expr| { + const nodes = try searchAll(a, @ptrFromInt(0x1000), @ptrFromInt(0x2000), expr); + try testing.expectEqual(@as(usize, 0), nodes.len); + } +} diff --git a/src/browser/xpath/Functions.zig b/src/browser/xpath/Functions.zig new file mode 100644 index 00000000..d0ae7eac --- /dev/null +++ b/src/browser/xpath/Functions.zig @@ -0,0 +1,630 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! XPath 1.0 core function library — 25 functions per polyfill parity +//! (lib/capybara/lightpanda/javascripts/index.js, `evalFunc` at lines +//! 646–770). `position()` and `last()` live in `Evaluator.evalFnCall` +//! because they need the `(pos, size)` closure that this module never +//! sees. +//! +//! Args are pre-evaluated by the caller (`Evaluator.evalFnCall`). Eager +//! evaluation matches the polyfill's `evaluate(args[i], ctx, pos, size)` +//! pattern — short-circuit operators (`or`/`and`) are binops, not +//! function calls, so laziness isn't required here. The pre-evaluation +//! contract also keeps Functions.zig free of a circular import on +//! Evaluator.zig. +//! +//! Stubs per decision #3 (XPATH_COMPLIANCE.md): +//! - `lang(string)` → always false +//! - `namespace-uri(...)` → always "" +//! - `name`/`local-name` → lowercased (HTML pragmatism) +//! +//! Allocations land in the caller's per-evaluation arena. + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const lp = @import("lightpanda"); + +const Result = @import("Result.zig"); +const Node = @import("../webapi/Node.zig"); +const Element = Node.Element; +const Document = Node.Document; +const Frame = lp.Frame; + +pub const Error = error{ + OutOfMemory, + WriteFailed, + StringTooLarge, + UnknownFunction, +}; + +/// Dispatch a core-library function call. Returns `error.UnknownFunction` +/// if `name` doesn't match — the caller (Evaluator) handles +/// `position()` / `last()` inline before getting here, so this is the +/// last lookup stop. +pub fn call( + arena: Allocator, + frame: *Frame, + name: []const u8, + args: []const Result.Result, + ctx: *Node, +) Error!Result.Result { + // -- Node-set -- + if (eql(name, "count")) return .{ .number = countFn(args) }; + if (eql(name, "id")) return idFn(arena, frame, args, ctx); + if (eql(name, "local-name")) return .{ .string = try localNameFn(arena, args, ctx) }; + if (eql(name, "name")) return .{ .string = try nameFn(arena, args, ctx) }; + if (eql(name, "namespace-uri")) return .{ .string = "" }; + + // -- String -- + if (eql(name, "string")) return .{ .string = try stringFn(arena, args, ctx) }; + if (eql(name, "concat")) return .{ .string = try concatFn(arena, args) }; + if (eql(name, "starts-with")) return .{ .boolean = try startsWithFn(arena, args) }; + if (eql(name, "contains")) return .{ .boolean = try containsFn(arena, args) }; + if (eql(name, "substring-before")) return .{ .string = try substringBeforeFn(arena, args) }; + if (eql(name, "substring-after")) return .{ .string = try substringAfterFn(arena, args) }; + if (eql(name, "substring")) return .{ .string = try substringFn(arena, args) }; + if (eql(name, "string-length")) return .{ .number = try stringLengthFn(arena, args, ctx) }; + if (eql(name, "normalize-space")) return .{ .string = try normalizeSpaceFn(arena, args, ctx) }; + if (eql(name, "translate")) return .{ .string = try translateFn(arena, args) }; + + // -- Boolean -- + if (eql(name, "boolean")) return .{ .boolean = if (args.len == 0) false else Result.toBoolean(args[0]) }; + if (eql(name, "not")) return .{ .boolean = if (args.len == 0) true else !Result.toBoolean(args[0]) }; + if (eql(name, "true")) return .{ .boolean = true }; + if (eql(name, "false")) return .{ .boolean = false }; + if (eql(name, "lang")) return .{ .boolean = false }; + + // -- Number -- + if (eql(name, "number")) return .{ .number = try numberFn(arena, args, ctx) }; + if (eql(name, "sum")) return .{ .number = try sumFn(arena, args) }; + if (eql(name, "floor")) return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.floor(try Result.toNumber(arena, args[0])) }; + if (eql(name, "ceiling")) return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.ceil(try Result.toNumber(arena, args[0])) }; + if (eql(name, "round")) return .{ .number = if (args.len == 0) std.math.nan(f64) else roundHalfToPosInf(try Result.toNumber(arena, args[0])) }; + + return error.UnknownFunction; +} + +inline fn eql(a: []const u8, b: []const u8) bool { + return std.mem.eql(u8, a, b); +} + +// ----- node-set fns ----- + +fn countFn(args: []const Result.Result) f64 { + if (args.len == 0 or args[0] != .node_set) return 0; + return @floatFromInt(args[0].node_set.len); +} + +fn idFn(arena: Allocator, frame: *Frame, args: []const Result.Result, ctx: *Node) Error!Result.Result { + if (args.len == 0) return .{ .node_set = &.{} }; + + // Polyfill: node-set arg → join `stringVal(n)` of each by ' '. Scalar + // arg → `toStr`. Then split on whitespace and look up each token. + const id_str: []const u8 = blk: { + if (args[0] == .node_set) { + var buf = std.Io.Writer.Allocating.init(arena); + for (args[0].node_set, 0..) |n, i| { + if (i > 0) try buf.writer.writeByte(' '); + const sv = try Result.stringValueOf(arena, n); + try buf.writer.writeAll(sv); + } + break :blk buf.written(); + } + break :blk try Result.toString(arena, args[0]); + }; + + // `ctx.ownerDocument || ctx` — document nodes own themselves. + const doc = ctx.ownerDocument(frame) orelse (ctx.is(Document) orelse return .{ .node_set = &.{} }); + + var seen: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty; + var it = std.mem.tokenizeAny(u8, id_str, &std.ascii.whitespace); + while (it.next()) |tok| { + if (doc.getElementById(tok, frame)) |el| { + try seen.put(arena, el.asNode(), {}); + } + } + return .{ .node_set = seen.keys() }; +} + +fn localNameFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { + const node = firstNodeOrCtx(args, ctx) orelse return ""; + // For Element, `getLocalName` returns a slice into `_tag_name` + // (lowercase, namespace-prefix stripped) — lifetime exceeds the + // per-evaluation arena, so we borrow instead of duping. + if (node.is(Element)) |el| return el.getLocalName(); + var buf: [256]u8 = undefined; + return std.ascii.allocLowerString(arena, node.getNodeName(&buf)); +} + +fn nameFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { + const node = firstNodeOrCtx(args, ctx) orelse return ""; + // Diverges from `local-name` only on namespaced elements: `name` + // keeps the prefix (`ns:foo`), `local-name` strips it (`foo`). + if (node.is(Element)) |el| return el.getTagNameLower(); + var buf: [256]u8 = undefined; + return std.ascii.allocLowerString(arena, node.getNodeName(&buf)); +} + +fn firstNodeOrCtx(args: []const Result.Result, ctx: *Node) ?*Node { + if (args.len == 0) return ctx; + if (args[0] != .node_set) return null; + if (args[0].node_set.len == 0) return null; + return args[0].node_set[0]; +} + +// ----- string fns ----- + +fn stringFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { + if (args.len == 0) return try Result.stringValueOf(arena, ctx); + return try Result.toString(arena, args[0]); +} + +fn concatFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { + var buf = std.Io.Writer.Allocating.init(arena); + for (args) |a| { + const s = try Result.toString(arena, a); + try buf.writer.writeAll(s); + } + return buf.written(); +} + +fn startsWithFn(arena: Allocator, args: []const Result.Result) Error!bool { + if (args.len < 2) return false; + const s1 = try Result.toString(arena, args[0]); + const s2 = try Result.toString(arena, args[1]); + return std.mem.startsWith(u8, s1, s2); +} + +fn containsFn(arena: Allocator, args: []const Result.Result) Error!bool { + if (args.len < 2) return false; + const s1 = try Result.toString(arena, args[0]); + const s2 = try Result.toString(arena, args[1]); + return std.mem.indexOf(u8, s1, s2) != null; +} + +fn substringBeforeFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { + if (args.len < 2) return ""; + const s1 = try Result.toString(arena, args[0]); + const s2 = try Result.toString(arena, args[1]); + if (std.mem.indexOf(u8, s1, s2)) |idx| { + return s1[0..idx]; + } + return ""; +} + +fn substringAfterFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { + if (args.len < 2) return ""; + const s1 = try Result.toString(arena, args[0]); + const s2 = try Result.toString(arena, args[1]); + if (std.mem.indexOf(u8, s1, s2)) |idx| { + return s1[idx + s2.len ..]; + } + return ""; +} + +fn substringFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { + if (args.len < 2) return ""; + const s = try Result.toString(arena, args[0]); + const start_raw = try Result.toNumber(arena, args[1]); + if (std.math.isNan(start_raw)) return ""; + const start = roundHalfToPosInf(start_raw); + + const s_len: f64 = @floatFromInt(s.len); + if (args.len >= 3) { + const len_raw = try Result.toNumber(arena, args[2]); + if (std.math.isNan(len_raw)) return ""; + const len = roundHalfToPosInf(len_raw); + const sum = start - 1 + len; + // -inf + inf is NaN; @intFromFloat(NaN) is illegal behavior. + if (std.math.isNan(sum)) return ""; + const si_f = @max(start - 1, 0); + const ei_f = @min(sum, s_len); + if (si_f >= ei_f) return ""; + const si: usize = @intFromFloat(si_f); + const ei: usize = @intFromFloat(ei_f); + return s[si..ei]; + } + + const si_f = @max(start - 1, 0); + if (si_f >= s_len) return ""; + const si: usize = @intFromFloat(si_f); + return s[si..]; +} + +fn stringLengthFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error!f64 { + const s = if (args.len == 0) + try Result.stringValueOf(arena, ctx) + else + try Result.toString(arena, args[0]); + // Polyfill returns UTF-16 code units; we return UTF-8 bytes. They + // agree on ASCII (the gem's 91-case battery is ASCII-only). See + // .claude/skills/xpath-port/NOTES.md for the divergence rationale. + return @floatFromInt(s.len); +} + +fn normalizeSpaceFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { + const s = if (args.len == 0) + try Result.stringValueOf(arena, ctx) + else + try Result.toString(arena, args[0]); + + const trimmed = std.mem.trim(u8, s, &std.ascii.whitespace); + if (trimmed.len == 0) return ""; + + var buf = std.Io.Writer.Allocating.init(arena); + var prev_space = false; + for (trimmed) |c| { + if (std.ascii.isWhitespace(c)) { + if (!prev_space) try buf.writer.writeByte(' '); + prev_space = true; + } else { + try buf.writer.writeByte(c); + prev_space = false; + } + } + return buf.written(); +} + +fn translateFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { + if (args.len < 3) return ""; + const s = try Result.toString(arena, args[0]); + const from = try Result.toString(arena, args[1]); + const to = try Result.toString(arena, args[2]); + + var buf = std.Io.Writer.Allocating.init(arena); + for (s) |c| { + if (std.mem.indexOfScalar(u8, from, c)) |idx| { + // Chars in `from` past `to.len` are deleted (no copy). + if (idx < to.len) try buf.writer.writeByte(to[idx]); + } else { + try buf.writer.writeByte(c); + } + } + return buf.written(); +} + +// ----- number fns ----- + +fn numberFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error!f64 { + if (args.len == 0) { + const sv = try Result.stringValueOf(arena, ctx); + return Result.stringToNumber(sv); + } + return try Result.toNumber(arena, args[0]); +} + +fn sumFn(arena: Allocator, args: []const Result.Result) Error!f64 { + if (args.len == 0 or args[0] != .node_set) return std.math.nan(f64); + var total: f64 = 0; + for (args[0].node_set) |n| { + const sv = try Result.stringValueOf(arena, n); + total += Result.stringToNumber(sv); + } + return total; +} + +/// Round half toward positive infinity. Matches JS `Math.round` (the +/// polyfill calls it for both `round()` and `substring()`): +/// round(0.5) = 1 round(-0.5) = 0 +/// round(1.5) = 2 round(-1.5) = -1 +/// Diverges from Zig's `@round` (away from zero): `@round(-0.5) = -1`. +fn roundHalfToPosInf(n: f64) f64 { + if (std.math.isNan(n) or !std.math.isFinite(n)) return n; + return std.math.floor(n + 0.5); +} + +// --------------------------------------------------------------------- +// Tests — pure-logic only. Functions that need a real DOM (id, name, +// local-name, string with element ctx, sum, count of node-set, etc.) +// are exercised via Phase 9 HTML fixtures in tests/xpath/. +// --------------------------------------------------------------------- + +const testing = std.testing; +const Tokenizer = @import("Tokenizer.zig"); +const Parser = @import("Parser.zig"); +const Evaluator = @import("Evaluator.zig"); + +fn evalScalar(a: Allocator, src: []const u8) !Result.Result { + const expr = try Parser.parse(a, src); + // Synthetic Frame/Node pointers — the public `evaluate` entry only + // touches the Frame for path/axis evaluation. Pure-scalar expressions + // (arithmetic, function calls returning scalars) never deref it. + return Evaluator.evaluate(a, @ptrFromInt(0x1000), expr, @ptrFromInt(0x2000)); +} + +test "Functions: count() of non-node-set returns 0" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const r = try evalScalar(arena.allocator(), "count('hello')"); + try testing.expect(r == .number); + try testing.expectEqual(@as(f64, 0), r.number); +} + +test "Functions: string() on scalar coerces" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "string(42)", "42" }, + .{ "string(3.14)", "3.14" }, + .{ "string(true())", "true" }, + .{ "string(false())", "false" }, + .{ "string('hello')", "hello" }, + .{ "string(0)", "0" }, + .{ "string(-1)", "-1" }, + }) |case| { + const r = try evalScalar(a, case[0]); + try testing.expect(r == .string); + try testing.expectEqualStrings(case[1], r.string); + } +} + +test "Functions: concat() variadic" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "concat('a', 'b')", "ab" }, + .{ "concat('a', 'b', 'c')", "abc" }, + .{ "concat('foo', '-', 'bar', '-', 'baz')", "foo-bar-baz" }, + .{ "concat('x', 1, 'y')", "x1y" }, + }) |case| { + const r = try evalScalar(a, case[0]); + try testing.expect(r == .string); + try testing.expectEqualStrings(case[1], r.string); + } +} + +test "Functions: starts-with / contains" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "starts-with('hello', 'he')", true }, + .{ "starts-with('hello', 'el')", false }, + .{ "starts-with('hello', '')", true }, + .{ "contains('hello world', 'wor')", true }, + .{ "contains('hello', 'xyz')", false }, + .{ "contains('hello', '')", true }, + }) |case| { + const r = try evalScalar(a, case[0]); + try testing.expect(r == .boolean); + try testing.expectEqual(case[1], r.boolean); + } +} + +test "Functions: substring-before / substring-after" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "substring-before('1999/04/01', '/')", "1999" }, + .{ "substring-before('hello', 'xyz')", "" }, + .{ "substring-after('1999/04/01', '/')", "04/01" }, + .{ "substring-after('hello', 'xyz')", "" }, + .{ "substring-after('hello', '')", "hello" }, + }) |case| { + const r = try evalScalar(a, case[0]); + try testing.expect(r == .string); + try testing.expectEqualStrings(case[1], r.string); + } +} + +test "Functions: substring() — XPath 1-based, rounding, NaN handling" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "substring('12345', 2, 3)", "234" }, + .{ "substring('12345', 2)", "2345" }, + // XPath spec example: round(1.5) = 2 → start at pos 2, len 2. + .{ "substring('12345', 1.5, 2.6)", "234" }, + // start = 0: si = max(-1, 0) = 0, ei = min(0 - 1 + 3, len) = 2. + .{ "substring('12345', 0, 3)", "12" }, + // Negative start clamps to 0. + .{ "substring('12345', -3, 7)", "123" }, + // NaN start. + .{ "substring('12345', 'foo')", "" }, + // NaN length. + .{ "substring('12345', 1, 'foo')", "" }, + }) |case| { + const r = try evalScalar(a, case[0]); + try testing.expect(r == .string); + try testing.expectEqualStrings(case[1], r.string); + } +} + +test "Functions: string-length on scalar arg" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "string-length('hello')", 5 }, + .{ "string-length('')", 0 }, + .{ "string-length('a b c')", 5 }, + }) |case| { + const r = try evalScalar(a, case[0]); + try testing.expect(r == .number); + try testing.expectEqual(@as(f64, case[1]), r.number); + } +} + +test "Functions: normalize-space" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "normalize-space(' hello world ')", "hello world" }, + .{ "normalize-space('hello')", "hello" }, + .{ "normalize-space('')", "" }, + .{ "normalize-space(' ')", "" }, + .{ "normalize-space('a\tb\nc')", "a b c" }, + }) |case| { + const r = try evalScalar(a, case[0]); + try testing.expect(r == .string); + try testing.expectEqualStrings(case[1], r.string); + } +} + +test "Functions: translate" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + // Standard XPath spec example. + .{ "translate('bar', 'abc', 'ABC')", "BAr" }, + // Char in `from` past `to.len` is deleted. + .{ "translate('--aaa--', 'abc-', 'ABC')", "AAA" }, + .{ "translate('hello', '', '')", "hello" }, + // Identity. + .{ "translate('abc', 'abc', 'abc')", "abc" }, + }) |case| { + const r = try evalScalar(a, case[0]); + try testing.expect(r == .string); + try testing.expectEqualStrings(case[1], r.string); + } +} + +test "Functions: boolean / not / true / false / lang" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "true()", true }, + .{ "false()", false }, + .{ "not(true())", false }, + .{ "not(false())", true }, + .{ "boolean(1)", true }, + .{ "boolean(0)", false }, + .{ "boolean('')", false }, + .{ "boolean('x')", true }, + // lang is a stub — always false. + .{ "lang('en')", false }, + }) |case| { + const r = try evalScalar(a, case[0]); + try testing.expect(r == .boolean); + try testing.expectEqual(case[1], r.boolean); + } +} + +test "Functions: number() on scalar arg" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + { + const r = try evalScalar(a, "number('42')"); + try testing.expectEqual(@as(f64, 42), r.number); + } + { + const r = try evalScalar(a, "number(true())"); + try testing.expectEqual(@as(f64, 1), r.number); + } + { + const r = try evalScalar(a, "number(false())"); + try testing.expectEqual(@as(f64, 0), r.number); + } + { + const r = try evalScalar(a, "number('foo')"); + try testing.expect(std.math.isNan(r.number)); + } +} + +test "Functions: floor / ceiling / round" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "floor(1.5)", 1 }, + .{ "floor(-1.5)", -2 }, + .{ "floor(0)", 0 }, + .{ "ceiling(1.5)", 2 }, + .{ "ceiling(-1.5)", -1 }, + .{ "ceiling(0)", 0 }, + // Half-toward-positive-infinity (JS Math.round behavior). + .{ "round(0.5)", 1 }, + .{ "round(-0.5)", 0 }, + .{ "round(1.5)", 2 }, + .{ "round(-1.5)", -1 }, + .{ "round(2.5)", 3 }, + }) |case| { + const r = try evalScalar(a, case[0]); + try testing.expect(r == .number); + try testing.expectEqual(@as(f64, case[1]), r.number); + } +} + +test "Functions: round/floor/ceiling propagate NaN and Infinity" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + { + const r = try evalScalar(a, "round(1 div 0)"); // +Infinity + try testing.expect(std.math.isPositiveInf(r.number)); + } + { + const r = try evalScalar(a, "round(0 div 0)"); // NaN + try testing.expect(std.math.isNan(r.number)); + } + { + const r = try evalScalar(a, "floor(0 div 0)"); + try testing.expect(std.math.isNan(r.number)); + } + { + const r = try evalScalar(a, "ceiling(0 div 0)"); + try testing.expect(std.math.isNan(r.number)); + } +} + +test "Functions: sum / count on non-node-set defaults" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + { + const r = try evalScalar(a, "sum('hello')"); + try testing.expect(std.math.isNan(r.number)); + } + { + const r = try evalScalar(a, "count('hello')"); + try testing.expectEqual(@as(f64, 0), r.number); + } +} + +test "Functions: roundHalfToPosInf" { + try testing.expectEqual(@as(f64, 1), roundHalfToPosInf(0.5)); + try testing.expectEqual(@as(f64, 0), roundHalfToPosInf(-0.5)); + try testing.expectEqual(@as(f64, 2), roundHalfToPosInf(1.5)); + try testing.expectEqual(@as(f64, -1), roundHalfToPosInf(-1.5)); + try testing.expectEqual(@as(f64, 3), roundHalfToPosInf(2.5)); + try testing.expect(std.math.isNan(roundHalfToPosInf(std.math.nan(f64)))); + try testing.expect(std.math.isPositiveInf(roundHalfToPosInf(std.math.inf(f64)))); + try testing.expect(std.math.isNegativeInf(roundHalfToPosInf(-std.math.inf(f64)))); +} diff --git a/src/browser/xpath/Parser.zig b/src/browser/xpath/Parser.zig new file mode 100644 index 00000000..88d25b26 --- /dev/null +++ b/src/browser/xpath/Parser.zig @@ -0,0 +1,923 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! XPath 1.0 expression parser. +//! +//! Mirrors the polyfill `Parser.prototype.*` chain in capybara-lightpanda +//! (lib/capybara/lightpanda/javascripts/index.js): recursive descent over +//! a fully-tokenized stream, producing an `Ast.Expr` tree allocated on +//! the caller's arena. The AST borrows string/name slices from `input` +//! and is valid for as long as the arena and input outlive it. + +const std = @import("std"); +const Allocator = std.mem.Allocator; + +const Tokenizer = @import("Tokenizer.zig"); +const Token = Tokenizer.Token; +const Ast = @import("Ast.zig"); + +const Parser = @This(); + +pub const Error = error{ + OutOfMemory, + UnexpectedToken, + ExpectedNodeTest, + ExpectedPrimaryExpr, +}; + +arena: Allocator, +tokens: []const Token, +pos: usize = 0, + +pub fn parse(arena: Allocator, input: []const u8) Error!*Ast.Expr { + var token_list: std.ArrayList(Token) = .empty; + // Token count is bounded by input length; ¼-byte-per-token is + // generous for typical XPath and skips ArrayList regrowth. + try token_list.ensureTotalCapacity(arena, @max(8, input.len / 4)); + var tokenizer = Tokenizer{ .input = input }; + while (true) { + const tok = tokenizer.next(); + try token_list.append(arena, tok); + if (tok == .eof) break; + } + + var parser = Parser{ + .arena = arena, + .tokens = token_list.items, + }; + const expr = try parser.parseExpr(); + if (parser.peek() != .eof) return error.UnexpectedToken; + return expr; +} + +// --- token cursor helpers --- + +fn peek(self: *const Parser) Token { + return self.tokens[self.pos]; +} + +fn lookahead(self: *const Parser, offset: usize) Token { + const idx = self.pos + offset; + if (idx >= self.tokens.len) return .eof; + return self.tokens[idx]; +} + +fn advance(self: *Parser) Token { + const tok = self.tokens[self.pos]; + self.pos += 1; + return tok; +} + +fn at(self: *const Parser, tag: std.meta.Tag(Token)) bool { + return self.peek() == tag; +} + +fn match(self: *Parser, tag: std.meta.Tag(Token)) bool { + if (self.at(tag)) { + _ = self.advance(); + return true; + } + return false; +} + +fn expect(self: *Parser, tag: std.meta.Tag(Token)) Error!Token { + if (!self.at(tag)) return error.UnexpectedToken; + return self.advance(); +} + +fn matchKeyword(self: *Parser, keyword: []const u8) bool { + const tok = self.peek(); + if (tok == .name and std.mem.eql(u8, tok.name, keyword)) { + _ = self.advance(); + return true; + } + return false; +} + +fn makeExpr(self: *Parser, value: Ast.Expr) Error!*Ast.Expr { + const expr = try self.arena.create(Ast.Expr); + expr.* = value; + return expr; +} + +fn makeBinop(self: *Parser, op: Ast.BinOpKind, left: *Ast.Expr, right: *Ast.Expr) Error!*Ast.Expr { + return try self.makeExpr(.{ .binop = .{ .op = op, .left = left, .right = right } }); +} + +// --- operator-precedence chain --- +// +// Or → And → Equality → Relational → Additive → Mult → Unary → Union → Path + +fn parseExpr(self: *Parser) Error!*Ast.Expr { + return self.parseOrExpr(); +} + +fn parseOrExpr(self: *Parser) Error!*Ast.Expr { + var left = try self.parseAndExpr(); + while (self.matchKeyword("or")) { + const right = try self.parseAndExpr(); + left = try self.makeBinop(.or_, left, right); + } + return left; +} + +fn parseAndExpr(self: *Parser) Error!*Ast.Expr { + var left = try self.parseEqualityExpr(); + while (self.matchKeyword("and")) { + const right = try self.parseEqualityExpr(); + left = try self.makeBinop(.and_, left, right); + } + return left; +} + +fn parseEqualityExpr(self: *Parser) Error!*Ast.Expr { + var left = try self.parseRelationalExpr(); + while (equalityOp(self.peek())) |op| { + _ = self.advance(); + const right = try self.parseRelationalExpr(); + left = try self.makeBinop(op, left, right); + } + return left; +} + +fn parseRelationalExpr(self: *Parser) Error!*Ast.Expr { + var left = try self.parseAdditiveExpr(); + while (relationalOp(self.peek())) |op| { + _ = self.advance(); + const right = try self.parseAdditiveExpr(); + left = try self.makeBinop(op, left, right); + } + return left; +} + +fn parseAdditiveExpr(self: *Parser) Error!*Ast.Expr { + var left = try self.parseMultExpr(); + while (additiveOp(self.peek())) |op| { + _ = self.advance(); + const right = try self.parseMultExpr(); + left = try self.makeBinop(op, left, right); + } + return left; +} + +// After a complete unary expression, `*` is multiply; `div`/`mod` are +// operator-position keywords (tokenized as Name). +fn parseMultExpr(self: *Parser) Error!*Ast.Expr { + var left = try self.parseUnaryExpr(); + while (multOp(self.peek())) |op| { + _ = self.advance(); + const right = try self.parseUnaryExpr(); + left = try self.makeBinop(op, left, right); + } + return left; +} + +fn parseUnaryExpr(self: *Parser) Error!*Ast.Expr { + if (self.match(.minus)) { + const operand = try self.parseUnaryExpr(); + return try self.makeExpr(.{ .neg = operand }); + } + return self.parseUnionExpr(); +} + +fn parseUnionExpr(self: *Parser) Error!*Ast.Expr { + var left = try self.parsePathExpr(); + while (self.match(.pipe)) { + const right = try self.parsePathExpr(); + left = try self.makeBinop(.union_, left, right); + } + return left; +} + +// --- path expressions --- + +fn parsePathExpr(self: *Parser) Error!*Ast.Expr { + const t = self.peek(); + + if (t == .slash or t == .double_slash) { + return self.parseAbsPath(); + } + + // Filter-vs-relative-path disambiguation: a primary expression + // starts with `(`, string, number, `$`, or a `name(` where the + // name is *not* a node-type test (`node`/`text`/`comment`/`processing-instruction`). + const is_filter = switch (t) { + .lparen, .string, .number, .dollar => true, + .name => |name| self.lookahead(1) == .lparen and !isNodeTypeName(name), + else => false, + }; + + if (is_filter) { + var primary = try self.parsePrimaryExpr(); + while (self.match(.lbracket)) { + const pred = try self.parseExpr(); + _ = try self.expect(.rbracket); + primary = try self.makeExpr(.{ .filter = .{ .expr = primary, .predicate = pred } }); + } + if (self.peek() == .slash or self.peek() == .double_slash) { + const dsl = self.advance() == .double_slash; + var steps: std.ArrayList(Ast.Step) = .empty; + if (dsl) try steps.append(self.arena, descendantOrSelfStep()); + try self.parseRelStepsInto(&steps); + return try self.makeExpr(.{ .filter_path = .{ + .filter = primary, + .steps = steps.items, + } }); + } + return primary; + } + + return self.parseRelPath(); +} + +fn parseAbsPath(self: *Parser) Error!*Ast.Expr { + var steps: std.ArrayList(Ast.Step) = .empty; + if (self.match(.double_slash)) { + try steps.append(self.arena, descendantOrSelfStep()); + try self.parseRelStepsInto(&steps); + } else { + _ = try self.expect(.slash); + // `/` alone is the document root — no step required. + if (self.canStartStep()) try self.parseRelStepsInto(&steps); + } + return try self.makeExpr(.{ .path = .{ + .absolute = true, + .steps = steps.items, + } }); +} + +fn parseRelPath(self: *Parser) Error!*Ast.Expr { + var steps: std.ArrayList(Ast.Step) = .empty; + try self.parseRelStepsInto(&steps); + return try self.makeExpr(.{ .path = .{ + .absolute = false, + .steps = steps.items, + } }); +} + +fn parseRelStepsInto(self: *Parser, steps: *std.ArrayList(Ast.Step)) Error!void { + try steps.append(self.arena, try self.parseStep()); + while (self.peek() == .slash or self.peek() == .double_slash) { + if (self.advance() == .double_slash) { + try steps.append(self.arena, descendantOrSelfStep()); + } + try steps.append(self.arena, try self.parseStep()); + } +} + +fn canStartStep(self: *const Parser) bool { + return switch (self.peek()) { + .name, .star, .dot, .double_dot, .at => true, + else => false, + }; +} + +fn parseStep(self: *Parser) Error!Ast.Step { + // Abbreviated steps `.` and `..` carry no axis, node-test, or + // predicates — predicates after `.` are a parse error per polyfill. + if (self.match(.dot)) return abbreviatedStep(.self); + if (self.match(.double_dot)) return abbreviatedStep(.parent); + + var axis: Ast.Axis = .child; + if (self.match(.at)) { + axis = .attribute; + } else if (self.peek() == .name and self.lookahead(1) == .double_colon) { + const axis_name = self.advance().name; + _ = self.advance(); // `::` + axis = parseAxisName(axis_name); + } + + const node_test = try self.parseNodeTest(); + + var preds: std.ArrayList(*Ast.Expr) = .empty; + while (self.match(.lbracket)) { + const pred = try self.parseExpr(); + _ = try self.expect(.rbracket); + try preds.append(self.arena, pred); + } + + return .{ .axis = axis, .node_test = node_test, .predicates = preds.items }; +} + +fn parseNodeTest(self: *Parser) Error!Ast.NodeTest { + if (self.match(.star)) return .{ .name = "*" }; + if (self.peek() != .name) return error.ExpectedNodeTest; + + const name = self.peek().name; + if (typeTestKind(name)) |type_test| { + if (self.lookahead(1) == .lparen) { + _ = self.advance(); // name + _ = self.advance(); // `(` + // `processing-instruction("target")` consumes the literal but ignores it (decision #3 stub). + if (type_test == .processing_instruction and self.peek() == .string) { + _ = self.advance(); + } + _ = try self.expect(.rparen); + return .{ .type_test = type_test }; + } + } + _ = self.advance(); + return .{ .name = name }; +} + +fn parsePrimaryExpr(self: *Parser) Error!*Ast.Expr { + switch (self.peek()) { + .string => |s| { + _ = self.advance(); + return try self.makeExpr(.{ .literal = s }); + }, + .number => |n| { + _ = self.advance(); + return try self.makeExpr(.{ .number = n }); + }, + .dollar => { + _ = self.advance(); + const name_tok = try self.expect(.name); + return try self.makeExpr(.{ .var_ref = name_tok.name }); + }, + .lparen => { + _ = self.advance(); + const e = try self.parseExpr(); + _ = try self.expect(.rparen); + return e; + }, + .name => |name| { + _ = self.advance(); + _ = try self.expect(.lparen); + var args: std.ArrayList(*Ast.Expr) = .empty; + if (self.peek() != .rparen) { + try args.append(self.arena, try self.parseExpr()); + while (self.match(.comma)) { + try args.append(self.arena, try self.parseExpr()); + } + } + _ = try self.expect(.rparen); + return try self.makeExpr(.{ .fn_call = .{ .name = name, .args = args.items } }); + }, + else => return error.ExpectedPrimaryExpr, + } +} + +// --- pure helpers --- + +fn equalityOp(t: Token) ?Ast.BinOpKind { + return switch (t) { + .eq => .eq, + .neq => .neq, + else => null, + }; +} + +fn relationalOp(t: Token) ?Ast.BinOpKind { + return switch (t) { + .lt => .lt, + .gt => .gt, + .lte => .lte, + .gte => .gte, + else => null, + }; +} + +fn additiveOp(t: Token) ?Ast.BinOpKind { + return switch (t) { + .plus => .add, + .minus => .sub, + else => null, + }; +} + +fn multOp(t: Token) ?Ast.BinOpKind { + return switch (t) { + .star => .mul, + .name => |name| blk: { + if (std.mem.eql(u8, name, "div")) break :blk .div; + if (std.mem.eql(u8, name, "mod")) break :blk .mod; + break :blk null; + }, + else => null, + }; +} + +fn descendantOrSelfStep() Ast.Step { + return .{ + .axis = .descendant_or_self, + .node_test = .{ .type_test = .node }, + .predicates = &.{}, + }; +} + +fn abbreviatedStep(axis: Ast.Axis) Ast.Step { + return .{ + .axis = axis, + .node_test = .{ .type_test = .node }, + .predicates = &.{}, + }; +} + +fn isNodeTypeName(name: []const u8) bool { + return typeTestKind(name) != null; +} + +const type_test_lookup = std.StaticStringMap(Ast.TypeTest).initComptime(.{ + .{ "node", .node }, + .{ "text", .text }, + .{ "comment", .comment }, + .{ "processing-instruction", .processing_instruction }, +}); + +fn typeTestKind(name: []const u8) ?Ast.TypeTest { + return type_test_lookup.get(name); +} + +const axis_lookup = std.StaticStringMap(Ast.Axis).initComptime(.{ + .{ "child", .child }, + .{ "descendant", .descendant }, + .{ "descendant-or-self", .descendant_or_self }, + .{ "self", .self }, + .{ "parent", .parent }, + .{ "ancestor", .ancestor }, + .{ "ancestor-or-self", .ancestor_or_self }, + .{ "following-sibling", .following_sibling }, + .{ "preceding-sibling", .preceding_sibling }, + .{ "following", .following }, + .{ "preceding", .preceding }, + .{ "attribute", .attribute }, + .{ "namespace", .namespace }, +}); + +fn parseAxisName(name: []const u8) Ast.Axis { + return axis_lookup.get(name) orelse .unknown; +} + +// --------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------- + +const testing = std.testing; + +fn parseFixture(input: []const u8) !struct { arena: std.heap.ArenaAllocator, expr: *Ast.Expr } { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + errdefer arena.deinit(); + const expr = try parse(arena.allocator(), input); + return .{ .arena = arena, .expr = expr }; +} + +test "XPath.Parser: number literal" { + var fx = try parseFixture("42"); + defer fx.arena.deinit(); + try testing.expectEqual(@as(f64, 42), fx.expr.number); +} + +test "XPath.Parser: string literal" { + var fx = try parseFixture("'hello'"); + defer fx.arena.deinit(); + try testing.expectEqualStrings("hello", fx.expr.literal); +} + +test "XPath.Parser: variable reference strips $" { + var fx = try parseFixture("$x"); + defer fx.arena.deinit(); + try testing.expectEqualStrings("x", fx.expr.var_ref); +} + +test "XPath.Parser: parenthesized expression unwraps" { + var fx = try parseFixture("(42)"); + defer fx.arena.deinit(); + try testing.expectEqual(@as(f64, 42), fx.expr.number); +} + +test "XPath.Parser: function call with no args" { + var fx = try parseFixture("position()"); + defer fx.arena.deinit(); + try testing.expectEqualStrings("position", fx.expr.fn_call.name); + try testing.expectEqual(@as(usize, 0), fx.expr.fn_call.args.len); +} + +test "XPath.Parser: function call with args" { + var fx = try parseFixture("substring('abc', 2, 1)"); + defer fx.arena.deinit(); + const fc = fx.expr.fn_call; + try testing.expectEqualStrings("substring", fc.name); + try testing.expectEqual(@as(usize, 3), fc.args.len); + try testing.expectEqualStrings("abc", fc.args[0].literal); + try testing.expectEqual(@as(f64, 2), fc.args[1].number); + try testing.expectEqual(@as(f64, 1), fc.args[2].number); +} + +test "XPath.Parser: arithmetic precedence — mul binds tighter than add" { + var fx = try parseFixture("1 + 2 * 3"); + defer fx.arena.deinit(); + // Expected AST: add(1, mul(2, 3)) + const top = fx.expr.binop; + try testing.expectEqual(Ast.BinOpKind.add, top.op); + try testing.expectEqual(@as(f64, 1), top.left.number); + const mul = top.right.binop; + try testing.expectEqual(Ast.BinOpKind.mul, mul.op); + try testing.expectEqual(@as(f64, 2), mul.left.number); + try testing.expectEqual(@as(f64, 3), mul.right.number); +} + +test "XPath.Parser: arithmetic left-associativity" { + var fx = try parseFixture("1 - 2 - 3"); + defer fx.arena.deinit(); + // Expected AST: sub(sub(1, 2), 3) + const top = fx.expr.binop; + try testing.expectEqual(Ast.BinOpKind.sub, top.op); + try testing.expectEqual(@as(f64, 3), top.right.number); + const inner = top.left.binop; + try testing.expectEqual(Ast.BinOpKind.sub, inner.op); + try testing.expectEqual(@as(f64, 1), inner.left.number); + try testing.expectEqual(@as(f64, 2), inner.right.number); +} + +test "XPath.Parser: div and mod are operator-position keywords" { + var fx = try parseFixture("7 div 2"); + defer fx.arena.deinit(); + try testing.expectEqual(Ast.BinOpKind.div, fx.expr.binop.op); + + var fx2 = try parseFixture("7 mod 2"); + defer fx2.arena.deinit(); + try testing.expectEqual(Ast.BinOpKind.mod, fx2.expr.binop.op); +} + +test "XPath.Parser: comparison operators" { + inline for (.{ + .{ "1 = 2", Ast.BinOpKind.eq }, + .{ "1 != 2", Ast.BinOpKind.neq }, + .{ "1 < 2", Ast.BinOpKind.lt }, + .{ "1 <= 2", Ast.BinOpKind.lte }, + .{ "1 > 2", Ast.BinOpKind.gt }, + .{ "1 >= 2", Ast.BinOpKind.gte }, + }) |case| { + var fx = try parseFixture(case[0]); + defer fx.arena.deinit(); + try testing.expectEqual(case[1], fx.expr.binop.op); + } +} + +test "XPath.Parser: logical or/and short-circuit chain" { + var fx = try parseFixture("a or b and c"); + defer fx.arena.deinit(); + // Expected AST: or(path(a), and(path(b), path(c))) — and binds tighter + const top = fx.expr.binop; + try testing.expectEqual(Ast.BinOpKind.or_, top.op); + try testing.expectEqual(Ast.BinOpKind.and_, top.right.binop.op); +} + +test "XPath.Parser: unary minus" { + var fx = try parseFixture("-1"); + defer fx.arena.deinit(); + try testing.expectEqual(@as(f64, 1), fx.expr.neg.number); +} + +test "XPath.Parser: union" { + var fx = try parseFixture("a | b"); + defer fx.arena.deinit(); + try testing.expectEqual(Ast.BinOpKind.union_, fx.expr.binop.op); +} + +test "XPath.Parser: absolute path / alone is document root" { + var fx = try parseFixture("/"); + defer fx.arena.deinit(); + const path = fx.expr.path; + try testing.expect(path.absolute); + try testing.expectEqual(@as(usize, 0), path.steps.len); +} + +test "XPath.Parser: absolute path /foo" { + var fx = try parseFixture("/foo"); + defer fx.arena.deinit(); + const path = fx.expr.path; + try testing.expect(path.absolute); + try testing.expectEqual(@as(usize, 1), path.steps.len); + try testing.expectEqualStrings("foo", path.steps[0].node_test.name); +} + +test "XPath.Parser: //foo expands to descendant-or-self::node()/foo" { + var fx = try parseFixture("//foo"); + defer fx.arena.deinit(); + const path = fx.expr.path; + try testing.expect(path.absolute); + try testing.expectEqual(@as(usize, 2), path.steps.len); + try testing.expectEqual(Ast.Axis.descendant_or_self, path.steps[0].axis); + try testing.expectEqual(Ast.TypeTest.node, path.steps[0].node_test.type_test); + try testing.expectEqualStrings("foo", path.steps[1].node_test.name); +} + +test "XPath.Parser: relative path child::foo/bar" { + var fx = try parseFixture("foo/bar"); + defer fx.arena.deinit(); + const path = fx.expr.path; + try testing.expect(!path.absolute); + try testing.expectEqual(@as(usize, 2), path.steps.len); + try testing.expectEqual(Ast.Axis.child, path.steps[0].axis); + try testing.expectEqualStrings("foo", path.steps[0].node_test.name); + try testing.expectEqualStrings("bar", path.steps[1].node_test.name); +} + +test "XPath.Parser: abbreviated steps . and .." { + var fx = try parseFixture("./.."); + defer fx.arena.deinit(); + const path = fx.expr.path; + try testing.expectEqual(@as(usize, 2), path.steps.len); + try testing.expectEqual(Ast.Axis.self, path.steps[0].axis); + try testing.expectEqual(Ast.Axis.parent, path.steps[1].axis); +} + +test "XPath.Parser: attribute axis @class" { + var fx = try parseFixture("@class"); + defer fx.arena.deinit(); + const step = fx.expr.path.steps[0]; + try testing.expectEqual(Ast.Axis.attribute, step.axis); + try testing.expectEqualStrings("class", step.node_test.name); +} + +test "XPath.Parser: all 12 named axes parse correctly" { + inline for (.{ + .{ "child::a", Ast.Axis.child }, + .{ "descendant::a", Ast.Axis.descendant }, + .{ "descendant-or-self::a", Ast.Axis.descendant_or_self }, + .{ "self::a", Ast.Axis.self }, + .{ "parent::a", Ast.Axis.parent }, + .{ "ancestor::a", Ast.Axis.ancestor }, + .{ "ancestor-or-self::a", Ast.Axis.ancestor_or_self }, + .{ "following-sibling::a", Ast.Axis.following_sibling }, + .{ "preceding-sibling::a", Ast.Axis.preceding_sibling }, + .{ "following::a", Ast.Axis.following }, + .{ "preceding::a", Ast.Axis.preceding }, + .{ "namespace::a", Ast.Axis.namespace }, + }) |case| { + var fx = try parseFixture(case[0]); + defer fx.arena.deinit(); + try testing.expectEqual(case[1], fx.expr.path.steps[0].axis); + } +} + +test "XPath.Parser: unknown axis name maps to .unknown — polyfill parity" { + var fx = try parseFixture("wibble::a"); + defer fx.arena.deinit(); + try testing.expectEqual(Ast.Axis.unknown, fx.expr.path.steps[0].axis); +} + +test "XPath.Parser: wildcard *" { + var fx = try parseFixture("*"); + defer fx.arena.deinit(); + try testing.expectEqualStrings("*", fx.expr.path.steps[0].node_test.name); +} + +test "XPath.Parser: namespace-prefixed name and wildcard" { + var fx = try parseFixture("svg:rect"); + defer fx.arena.deinit(); + try testing.expectEqualStrings("svg:rect", fx.expr.path.steps[0].node_test.name); + + var fx2 = try parseFixture("svg:*"); + defer fx2.arena.deinit(); + try testing.expectEqualStrings("svg:*", fx2.expr.path.steps[0].node_test.name); +} + +test "XPath.Parser: node-type tests" { + inline for (.{ + .{ "node()", Ast.TypeTest.node }, + .{ "text()", Ast.TypeTest.text }, + .{ "comment()", Ast.TypeTest.comment }, + .{ "processing-instruction()", Ast.TypeTest.processing_instruction }, + }) |case| { + var fx = try parseFixture(case[0]); + defer fx.arena.deinit(); + try testing.expectEqual(case[1], fx.expr.path.steps[0].node_test.type_test); + } +} + +test "XPath.Parser: processing-instruction with literal target — consumed but ignored" { + var fx = try parseFixture("processing-instruction('xml-stylesheet')"); + defer fx.arena.deinit(); + try testing.expectEqual(Ast.TypeTest.processing_instruction, fx.expr.path.steps[0].node_test.type_test); +} + +test "XPath.Parser: predicate on step" { + var fx = try parseFixture("p[1]"); + defer fx.arena.deinit(); + const step = fx.expr.path.steps[0]; + try testing.expectEqual(@as(usize, 1), step.predicates.len); + try testing.expectEqual(@as(f64, 1), step.predicates[0].number); +} + +test "XPath.Parser: multi-predicate step" { + var fx = try parseFixture("p[1][@x]"); + defer fx.arena.deinit(); + const step = fx.expr.path.steps[0]; + try testing.expectEqual(@as(usize, 2), step.predicates.len); +} + +test "XPath.Parser: filter expression with predicate parses as Filter, not Step" { + var fx = try parseFixture("(//a)[1]"); + defer fx.arena.deinit(); + // Top level is Filter wrapping a parenthesized path with one predicate. + const filt = fx.expr.filter; + try testing.expectEqual(@as(f64, 1), filt.predicate.number); + try testing.expect(filt.expr.path.absolute); +} + +test "XPath.Parser: filter with multi-predicate nests" { + var fx = try parseFixture("(//a)[1][2]"); + defer fx.arena.deinit(); + const outer = fx.expr.filter; + try testing.expectEqual(@as(f64, 2), outer.predicate.number); + const inner = outer.expr.filter; + try testing.expectEqual(@as(f64, 1), inner.predicate.number); +} + +test "XPath.Parser: filter with location-path tail (filter_path)" { + var fx = try parseFixture("(//a)/b"); + defer fx.arena.deinit(); + const fp = fx.expr.filter_path; + try testing.expect(fp.filter.path.absolute); + try testing.expectEqual(@as(usize, 1), fp.steps.len); + try testing.expectEqualStrings("b", fp.steps[0].node_test.name); +} + +test "XPath.Parser: filter with // tail prepends descendant-or-self" { + var fx = try parseFixture("(//a)//b"); + defer fx.arena.deinit(); + const fp = fx.expr.filter_path; + try testing.expectEqual(@as(usize, 2), fp.steps.len); + try testing.expectEqual(Ast.Axis.descendant_or_self, fp.steps[0].axis); + try testing.expectEqualStrings("b", fp.steps[1].node_test.name); +} + +test "XPath.Parser: function call followed by predicate" { + var fx = try parseFixture("id('x')[1]"); + defer fx.arena.deinit(); + const filt = fx.expr.filter; + try testing.expectEqual(@as(f64, 1), filt.predicate.number); + try testing.expectEqualStrings("id", filt.expr.fn_call.name); +} + +test "XPath.Parser: complex representative expression" { + var fx = try parseFixture("//div[@class='active']/p[position()<=last()-1]"); + defer fx.arena.deinit(); + const path = fx.expr.path; + try testing.expect(path.absolute); + try testing.expectEqual(@as(usize, 3), path.steps.len); + try testing.expectEqual(Ast.Axis.descendant_or_self, path.steps[0].axis); + try testing.expectEqualStrings("div", path.steps[1].node_test.name); + try testing.expectEqual(@as(usize, 1), path.steps[1].predicates.len); + try testing.expectEqualStrings("p", path.steps[2].node_test.name); + try testing.expectEqual(@as(usize, 1), path.steps[2].predicates.len); +} + +fn expectParseError(input: []const u8, expected: anyerror) !void { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + try testing.expectError(expected, parse(arena.allocator(), input)); +} + +test "XPath.Parser: error on unbalanced paren" { + try expectParseError("(1", error.UnexpectedToken); +} + +test "XPath.Parser: error on unbalanced bracket" { + try expectParseError("p[1", error.UnexpectedToken); +} + +test "XPath.Parser: error on missing node test" { + try expectParseError("child::", error.ExpectedNodeTest); +} + +test "XPath.Parser: bare `+` falls through to step and reports missing node test" { + // Matches polyfill: + isn't a path/primary start, so the parser + // ends up in parseStep with no name to use as node test. + try expectParseError("+", error.ExpectedNodeTest); +} + +test "XPath.Parser: error on trailing tokens" { + try expectParseError("1 2", error.UnexpectedToken); +} + +test "XPath.Parser: empty string falls through to step and reports missing node test" { + try expectParseError("", error.ExpectedNodeTest); +} + +test "XPath.Parser: 91-case gem battery — every expression parses" { + // Source: capybara-lightpanda spec/features/driver_spec.rb, + // describe "XPath polyfill — XPath 1.0 conformance" battery. + // Phase 2 acceptance criterion (references/phases.md). + const battery = [_][]const u8{ + "/html", + "/html/body", + "/", + "//h1", + "//ul/li", + "//ul//li", + ".", + ".//li", + "//section/*", + "//*[@id='heading']", + "//li[1]/following-sibling::li", + "//li[5]/preceding-sibling::li", + "//li/parent::ul", + "//li/ancestor::body", + "//li/ancestor-or-self::body", + "//li[3]/preceding::li", + "//li[1]/following::li", + "//ul/descendant::li", + "//ul/descendant-or-self::li", + "//section[1]/child::span", + "//*[@id='heading']/self::h1", + "//a[1]/attribute::href", + "//a[1]/@*", + "//li[1]", + "//li[last()]", + "//li[last() - 1]", + "//li[position() = 1]", + "//li[position() > 2]", + "//li[position() mod 2 = 1]", + "(//li)[1]", + "(//section)[2]", + "//li[3]/preceding-sibling::li[1]", + "//li[5]/ancestor::*[1]", + "//li[contains(concat(' ', @class, ' '), ' even ')][2]", + "//*[@id='heading' and @class='primary']", + "//*[@id='heading' or @id='p1']", + "//section[a]", + "//section[count(span) = 2]", + "//ul[count(li) = 5]", + "//tr[td[1]]", + "//tr[td/text() = 'Bob']", + "//*[starts-with(@id, 'link')]", + "//*[normalize-space() = 'Hello World']", + "//*[normalize-space(.) = 'Item 1']", + "//*[concat(@id, '-x') = 'heading-x']", + "//*[substring(@id, 1, 1) = 'p']", + "//*[substring(@id, 2, 1) = '1' and starts-with(@id, 'p')]", + "//p[translate(@id, 'p', 'q') = 'q1']", + "//*[substring-before(@id, '1') = 'p']", + "//*[substring-after(@id, 'lin') = 'k1']", + "//tr[number(td[2]) > 28]", + "//tr[floor(number(td[2]) div 10) = 3]", + "//tr[ceiling(number(td[2]) div 10) = 3]", + "//tr[round(number(td[2]) div 10) = 3]", + "//ul[sum(li/@data-len) = 0]", + "//p[boolean(@lang)]", + "//*[false()]", + "//*[name() = 'h1']", + "//*[local-name() = 'h1']", + "id('heading')", + "id('heading p1')", + "id(//em/parent::p/@id)", + "//h1 | //title", + "//h1 | //*[@id='p1']", + "//*[@id='heading'] | //*[@id='heading']", + "//li[position() + 1 = 3]", + "//li[position() - 1 = 0]", + "//li[position() * 2 = 4]", + "//li[position() div 2 = 1]", + "//li[(position() mod 2) = 0]", + "//tr[number(td[2]) = 30]", + "//tr[number(td[2]) != 30]", + "//tr[number(td[2]) < 30]", + "//tr[number(td[2]) <= 30]", + "//tr[number(td[2]) > 30]", + "//tr[number(td[2]) >= 30]", + "//tr[td[2] = 30]", + "//tr[td[2] = '30']", + "//comment()", + ".//a[contains(normalize-space(string(.)), 'Click me')]", + ".//input[(./@type = 'text')]", + ".//*[@id='heading']", + ".//li[contains(concat(' ', @class, ' '), ' even ')]", + "//*[@id='heading']/text()", + "//em/parent::p", + "//p[em]", + "//p[not(em)]", + "//section[a/@href = '/foo']", + "//ul/li[last()][position() = last()]", + "//ul[string(count(li)) = '5']", + "//body[count(//*[contains(@class, 'item')]) = 5]", + }; + try testing.expectEqual(@as(usize, 91), battery.len); + + for (battery) |expr| { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + _ = parse(arena.allocator(), expr) catch |err| { + std.debug.print("\n failed to parse: {s}\n error: {s}\n", .{ expr, @errorName(err) }); + return err; + }; + } +} diff --git a/src/browser/xpath/Result.zig b/src/browser/xpath/Result.zig new file mode 100644 index 00000000..c0822054 --- /dev/null +++ b/src/browser/xpath/Result.zig @@ -0,0 +1,200 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! XPath 1.0 runtime values. +//! +//! Mirrors the polyfill's untagged JS values (lib/capybara/lightpanda/ +//! javascripts/index.js, the `evaluate()` return convention): a node-set +//! is a JS array of nodes, and the three scalar types are JS primitives. +//! In Zig we tag the union explicitly. Type coercion (`toString`, +//! `toNumber`, `toBoolean`) follows XPath 1.0 spec §3, with HTML-pragmatic +//! shortcuts inherited from the polyfill (decision #2). + +const std = @import("std"); +const Allocator = std.mem.Allocator; + +const Node = @import("../webapi/Node.zig"); +const CData = Node.CData; + +pub const Result = union(enum) { + /// Owned by the evaluator's arena. Order is significant only at the + /// public boundary, where the evaluator sorts to document order. + node_set: []const *Node, + number: f64, + string: []const u8, + boolean: bool, +}; + +/// XPath spec §5: string-value of a node. +/// +/// - Element / Document: concatenated text descendants (excluding +/// comments and processing-instructions; matches `Node.getTextContent`) +/// - Attribute: attribute value +/// - Text / Comment / CDATA / PI: the node's data +/// - DocumentType / DocumentFragment: empty (matches polyfill's +/// `nodeValue || textContent || ''` fallthrough) +/// +/// The returned slice is borrowed from the node for cdata/attribute +/// (cheap, no allocation) and arena-allocated for element/document +/// (concatenation buffer). +pub fn stringValueOf(arena: Allocator, node: *Node) error{WriteFailed}![]const u8 { + return switch (node._type) { + .attribute => |attr| attr._value.str(), + .cdata => |cd| cd._data.str(), + .element, .document => blk: { + var buf = std.Io.Writer.Allocating.init(arena); + try node.getTextContent(&buf.writer); + break :blk buf.written(); + }, + .document_type, .document_fragment => "", + }; +} + +pub fn toBoolean(val: Result) bool { + return switch (val) { + .boolean => |b| b, + .number => |n| n != 0 and !std.math.isNan(n), + .string => |s| s.len > 0, + .node_set => |ns| ns.len > 0, + }; +} + +/// Numeric coercion. Empty / whitespace-only strings produce NaN +/// (XPath spec §4.4 — matches JS `Number(' ') === 0` *not* applying +/// because the polyfill calls `s.trim() === '' ? NaN : Number(s)`). +pub fn toNumber(arena: Allocator, val: Result) error{WriteFailed}!f64 { + return switch (val) { + .number => |n| n, + .boolean => |b| if (b) 1 else 0, + .string => |s| stringToNumber(s), + .node_set => |ns| blk: { + if (ns.len == 0) break :blk std.math.nan(f64); + const sv = try stringValueOf(arena, ns[0]); + break :blk stringToNumber(sv); + }, + }; +} + +pub fn stringToNumber(s: []const u8) f64 { + const trimmed = std.mem.trim(u8, s, &std.ascii.whitespace); + if (trimmed.len == 0) return std.math.nan(f64); + return std.fmt.parseFloat(f64, trimmed) catch std.math.nan(f64); +} + +/// String coercion. Allocates only for `.number` (formatting) and for +/// `.node_set` whose first node is an Element/Document (text content +/// concatenation). Boolean → static string. String → borrowed. +pub fn toString(arena: Allocator, val: Result) error{ OutOfMemory, WriteFailed }![]const u8 { + return switch (val) { + .string => |s| s, + .boolean => |b| if (b) "true" else "false", + .number => |n| try numberToString(arena, n), + .node_set => |ns| if (ns.len == 0) "" else try stringValueOf(arena, ns[0]), + }; +} + +/// XPath spec §4.2: NaN, ±0, and ±Infinity have specific spellings; +/// integer-valued numbers print without trailing `.0`. Diverges from +/// Zig's default `{d}` which prints `nan`/`inf` and may emit `-0`. +pub fn numberToString(arena: Allocator, n: f64) error{OutOfMemory}![]const u8 { + if (std.math.isNan(n)) return "NaN"; + if (std.math.isPositiveInf(n)) return "Infinity"; + if (std.math.isNegativeInf(n)) return "-Infinity"; + if (n == 0) return "0"; // covers +0 and -0 + if (@trunc(n) == n and n >= -9.007199254740992e15 and n <= 9.007199254740992e15) { + return std.fmt.allocPrint(arena, "{d}", .{@as(i64, @intFromFloat(n))}); + } + return std.fmt.allocPrint(arena, "{d}", .{n}); +} + +const testing = std.testing; + +test "Result: toBoolean" { + try testing.expect(toBoolean(.{ .boolean = true })); + try testing.expect(!toBoolean(.{ .boolean = false })); + try testing.expect(toBoolean(.{ .number = 1 })); + try testing.expect(!toBoolean(.{ .number = 0 })); + try testing.expect(!toBoolean(.{ .number = std.math.nan(f64) })); + try testing.expect(toBoolean(.{ .string = "x" })); + try testing.expect(!toBoolean(.{ .string = "" })); + try testing.expect(!toBoolean(.{ .node_set = &.{} })); +} + +test "Result: stringToNumber" { + try testing.expectEqual(@as(f64, 42), stringToNumber("42")); + try testing.expectEqual(@as(f64, 3.14), stringToNumber("3.14")); + try testing.expectEqual(@as(f64, -1), stringToNumber("-1")); + try testing.expectEqual(@as(f64, 5), stringToNumber(" 5 ")); + try testing.expect(std.math.isNan(stringToNumber(""))); + try testing.expect(std.math.isNan(stringToNumber(" "))); + try testing.expect(std.math.isNan(stringToNumber("abc"))); +} + +test "Result: numberToString — integers print without decimal" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + try testing.expectEqualStrings("5", try numberToString(a, 5)); + try testing.expectEqualStrings("0", try numberToString(a, 0)); + try testing.expectEqualStrings("0", try numberToString(a, -0.0)); + try testing.expectEqualStrings("-1", try numberToString(a, -1)); + try testing.expectEqualStrings("42", try numberToString(a, 42.0)); +} + +test "Result: numberToString — special values" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + try testing.expectEqualStrings("NaN", try numberToString(a, std.math.nan(f64))); + try testing.expectEqualStrings("Infinity", try numberToString(a, std.math.inf(f64))); + try testing.expectEqualStrings("-Infinity", try numberToString(a, -std.math.inf(f64))); +} + +test "Result: numberToString — floats" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + try testing.expectEqualStrings("3.14", try numberToString(a, 3.14)); + try testing.expectEqualStrings("0.5", try numberToString(a, 0.5)); +} + +test "Result: toString — boolean returns static string" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + try testing.expectEqualStrings("true", try toString(arena.allocator(), .{ .boolean = true })); + try testing.expectEqualStrings("false", try toString(arena.allocator(), .{ .boolean = false })); +} + +test "Result: toString — node-set with empty arr is empty" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + try testing.expectEqualStrings("", try toString(arena.allocator(), .{ .node_set = &.{} })); +} + +test "Result: toNumber — empty node-set is NaN" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + try testing.expect(std.math.isNan(try toNumber(arena.allocator(), .{ .node_set = &.{} }))); +} + +test "Result: toNumber — boolean coerces to 0/1" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + try testing.expectEqual(@as(f64, 1), try toNumber(arena.allocator(), .{ .boolean = true })); + try testing.expectEqual(@as(f64, 0), try toNumber(arena.allocator(), .{ .boolean = false })); +} diff --git a/src/browser/xpath/Tokenizer.zig b/src/browser/xpath/Tokenizer.zig new file mode 100644 index 00000000..7b3a7a27 --- /dev/null +++ b/src/browser/xpath/Tokenizer.zig @@ -0,0 +1,466 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! XPath 1.0 expression tokenizer. +//! +//! Mirrors the polyfill `tokenize()` in capybara-lightpanda +//! (lib/capybara/lightpanda/javascripts/index.js) to preserve its +//! HTML-pragmatic behavior: lenient whitespace, case-preserving names, +//! no escape processing in string literals (use the other quote type +//! to embed), unknown characters silently skipped. +//! +//! The tokenizer borrows from the input slice and never allocates. +//! `next()` always returns a token; `.eof` is terminal and idempotent. + +const std = @import("std"); + +const Tokenizer = @This(); + +pub const Token = union(enum) { + /// String literal: `'foo'` or `"foo"`. Quotes are stripped; escapes + /// are not interpreted (the polyfill takes the raw substring). + string: []const u8, + + /// Numeric literal: `123`, `1.5`, `.5`, `5.`. f64 matches the + /// runtime number type. + number: f64, + + /// Bare identifier — element/function/axis name, an `or`/`and`/ + /// `div`/`mod` keyword, or a namespace-prefixed name (`prefix:local`, + /// `prefix:*`). The colon and optional wildcard are preserved + /// verbatim so the parser can split. + name: []const u8, + + slash, // `/` + double_slash, // `//` + dot, // `.` + double_dot, // `..` + at, // `@` + lparen, // `(` + rparen, // `)` + lbracket, // `[` + rbracket, // `]` + comma, // `,` + pipe, // `|` + eq, // `=` + neq, // `!=` + lt, // `<` + lte, // `<=` + gt, // `>` + gte, // `>=` + plus, // `+` + minus, // `-` + star, // `*` + dollar, // `$` + double_colon, // `::` + eof, +}; + +input: []const u8, +position: usize = 0, + +fn isEof(self: *const Tokenizer) bool { + return self.position >= self.input.len; +} + +// True iff the input has at least `n` bytes left after the current one +// — i.e. `byteAt(n)` will not read past the end. +fn hasAtLeast(self: *const Tokenizer, n: usize) bool { + return self.position + n < self.input.len; +} + +fn byteAt(self: *const Tokenizer, offset: usize) u8 { + return self.input[self.position + offset]; +} + +fn skipWhitespace(self: *Tokenizer) void { + while (!self.isEof()) { + switch (self.input[self.position]) { + ' ', '\t', '\n', '\r' => self.position += 1, + else => return, + } + } +} + +fn isNameStart(c: u8) bool { + return (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '_'; +} + +fn isNameContinue(c: u8) bool { + return isNameStart(c) or std.ascii.isDigit(c) or c == '-' or c == '.'; +} + +fn consumeString(self: *Tokenizer, quote: u8) Token { + self.position += 1; // opening quote + const start = self.position; + while (!self.isEof() and self.input[self.position] != quote) { + self.position += 1; + } + const value = self.input[start..self.position]; + // Closing quote skipped; at EOF we just emit what we have (polyfill parity). + if (!self.isEof()) self.position += 1; + return .{ .string = value }; +} + +fn consumeNumber(self: *Tokenizer) Token { + const start = self.position; + while (!self.isEof() and std.ascii.isDigit(self.input[self.position])) { + self.position += 1; + } + if (!self.isEof() and self.input[self.position] == '.') { + self.position += 1; + while (!self.isEof() and std.ascii.isDigit(self.input[self.position])) { + self.position += 1; + } + } + // Caller only enters consumeNumber on a digit or `.digit`, so the + // slice is always `\d+(\.\d*)?` or `\.\d+` — both accepted by + // parseFloat (verified against Zig 0.15.2). + const value = std.fmt.parseFloat(f64, self.input[start..self.position]) catch unreachable; + return .{ .number = value }; +} + +fn consumeName(self: *Tokenizer) Token { + const start = self.position; + while (!self.isEof() and isNameContinue(self.input[self.position])) { + self.position += 1; + } + + // Optional namespace prefix: `prefix:local` or `prefix:*`. A `::` + // is the axis separator and belongs to the next token, so peek + // for a single `:` not followed by another `:`. + if (!self.isEof() and self.input[self.position] == ':' and + (self.position + 1 >= self.input.len or self.input[self.position + 1] != ':')) + { + self.position += 1; // `:` + if (!self.isEof() and self.input[self.position] == '*') { + self.position += 1; + } else { + while (!self.isEof() and isNameContinue(self.input[self.position])) { + self.position += 1; + } + } + } + + return .{ .name = self.input[start..self.position] }; +} + +pub fn next(self: *Tokenizer) Token { + while (true) { + self.skipWhitespace(); + if (self.isEof()) return .eof; + + const c = self.byteAt(0); + + if (c == '"' or c == '\'') { + return self.consumeString(c); + } + + if (std.ascii.isDigit(c) or (c == '.' and self.hasAtLeast(1) and std.ascii.isDigit(self.byteAt(1)))) { + return self.consumeNumber(); + } + + if (self.hasAtLeast(1)) { + const c2 = self.byteAt(1); + switch (c) { + '/' => if (c2 == '/') { + self.position += 2; + return .double_slash; + }, + ':' => if (c2 == ':') { + self.position += 2; + return .double_colon; + }, + '!' => if (c2 == '=') { + self.position += 2; + return .neq; + }, + '<' => if (c2 == '=') { + self.position += 2; + return .lte; + }, + '>' => if (c2 == '=') { + self.position += 2; + return .gte; + }, + '.' => if (c2 == '.') { + self.position += 2; + return .double_dot; + }, + else => {}, + } + } + + const single: ?Token = switch (c) { + '(' => .lparen, + ')' => .rparen, + '[' => .lbracket, + ']' => .rbracket, + ',' => .comma, + '|' => .pipe, + '=' => .eq, + '<' => .lt, + '>' => .gt, + '+' => .plus, + '-' => .minus, + '*' => .star, + '$' => .dollar, + '/' => .slash, + '@' => .at, + '.' => .dot, + else => null, + }; + if (single) |tok| { + self.position += 1; + return tok; + } + + if (isNameStart(c)) { + return self.consumeName(); + } + + // Polyfill parity (decision #2): unknown characters are + // silently skipped, never an error. + self.position += 1; + } +} + +const testing = std.testing; + +fn expectTokens(input: []const u8, expected: []const Token) !void { + var tokenizer = Tokenizer{ .input = input }; + for (expected) |exp| { + const got = tokenizer.next(); + try testing.expectEqualDeep(exp, got); + } +} + +test "XPath.Tokenizer: empty input emits EOF" { + try expectTokens("", &.{.eof}); +} + +test "XPath.Tokenizer: only whitespace emits EOF" { + try expectTokens(" \t\n\r ", &.{.eof}); +} + +test "XPath.Tokenizer: EOF idempotent past end" { + var t = Tokenizer{ .input = "" }; + try testing.expectEqual(Token.eof, t.next()); + try testing.expectEqual(Token.eof, t.next()); + try testing.expectEqual(Token.eof, t.next()); +} + +test "XPath.Tokenizer: single-char operators" { + try expectTokens("()[],|=<>+-*$/@.", &.{ + .lparen, .rparen, .lbracket, .rbracket, .comma, .pipe, + .eq, .lt, .gt, .plus, .minus, .star, + .dollar, .slash, .at, .dot, .eof, + }); +} + +test "XPath.Tokenizer: two-char operators" { + try expectTokens("// :: != <= >= ..", &.{ + .double_slash, .double_colon, .neq, .lte, .gte, .double_dot, .eof, + }); +} + +test "XPath.Tokenizer: two-char vs single-char disambiguation" { + try expectTokens("/a/b", &.{ + .slash, .{ .name = "a" }, .slash, .{ .name = "b" }, .eof, + }); + try expectTokens("//a", &.{ .double_slash, .{ .name = "a" }, .eof }); + try expectTokens("a 1 and q[1] == '/') return true; + if (q[0] == '(' and q.len > 1) { + if (q[1] == '/') return true; + if (q[1] == '.' and q.len > 2 and q[2] == '/') return true; + } + return std.mem.indexOf(u8, q, "::") != null; +} + // https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-performSearch fn performSearch(cmd: *CDP.Command) !void { const params = (try cmd.params(struct { @@ -100,15 +116,23 @@ fn performSearch(cmd: *CDP.Command) !void { const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded; const frame = bc.session.currentFrame() orelse return error.FrameNotLoaded; - const list = try Selector.querySelectorAll(frame.window._document.asNode(), params.query, frame); + const root = frame.window._document.asNode(); + + if (isXPathQuery(params.query)) { + const arena = try frame.getArena(.medium, "DOM.performSearch"); + defer frame.releaseArena(arena); + const nodes = try xpath.searchAll(arena, frame, root, params.query); + return finishSearch(cmd, bc, nodes); + } + + const list = try Selector.querySelectorAll(root, params.query, frame); defer list.deinit(frame._page); + return finishSearch(cmd, bc, list._nodes); +} - const search = try bc.node_search_list.create(list._nodes); - - // dispatch setChildNodesEvents to inform the client of the subpart of node - // tree covering the results. - try dispatchSetChildNodes(cmd, list._nodes); - +fn finishSearch(cmd: *CDP.Command, bc: *CDP.BrowserContext, nodes: []const *DOMNode) !void { + const search = try bc.node_search_list.create(nodes); + try dispatchSetChildNodes(cmd, nodes); return cmd.sendResult(.{ .searchId = search.name, .resultCount = @as(u32, @intCast(search.node_ids.len)), @@ -616,6 +640,70 @@ test "cdp.dom: search flow" { try ctx.expectSentError(-31998, "SearchResultNotFound", .{ .id = 17 }); } +test "cdp.dom: performSearch with XPath" { + var ctx = try testing.context(); + defer ctx.deinit(); + + _ = try ctx.loadBrowserContext(.{ .id = "BID-A", .url = "cdp/perform_search_xpath.html" }); + + try ctx.processMessage(.{ + .id = 20, + .method = "DOM.performSearch", + .params = .{ .query = "//p" }, + }); + try ctx.expectSentResult(.{ .searchId = "0", .resultCount = 3 }, .{ .id = 20 }); + + try ctx.processMessage(.{ + .id = 21, + .method = "DOM.performSearch", + .params = .{ .query = "descendant::p" }, + }); + try ctx.expectSentResult(.{ .searchId = "1", .resultCount = 3 }, .{ .id = 21 }); + + try ctx.processMessage(.{ + .id = 22, + .method = "DOM.performSearch", + .params = .{ .query = "//*[@id='outer']" }, + }); + try ctx.expectSentResult(.{ .searchId = "2", .resultCount = 1 }, .{ .id = 22 }); + + try ctx.processMessage(.{ + .id = 23, + .method = "DOM.performSearch", + .params = .{ .query = "p" }, + }); + try ctx.expectSentResult(.{ .searchId = "3", .resultCount = 3 }, .{ .id = 23 }); + + try ctx.processMessage(.{ + .id = 24, + .method = "DOM.performSearch", + .params = .{ .query = "div p" }, + }); + try ctx.expectSentResult(.{ .searchId = "4", .resultCount = 2 }, .{ .id = 24 }); +} + +test "cdp.dom: isXPathQuery heuristic" { + // XPath-shaped queries — each line covers a distinct heuristic branch. + try std.testing.expect(isXPathQuery("/html")); + try std.testing.expect(isXPathQuery("//p")); + try std.testing.expect(isXPathQuery(".//foo")); + try std.testing.expect(isXPathQuery("(//foo)[1]")); + try std.testing.expect(isXPathQuery("(./bar)[2]")); + try std.testing.expect(isXPathQuery("descendant::p")); + try std.testing.expect(isXPathQuery("ancestor-or-self::*")); + try std.testing.expect(isXPathQuery("//*[@id='x']")); + + // CSS-shaped queries — fall through to the existing path. + try std.testing.expect(!isXPathQuery("")); + try std.testing.expect(!isXPathQuery("p")); + try std.testing.expect(!isXPathQuery("div p")); + try std.testing.expect(!isXPathQuery("#main")); + try std.testing.expect(!isXPathQuery(".cls")); + try std.testing.expect(!isXPathQuery("[data-x]")); + try std.testing.expect(!isXPathQuery("(p)")); // parens without path → CSS + try std.testing.expect(!isXPathQuery(".x")); // leading dot without / +} + test "cdp.dom: querySelector unknown search id" { var ctx = try testing.context(); defer ctx.deinit(); From 0fcd47e1e112489317e868c4cc2d415a03547545 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 29 Apr 2026 00:36:00 +0200 Subject: [PATCH 02/12] xpath: dupe expression into arena before parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Parser borrows string slices from its input for AST literals, names, and var refs. Without duping, the AST holds slices into the JS call_arena, which is reset when the top-level call returns — every subsequent evaluate() of a cached XPathExpression would dereference freed memory. --- src/browser/webapi/XPathExpression.zig | 7 ++++++- src/browser/webapi/XPathResult.zig | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/browser/webapi/XPathExpression.zig b/src/browser/webapi/XPathExpression.zig index 6dba00fb..8c771d2e 100644 --- a/src/browser/webapi/XPathExpression.zig +++ b/src/browser/webapi/XPathExpression.zig @@ -50,7 +50,12 @@ pub fn init(expression: []const u8, frame: *Frame) !*XPathExpression { const arena = try frame.getArena(.tiny, "XPathExpression"); errdefer frame.releaseArena(arena); - const expr = try xpath.Parser.parse(arena, expression); + // The AST borrows string slices from its input (literals, names, + // var refs, function names). `expression` is materialized in the JS + // call_arena and is reclaimed when the top-level call returns, so + // dupe into our long-lived arena before parsing. + const owned = try arena.dupe(u8, expression); + const expr = try xpath.Parser.parse(arena, owned); const xe = try arena.create(XPathExpression); xe.* = .{ ._arena = arena, ._expr = expr }; return xe; diff --git a/src/browser/webapi/XPathResult.zig b/src/browser/webapi/XPathResult.zig index 6bf5095b..1da520ec 100644 --- a/src/browser/webapi/XPathResult.zig +++ b/src/browser/webapi/XPathResult.zig @@ -95,7 +95,12 @@ pub fn fromExpression( const arena = try frame.getArena(.medium, "XPathResult"); errdefer frame.releaseArena(arena); - const expr = try xpath.Parser.parse(arena, expression); + // The AST borrows string slices from its input (literals, names, + // var refs, function names). `expression` is materialized in the JS + // call_arena and is reclaimed when the top-level call returns, so + // dupe into our long-lived arena before parsing. + const owned = try arena.dupe(u8, expression); + const expr = try xpath.Parser.parse(arena, owned); const result = try xpath.Evaluator.evaluate(arena, frame, expr, context_node); return fromResult(arena, requested_type, result); } From 33714a4dfd8d4c47178f7f07a3850056be7c4058 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 29 Apr 2026 00:36:13 +0200 Subject: [PATCH 03/12] cdp: tighten isXPathQuery '::' heuristic A bare indexOf("::") matched CSS pseudo-elements (a::before) and attribute values containing '::' ([data-x="x::y"]), misrouting them to the XPath evaluator. Require an axis-name shape ([a-zA-Z-]) immediately before '::' so only real axis specifiers like descendant::p are dispatched to XPath. --- src/cdp/domains/dom.zig | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cdp/domains/dom.zig b/src/cdp/domains/dom.zig index 33283b2d..843cc20e 100644 --- a/src/cdp/domains/dom.zig +++ b/src/cdp/domains/dom.zig @@ -104,7 +104,16 @@ fn isXPathQuery(q: []const u8) bool { if (q[1] == '/') return true; if (q[1] == '.' and q.len > 2 and q[2] == '/') return true; } - return std.mem.indexOf(u8, q, "::") != null; + // Require axis-name shape immediately before `::` so CSS pseudo-elements + // (`a::before`) and attribute values containing `::` (`[data-x="x::y"]`) + // aren't misrouted to the XPath evaluator. + var idx: usize = 0; + while (std.mem.indexOfPos(u8, q, idx, "::")) |hit| : (idx = hit + 1) { + if (hit == 0) continue; + const c = q[hit - 1]; + if ((c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '-') return true; + } + return false; } // https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-performSearch From a4abbb6d13fcae99fed3ba2fabebf1253820234d Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 29 Apr 2026 00:37:12 +0200 Subject: [PATCH 04/12] xpath: cache attribute axis nodes via frame lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The attribute axis was calling Entry.toAttribute on every visit, materializing fresh *Attribute structs (plus duped name/value strings) into page-lifetime storage. Repeated XPath queries — the Capybara/ Selenium polling pattern this PR targets — accumulated unbounded copies for the same DOM entries. Route through frame._attribute_lookup so each Entry resolves to a single cached *Attribute, matching List.getAttribute and NamedNodeMap.getAtIndex. --- src/browser/xpath/Evaluator.zig | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index a16d7b37..11f7a6c1 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -277,11 +277,15 @@ fn appendAttributes(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) E const el = node.is(Element) orelse return; var it = el.attributeIterator(); while (it.next()) |entry| { - // Materialize as full Attribute so the result is *Node-uniform. - // Allocates from frame.arena (long-lived); attribute axis is - // typically leaf, so churn is bounded. - const attr = try entry.toAttribute(el, self.frame); - try out.append(self.arena, attr._proto); + // Memoize via frame._attribute_lookup so repeated XPath queries + // (Capybara/Selenium polling) reuse the same *Attribute instead + // of leaking fresh ones into page-lifetime storage on every call. + // Same pattern as Attribute.List.getAttribute / NamedNodeMap.getAtIndex. + const gop = try self.frame._attribute_lookup.getOrPut(self.frame.arena, @intFromPtr(entry)); + if (!gop.found_existing) { + gop.value_ptr.* = try entry.toAttribute(el, self.frame); + } + try out.append(self.arena, gop.value_ptr.*._proto); } } From e7c3e77c414200c028dd450f27a2dcb1b9dc808f Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 29 Apr 2026 00:37:39 +0200 Subject: [PATCH 05/12] xpath: match CDATASection in text() node test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per XPath 1.0 §5.7, the data model has no CDATASection node — CDATA content is part of the text node value. The text() node test was only matching DOM nodeType 3 (Text), silently excluding CDATA sections (nodeType 4) parsed via DOMParser/XMLDocument and inline foreign content like SVG with embedded scripts. --- src/browser/xpath/Evaluator.zig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index 11f7a6c1..99202cbd 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -295,7 +295,10 @@ fn matchTest(node: *Node, test_: Ast.NodeTest, axis: Ast.Axis, lowered_name: ?[] return switch (test_) { .type_test => |kind| switch (kind) { .node => true, - .text => node.getNodeType() == 3, + // XPath 1.0 §5.7: the data model has no CDATASection node — + // CDATA content is part of the text node value. Match both + // Text (3) and CDATASection (4) DOM node types. + .text => node.getNodeType() == 3 or node.getNodeType() == 4, .comment => node.getNodeType() == 8, .processing_instruction => node.getNodeType() == 7, }, From 94bcee63222c682dc759003a2a8165e03e2f19e6 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 6 May 2026 18:19:44 +0200 Subject: [PATCH 06/12] xpath: apply review style/convention feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename Result.zig / Ast.zig / Functions.zig to snake_case (no top-level fields per Zig style guide) - Restructure imports across xpath module: lib (std/lp) → relative (further → nearer) → aliases - Move `frame` to last parameter on Evaluator.evaluate, searchAll, Functions.call, idFn (matches js bridge convention); call sites updated in webapi/XPath{Result,Expression}.zig and cdp/domains/dom.zig - Local-pos style in XPathResult.iterateNext --- src/browser/webapi/XPathExpression.zig | 4 ++-- src/browser/webapi/XPathResult.zig | 11 +++++---- src/browser/xpath/Evaluator.zig | 24 ++++++++++--------- src/browser/xpath/Parser.zig | 5 ++-- src/browser/xpath/{Ast.zig => ast.zig} | 0 .../xpath/{Functions.zig => functions.zig} | 16 +++++++------ src/browser/xpath/{Result.zig => result.zig} | 3 ++- src/cdp/domains/dom.zig | 2 +- 8 files changed, 36 insertions(+), 29 deletions(-) rename src/browser/xpath/{Ast.zig => ast.zig} (100%) rename src/browser/xpath/{Functions.zig => functions.zig} (98%) rename src/browser/xpath/{Result.zig => result.zig} (99%) diff --git a/src/browser/webapi/XPathExpression.zig b/src/browser/webapi/XPathExpression.zig index 8c771d2e..b24b6268 100644 --- a/src/browser/webapi/XPathExpression.zig +++ b/src/browser/webapi/XPathExpression.zig @@ -33,7 +33,7 @@ const Node = @import("Node.zig"); const XPathResult = @import("XPathResult.zig"); const xpath = struct { - const Ast = @import("../xpath/Ast.zig"); + const Ast = @import("../xpath/ast.zig"); const Parser = @import("../xpath/Parser.zig"); const Evaluator = @import("../xpath/Evaluator.zig"); }; @@ -76,7 +76,7 @@ pub fn evaluate( const arena = try frame.getArena(.medium, "XPathResult"); errdefer frame.releaseArena(arena); - const eval_result = try xpath.Evaluator.evaluate(arena, frame, self._expr, context_node); + const eval_result = try xpath.Evaluator.evaluate(arena, self._expr, context_node, frame); return XPathResult.fromResult(arena, requested_type, eval_result); } diff --git a/src/browser/webapi/XPathResult.zig b/src/browser/webapi/XPathResult.zig index 1da520ec..44c29b44 100644 --- a/src/browser/webapi/XPathResult.zig +++ b/src/browser/webapi/XPathResult.zig @@ -46,7 +46,7 @@ const Node = @import("Node.zig"); const xpath = struct { const Parser = @import("../xpath/Parser.zig"); const Evaluator = @import("../xpath/Evaluator.zig"); - const Result = @import("../xpath/Result.zig"); + const Result = @import("../xpath/result.zig"); }; const Allocator = std.mem.Allocator; @@ -101,7 +101,7 @@ pub fn fromExpression( // dupe into our long-lived arena before parsing. const owned = try arena.dupe(u8, expression); const expr = try xpath.Parser.parse(arena, owned); - const result = try xpath.Evaluator.evaluate(arena, frame, expr, context_node); + const result = try xpath.Evaluator.evaluate(arena, expr, context_node, frame); return fromResult(arena, requested_type, result); } @@ -220,9 +220,10 @@ pub fn iterateNext(self: *XPathResult) !?*Node { if (self._type != UNORDERED_NODE_ITERATOR_TYPE and self._type != ORDERED_NODE_ITERATOR_TYPE) { return error.InvalidStateError; } - if (self._iter_pos >= self._value.nodes.len) return null; - const node = self._value.nodes[self._iter_pos]; - self._iter_pos += 1; + const pos = self._iter_pos; + if (pos >= self._value.nodes.len) return null; + const node = self._value.nodes[pos]; + self._iter_pos = pos + 1; return node; } diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index 99202cbd..d654ed8f 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -30,17 +30,19 @@ //! reverse-axis positional predicates evaluate against proximity. const std = @import("std"); -const Allocator = std.mem.Allocator; const lp = @import("lightpanda"); -const Ast = @import("Ast.zig"); -const Parser = @import("Parser.zig"); -const Result = @import("Result.zig"); -const Functions = @import("Functions.zig"); const Node = @import("../webapi/Node.zig"); + +const Ast = @import("ast.zig"); +const Parser = @import("Parser.zig"); +const Result = @import("result.zig"); +const Functions = @import("functions.zig"); + +const Frame = lp.Frame; const Element = Node.Element; const Document = Node.Document; -const Frame = lp.Frame; +const Allocator = std.mem.Allocator; const Evaluator = @This(); @@ -62,7 +64,7 @@ frame: *Frame, /// Public entry. Returns the AST's value; node-sets are sorted into /// document order before return per XPath spec §3.3. -pub fn evaluate(arena: Allocator, frame: *Frame, expr: *const Ast.Expr, context_node: *Node) Error!Result.Result { +pub fn evaluate(arena: Allocator, expr: *const Ast.Expr, context_node: *Node, frame: *Frame) Error!Result.Result { var ev = Evaluator{ .arena = arena, .frame = frame }; const result = try ev.evalExpr(expr, context_node, 1, 1); if (result == .node_set) { @@ -77,9 +79,9 @@ pub const SearchError = Error || Parser.Error; /// evaluate and unwrap the node-set. Top-level scalar expressions yield /// an empty slice (decision #3 — these APIs are for finding nodes, not /// arbitrary computation). -pub fn searchAll(arena: Allocator, frame: *Frame, root: *Node, expression: []const u8) SearchError![]const *Node { +pub fn searchAll(arena: Allocator, root: *Node, expression: []const u8, frame: *Frame) SearchError![]const *Node { const expr = try Parser.parse(arena, expression); - return switch (try evaluate(arena, frame, expr, root)) { + return switch (try evaluate(arena, expr, root, frame)) { .node_set => |ns| ns, else => &.{}, }; @@ -506,7 +508,7 @@ fn evalFnCall(self: *Evaluator, fc: Ast.FnCall, ctx: *Node, pos: usize, size: us const eval_args = try self.arena.alloc(Result.Result, fc.args.len); for (fc.args, 0..) |a, i| eval_args[i] = try self.evalExpr(a, ctx, pos, size); - return Functions.call(self.arena, self.frame, fc.name, eval_args, ctx); + return Functions.call(self.arena, fc.name, eval_args, ctx, self.frame); } // ----- helpers ----- @@ -726,7 +728,7 @@ test "Evaluator: searchAll on scalar expression returns empty (decision #3)" { // the Frame or the context node. Adding a DOM-touching expression // (e.g. `id('x')`) to this list would crash on dereference. inline for (.{ "1 + 2", "'hello'", "true()", "1 = 1" }) |expr| { - const nodes = try searchAll(a, @ptrFromInt(0x1000), @ptrFromInt(0x2000), expr); + const nodes = try searchAll(a, @ptrFromInt(0x2000), expr, @ptrFromInt(0x1000)); try testing.expectEqual(@as(usize, 0), nodes.len); } } diff --git a/src/browser/xpath/Parser.zig b/src/browser/xpath/Parser.zig index 88d25b26..aa969e3f 100644 --- a/src/browser/xpath/Parser.zig +++ b/src/browser/xpath/Parser.zig @@ -25,11 +25,12 @@ //! and is valid for as long as the arena and input outlive it. const std = @import("std"); -const Allocator = std.mem.Allocator; const Tokenizer = @import("Tokenizer.zig"); +const Ast = @import("ast.zig"); + const Token = Tokenizer.Token; -const Ast = @import("Ast.zig"); +const Allocator = std.mem.Allocator; const Parser = @This(); diff --git a/src/browser/xpath/Ast.zig b/src/browser/xpath/ast.zig similarity index 100% rename from src/browser/xpath/Ast.zig rename to src/browser/xpath/ast.zig diff --git a/src/browser/xpath/Functions.zig b/src/browser/xpath/functions.zig similarity index 98% rename from src/browser/xpath/Functions.zig rename to src/browser/xpath/functions.zig index d0ae7eac..52cb4d14 100644 --- a/src/browser/xpath/Functions.zig +++ b/src/browser/xpath/functions.zig @@ -37,14 +37,16 @@ //! Allocations land in the caller's per-evaluation arena. const std = @import("std"); -const Allocator = std.mem.Allocator; const lp = @import("lightpanda"); -const Result = @import("Result.zig"); const Node = @import("../webapi/Node.zig"); + +const Result = @import("result.zig"); + +const Frame = lp.Frame; const Element = Node.Element; const Document = Node.Document; -const Frame = lp.Frame; +const Allocator = std.mem.Allocator; pub const Error = error{ OutOfMemory, @@ -59,14 +61,14 @@ pub const Error = error{ /// last lookup stop. pub fn call( arena: Allocator, - frame: *Frame, name: []const u8, args: []const Result.Result, ctx: *Node, + frame: *Frame, ) Error!Result.Result { // -- Node-set -- if (eql(name, "count")) return .{ .number = countFn(args) }; - if (eql(name, "id")) return idFn(arena, frame, args, ctx); + if (eql(name, "id")) return idFn(arena, args, ctx, frame); if (eql(name, "local-name")) return .{ .string = try localNameFn(arena, args, ctx) }; if (eql(name, "name")) return .{ .string = try nameFn(arena, args, ctx) }; if (eql(name, "namespace-uri")) return .{ .string = "" }; @@ -111,7 +113,7 @@ fn countFn(args: []const Result.Result) f64 { return @floatFromInt(args[0].node_set.len); } -fn idFn(arena: Allocator, frame: *Frame, args: []const Result.Result, ctx: *Node) Error!Result.Result { +fn idFn(arena: Allocator, args: []const Result.Result, ctx: *Node, frame: *Frame) Error!Result.Result { if (args.len == 0) return .{ .node_set = &.{} }; // Polyfill: node-set arg → join `stringVal(n)` of each by ' '. Scalar @@ -345,7 +347,7 @@ fn evalScalar(a: Allocator, src: []const u8) !Result.Result { // Synthetic Frame/Node pointers — the public `evaluate` entry only // touches the Frame for path/axis evaluation. Pure-scalar expressions // (arithmetic, function calls returning scalars) never deref it. - return Evaluator.evaluate(a, @ptrFromInt(0x1000), expr, @ptrFromInt(0x2000)); + return Evaluator.evaluate(a, expr, @ptrFromInt(0x2000), @ptrFromInt(0x1000)); } test "Functions: count() of non-node-set returns 0" { diff --git a/src/browser/xpath/Result.zig b/src/browser/xpath/result.zig similarity index 99% rename from src/browser/xpath/Result.zig rename to src/browser/xpath/result.zig index c0822054..e71efe83 100644 --- a/src/browser/xpath/Result.zig +++ b/src/browser/xpath/result.zig @@ -26,10 +26,11 @@ //! shortcuts inherited from the polyfill (decision #2). const std = @import("std"); -const Allocator = std.mem.Allocator; const Node = @import("../webapi/Node.zig"); + const CData = Node.CData; +const Allocator = std.mem.Allocator; pub const Result = union(enum) { /// Owned by the evaluator's arena. Order is significant only at the diff --git a/src/cdp/domains/dom.zig b/src/cdp/domains/dom.zig index 843cc20e..32af266c 100644 --- a/src/cdp/domains/dom.zig +++ b/src/cdp/domains/dom.zig @@ -130,7 +130,7 @@ fn performSearch(cmd: *CDP.Command) !void { if (isXPathQuery(params.query)) { const arena = try frame.getArena(.medium, "DOM.performSearch"); defer frame.releaseArena(arena); - const nodes = try xpath.searchAll(arena, frame, root, params.query); + const nodes = try xpath.searchAll(arena, root, params.query, frame); return finishSearch(cmd, bc, nodes); } From 379664044e8377f77d818bd4fea78596e5823227 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 6 May 2026 18:21:34 +0200 Subject: [PATCH 07/12] xpath: apply review correctness feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Document.evaluate / XPathEvaluator.evaluate / XPathExpression.evaluate: result_type / requested_type now optional u16 defaulting to ANY_TYPE (matches WHATWG: `optional unsigned short type = 0`). context_node stays nullable with a fallback to the document — preserves the polyfill's behavior asserted by the `default_context` fixture - ast.zig NodeTest: clarify that namespaced names (`prefix:*`, `prefix:local`) are stored verbatim and fall through to a literal match against the node name — consistent with the `namespace::` axis stub (decision #3). Adds a TODO for if the polyfill ever drops the stub - Parser: cap recursive descent at depth 64 with new error.MaxDepthExceeded; depth tracked across parseExpr (parens, predicates, function args) and parseUnaryExpr (chained `-`). Two regression tests cover deep parenthesization and deep unary minus --- src/browser/webapi/Document.zig | 8 ++++-- src/browser/webapi/XPathEvaluator.zig | 4 +-- src/browser/webapi/XPathExpression.zig | 4 +-- src/browser/xpath/Parser.zig | 34 ++++++++++++++++++++++++++ src/browser/xpath/ast.zig | 11 +++++---- 5 files changed, 50 insertions(+), 11 deletions(-) diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig index 095fa48f..9e40d793 100644 --- a/src/browser/webapi/Document.zig +++ b/src/browser/webapi/Document.zig @@ -419,17 +419,21 @@ pub fn evaluate( expression: []const u8, context_node: ?*Node, resolver: ?js.Function, - result_type: u16, + result_type: ?u16, result: ?*XPathResult, frame: *Frame, ) !*XPathResult { // resolver/result are no-ops in HTML mode (decision #2). + // Null/missing context_node falls back to the document — matches the + // polyfill (decision #2). Firefox throws TypeError on a *missing* + // arg, but the bridge can't distinguish "missing" from "explicit + // null" here, so polyfill parity wins for the ambiguity. _ = resolver; _ = result; return XPathResult.fromExpression( expression, context_node orelse self.asNode(), - result_type, + result_type orelse XPathResult.ANY_TYPE, frame, ); } diff --git a/src/browser/webapi/XPathEvaluator.zig b/src/browser/webapi/XPathEvaluator.zig index ec651de0..7cae18b9 100644 --- a/src/browser/webapi/XPathEvaluator.zig +++ b/src/browser/webapi/XPathEvaluator.zig @@ -46,7 +46,7 @@ pub fn evaluate( expression: []const u8, context_node: *Node, resolver: ?js.Function, - requested_type: u16, + requested_type: ?u16, result: ?*XPathResult, frame: *Frame, ) !*XPathResult { @@ -55,7 +55,7 @@ pub fn evaluate( // allocates a fresh instance. _ = resolver; _ = result; - return XPathResult.fromExpression(expression, context_node, requested_type, frame); + return XPathResult.fromExpression(expression, context_node, requested_type orelse XPathResult.ANY_TYPE, frame); } pub fn createExpression( diff --git a/src/browser/webapi/XPathExpression.zig b/src/browser/webapi/XPathExpression.zig index b24b6268..d801ac5a 100644 --- a/src/browser/webapi/XPathExpression.zig +++ b/src/browser/webapi/XPathExpression.zig @@ -64,7 +64,7 @@ pub fn init(expression: []const u8, frame: *Frame) !*XPathExpression { pub fn evaluate( self: *XPathExpression, context_node: *Node, - requested_type: u16, + requested_type: ?u16, result: ?*XPathResult, frame: *Frame, ) !*XPathResult { @@ -77,7 +77,7 @@ pub fn evaluate( errdefer frame.releaseArena(arena); const eval_result = try xpath.Evaluator.evaluate(arena, self._expr, context_node, frame); - return XPathResult.fromResult(arena, requested_type, eval_result); + return XPathResult.fromResult(arena, requested_type orelse XPathResult.ANY_TYPE, eval_result); } pub fn deinit(self: *XPathExpression, page: *Page) void { diff --git a/src/browser/xpath/Parser.zig b/src/browser/xpath/Parser.zig index aa969e3f..b1a841d2 100644 --- a/src/browser/xpath/Parser.zig +++ b/src/browser/xpath/Parser.zig @@ -39,11 +39,18 @@ pub const Error = error{ UnexpectedToken, ExpectedNodeTest, ExpectedPrimaryExpr, + MaxDepthExceeded, }; +/// Cap recursive descent to keep adversarial input (e.g. `(((((...)))))`, +/// `------5`) from blowing the stack. Real XPath expressions never come +/// close to this; browsers typically allow several hundred. +const max_depth: u16 = 64; + arena: Allocator, tokens: []const Token, pos: usize = 0, +depth: u16 = 0, pub fn parse(arena: Allocator, input: []const u8) Error!*Ast.Expr { var token_list: std.ArrayList(Token) = .empty; @@ -125,6 +132,9 @@ fn makeBinop(self: *Parser, op: Ast.BinOpKind, left: *Ast.Expr, right: *Ast.Expr // Or → And → Equality → Relational → Additive → Mult → Unary → Union → Path fn parseExpr(self: *Parser) Error!*Ast.Expr { + if (self.depth >= max_depth) return error.MaxDepthExceeded; + self.depth += 1; + defer self.depth -= 1; return self.parseOrExpr(); } @@ -190,6 +200,9 @@ fn parseMultExpr(self: *Parser) Error!*Ast.Expr { fn parseUnaryExpr(self: *Parser) Error!*Ast.Expr { if (self.match(.minus)) { + if (self.depth >= max_depth) return error.MaxDepthExceeded; + self.depth += 1; + defer self.depth -= 1; const operand = try self.parseUnaryExpr(); return try self.makeExpr(.{ .neg = operand }); } @@ -922,3 +935,24 @@ test "XPath.Parser: 91-case gem battery — every expression parses" { }; } } + +test "XPath.Parser: deep parenthesization rejected past max_depth" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(testing.allocator); + try buf.appendNTimes(testing.allocator, '(', max_depth + 1); + try buf.append(testing.allocator, '1'); + try buf.appendNTimes(testing.allocator, ')', max_depth + 1); + try testing.expectError(error.MaxDepthExceeded, parse(arena.allocator(), buf.items)); +} + +test "XPath.Parser: deep unary minus rejected past max_depth" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(testing.allocator); + try buf.appendNTimes(testing.allocator, '-', max_depth + 1); + try buf.append(testing.allocator, '1'); + try testing.expectError(error.MaxDepthExceeded, parse(arena.allocator(), buf.items)); +} diff --git a/src/browser/xpath/ast.zig b/src/browser/xpath/ast.zig index 00125e33..91e5a634 100644 --- a/src/browser/xpath/ast.zig +++ b/src/browser/xpath/ast.zig @@ -114,11 +114,12 @@ pub const Axis = enum { }; pub const NodeTest = union(enum) { - /// Element / attribute name. Special values: - /// - "*" → wildcard - /// - "prefix:*" → namespace wildcard - /// - "prefix:local" → namespace-prefixed name - /// The evaluator splits these. + /// Element / attribute name. `"*"` is the wildcard. Namespaced forms + /// (`prefix:*`, `prefix:local`) are stored verbatim — the evaluator + /// does not split them, so they fall through to a literal `mem.eql` + /// against the node name (consistent with the `namespace::` axis stub + /// per decision #3). + /// TODO: real namespace support if the polyfill ever drops the stub. name: []const u8, /// `node()`, `text()`, `comment()`, `processing-instruction()`. /// The optional target literal of `processing-instruction("foo")` From c4c700f7abb1bb726d738733ebec57239b72a9ce Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 6 May 2026 19:19:00 +0200 Subject: [PATCH 08/12] xpath: id-lookup fast path + perf benchmark evalPath recognizes //tag[@id='x'] and .//tag[@id='x'] (plus the //*[@id='x'] wildcard) and serves them via frame.getElementByIdFromNode. ~100-150x speedup on ID lookups (3231us -> 22.6us for //*[@id='target'] in the new benchmark). Falls through to general path on any deviation (extra step, extra predicate, non-eq, non-literal RHS). Inherits the same duplicate-ID compromise selector/List.zig ships for querySelector(All): the id-map stores only the first element per ID in document order. Capybara/Selenium hot paths assume unique IDs. tests/xpath/xpath_perf.html is the 13-query micro-benchmark used to collect the numbers; batched console.warn output survives test runner interleaving. --- src/browser/tests/xpath/xpath_perf.html | 171 ++++++++++++++++++++++++ src/browser/webapi/XPathResult.zig | 4 + src/browser/xpath/Evaluator.zig | 121 +++++++++++++++++ 3 files changed, 296 insertions(+) create mode 100644 src/browser/tests/xpath/xpath_perf.html diff --git a/src/browser/tests/xpath/xpath_perf.html b/src/browser/tests/xpath/xpath_perf.html new file mode 100644 index 00000000..0d31e052 --- /dev/null +++ b/src/browser/tests/xpath/xpath_perf.html @@ -0,0 +1,171 @@ + + + + XPath perf benchmark + + + + + + + + + + diff --git a/src/browser/webapi/XPathResult.zig b/src/browser/webapi/XPathResult.zig index 44c29b44..2845480e 100644 --- a/src/browser/webapi/XPathResult.zig +++ b/src/browser/webapi/XPathResult.zig @@ -281,3 +281,7 @@ test "WebApi: XPathResult" { test "WebApi: XPath conformance" { try testing.htmlRunner("xpath/xpath_conformance.html", .{}); } + +test "WebApi: XPath perf" { + try testing.htmlRunner("xpath/xpath_perf.html", .{}); +} diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index d654ed8f..1e4b1b13 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -108,6 +108,8 @@ fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, siz } fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { + if (try self.tryIdLookupFastPath(path, ctx)) |result| return result; + const start: *Node = if (path.absolute) blk: { if (ctx._type == .document) break :blk ctx; const owner = ctx.ownerDocument(self.frame) orelse break :blk ctx; @@ -125,6 +127,125 @@ fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { return .{ .node_set = current_set }; } +// Recognize the very common `//tag[@id='x']` and `.//tag[@id='x']` +// shapes (and their wildcard `//*[@id='x']` variants) and serve them +// directly from `frame.getElementByIdFromNode`. Accepts the literal on +// either side of `=`. +// +// Mirrors the same tradeoff `webapi/selector/List.zig:optimizeSelector` +// already makes for `querySelector(All)`: the id-map only stores the +// first element per ID in document order, so duplicate IDs (invalid +// HTML, but possible) yield one match here where a strict tree walk +// would find all. Acceptable because Capybara/Selenium hot paths +// assume unique IDs and CSS has shipped this compromise for years. +// +// Falls through to the general path for any deviation: extra steps, +// extra predicates, non-eq predicate, non-literal RHS, or the +// inability to resolve a search root. +fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Result.Result { + // Two acceptable AST shapes: + // //tag[@id='x'] parses to: ds::node() / child::tag[pred] + // .//tag[@id='x'] parses to: self::node() / ds::node() / child::tag[pred] + const target: Ast.Step = switch (path.steps.len) { + 2 => blk: { + if (!isDescendantOrSelfNode(path.steps[0])) return null; + break :blk path.steps[1]; + }, + 3 => blk: { + if (!isSelfNode(path.steps[0])) return null; + if (!isDescendantOrSelfNode(path.steps[1])) return null; + break :blk path.steps[2]; + }, + else => return null, + }; + + if (target.axis != .child) return null; + if (target.predicates.len != 1) return null; + + // Tag name (null = wildcard "*"). type_test (e.g. `node()`, + // `text()`) doesn't qualify because getElementByIdFromNode only + // returns elements. + const tag_name: ?[]const u8 = switch (target.node_test) { + .name => |n| if (std.mem.eql(u8, n, "*")) null else n, + .type_test => return null, + }; + + const id_value = matchAttrEqLiteral(target.predicates[0], "id") orelse return null; + + // Resolve search root the same way the general path does. + const search_root: *Node = if (path.absolute) blk: { + if (ctx._type == .document) break :blk ctx; + const owner = ctx.ownerDocument(self.frame) orelse return null; + break :blk owner.asNode(); + } else ctx; + + const id_element = self.frame.getElementByIdFromNode(search_root, id_value) orelse { + return Result.Result{ .node_set = &.{} }; + }; + const id_node = id_element.asNode(); + + // Relative paths must filter to descendants of the context. + // getElementByIdFromNode is doc-wide. + if (search_root != id_node and !search_root.contains(id_node)) { + return Result.Result{ .node_set = &.{} }; + } + + // Tag check (case-insensitive per decision #2). Element tag names + // are stored lowercase via `getTagNameLower`; lowercase the AST + // name once and compare. + if (tag_name) |tag| { + const lowered = try std.ascii.allocLowerString(self.arena, tag); + if (!std.mem.eql(u8, lowered, id_element.getTagNameLower())) { + return Result.Result{ .node_set = &.{} }; + } + } + + const out = try self.arena.alloc(*Node, 1); + out[0] = id_node; + return Result.Result{ .node_set = out }; +} + +fn isDescendantOrSelfNode(s: Ast.Step) bool { + if (s.axis != .descendant_or_self) return false; + if (s.predicates.len != 0) return false; + return switch (s.node_test) { + .type_test => |k| k == .node, + .name => false, + }; +} + +fn isSelfNode(s: Ast.Step) bool { + if (s.axis != .self) return false; + if (s.predicates.len != 0) return false; + return switch (s.node_test) { + .type_test => |k| k == .node, + .name => false, + }; +} + +fn matchAttrEqLiteral(expr: *const Ast.Expr, attr_name: []const u8) ?[]const u8 { + if (expr.* != .binop) return null; + const bo = expr.binop; + if (bo.op != .eq) return null; + if (isAttrPath(bo.left, attr_name) and bo.right.* == .literal) return bo.right.literal; + if (isAttrPath(bo.right, attr_name) and bo.left.* == .literal) return bo.left.literal; + return null; +} + +fn isAttrPath(expr: *const Ast.Expr, attr_name: []const u8) bool { + if (expr.* != .path) return false; + const p = expr.path; + if (p.absolute) return false; + if (p.steps.len != 1) return false; + const s = p.steps[0]; + if (s.axis != .attribute) return false; + if (s.predicates.len != 0) return false; + return switch (s.node_test) { + .name => |n| std.mem.eql(u8, n, attr_name), + .type_test => false, + }; +} + fn evalFilterPath(self: *Evaluator, fp: Ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!Result.Result { const base = try self.evalExpr(fp.filter, ctx, pos, size); if (base != .node_set) return base; From ce722c1f6e4649130b4d73dec1994fcfe0cf7452 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 6 May 2026 19:41:53 +0200 Subject: [PATCH 09/12] xpath: extend fast path to non-positional descendant queries Generalizes 8733e33b's //tag[@id='x'] shape: tryFusedDescendantFastPath handles any //tag[safe] or .//tag[safe] where the predicates are non-positional boolean/node-set checks. Walks the search root's descendants once in document order, applies node test + predicates inline, no per-step materialization, no dedup. 5-9x on //div, //*, //*[@class='x'], //div[contains(...)]; ~25x on (//div)[1] and count(//div) where the inner path is the shape. Safety gate rejects predicates that could produce a number at the top level (number, neg, arithmetic binop, numeric-returning fn-call) and any predicate containing position()/last() anywhere. Conservative: a nested sub-path's local positional predicate is rejected even though it's scoped to its own axis. --- src/browser/xpath/Evaluator.zig | 158 +++++++++++++++++++++++++++++--- 1 file changed, 146 insertions(+), 12 deletions(-) diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index 1e4b1b13..4dfcc85c 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -109,6 +109,7 @@ fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, siz fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { if (try self.tryIdLookupFastPath(path, ctx)) |result| return result; + if (try self.tryFusedDescendantFastPath(path, ctx)) |result| return result; const start: *Node = if (path.absolute) blk: { if (ctx._type == .document) break :blk ctx; @@ -146,18 +147,7 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu // Two acceptable AST shapes: // //tag[@id='x'] parses to: ds::node() / child::tag[pred] // .//tag[@id='x'] parses to: self::node() / ds::node() / child::tag[pred] - const target: Ast.Step = switch (path.steps.len) { - 2 => blk: { - if (!isDescendantOrSelfNode(path.steps[0])) return null; - break :blk path.steps[1]; - }, - 3 => blk: { - if (!isSelfNode(path.steps[0])) return null; - if (!isDescendantOrSelfNode(path.steps[1])) return null; - break :blk path.steps[2]; - }, - else => return null, - }; + const target = matchDescendantPathShape(path) orelse return null; if (target.axis != .child) return null; if (target.predicates.len != 1) return null; @@ -205,6 +195,150 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu return Result.Result{ .node_set = out }; } +// Generalization of `tryIdLookupFastPath` to non-ID predicates. Same +// AST shape (`//[preds]` / `.//[preds]`), but instead of +// dispatching to `getElementByIdFromNode`, walks the descendants of +// the search root once in document order, applying the node test and +// any "safe" non-positional predicates inline. Skips the general path's +// per-step axis materialization, the per-step `filtered`/`current` +// ArrayLists, and the dedup hash map (single-context forward walk +// already preserves doc order). +// +// Hits the bulk of the benchmark's remaining cost: `//div`, `//*`, +// `//*[@class='x']`, `//div[@class='x']`, `//div[contains(@class,'x')]`. +// +// "Safe" predicates: not numeric at the top level (number, neg, +// arithmetic binop, or a fn-call returning a number), and free of +// `position()`/`last()` anywhere in the predicate AST. Numeric predicates +// would need `position()` context which the fused walk doesn't track, +// and a `position()`/`last()` reference inside a sub-path's own step is +// rejected conservatively even though it's local to that sub-axis. +fn tryFusedDescendantFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Result.Result { + const target = matchDescendantPathShape(path) orelse return null; + if (target.axis != .child) return null; + + for (target.predicates) |p| { + if (!isSafeNonPositionalPredicate(p)) return null; + } + + const lowered_name: ?[]const u8 = switch (target.node_test) { + .name => |n| if (std.mem.eql(u8, n, "*")) null else try std.ascii.allocLowerString(self.arena, n), + .type_test => null, + }; + + const search_root: *Node = if (path.absolute) blk: { + if (ctx._type == .document) break :blk ctx; + const owner = ctx.ownerDocument(self.frame) orelse return null; + break :blk owner.asNode(); + } else ctx; + + var out: std.ArrayList(*Node) = .empty; + try self.fusedDescend(search_root, target, lowered_name, &out); + return Result.Result{ .node_set = out.items }; +} + +fn fusedDescend( + self: *Evaluator, + parent: *Node, + target: Ast.Step, + lowered_name: ?[]const u8, + out: *std.ArrayList(*Node), +) Error!void { + var it = parent.childrenIterator(); + while (it.next()) |c| { + if (matchTest(c, target.node_test, target.axis, lowered_name)) { + var ok = true; + for (target.predicates) |pred| { + // Position / size are synthetic. Safe because the + // predicate-safety gate already rejected any expression + // that depends on either. + const val = try self.evalExpr(pred, c, 1, 1); + if (!Result.toBoolean(val)) { + ok = false; + break; + } + } + if (ok) try out.append(self.arena, c); + } + try self.fusedDescend(c, target, lowered_name, out); + } +} + +fn matchDescendantPathShape(path: Ast.Path) ?Ast.Step { + return switch (path.steps.len) { + 2 => blk: { + if (!isDescendantOrSelfNode(path.steps[0])) break :blk null; + break :blk path.steps[1]; + }, + 3 => blk: { + if (!isSelfNode(path.steps[0])) break :blk null; + if (!isDescendantOrSelfNode(path.steps[1])) break :blk null; + break :blk path.steps[2]; + }, + else => null, + }; +} + +fn isSafeNonPositionalPredicate(expr: *const Ast.Expr) bool { + if (isNumericTopLevel(expr)) return false; + if (containsPositionOrLast(expr)) return false; + return true; +} + +fn isNumericTopLevel(expr: *const Ast.Expr) bool { + return switch (expr.*) { + .number, .neg => true, + .binop => |bo| switch (bo.op) { + .add, .sub, .mul, .div, .mod => true, + else => false, + }, + .fn_call => |fc| isNumericFnName(fc.name), + else => false, + }; +} + +fn isNumericFnName(name: []const u8) bool { + const numeric = [_][]const u8{ + "position", "last", "count", "sum", + "floor", "ceiling", "round", "number", + "string-length", + }; + for (numeric) |n| { + if (std.mem.eql(u8, name, n)) return true; + } + return false; +} + +fn containsPositionOrLast(expr: *const Ast.Expr) bool { + return switch (expr.*) { + .number, .literal, .var_ref => false, + .neg => |inner| containsPositionOrLast(inner), + .binop => |bo| containsPositionOrLast(bo.left) or containsPositionOrLast(bo.right), + .filter => |f| containsPositionOrLast(f.expr) or containsPositionOrLast(f.predicate), + .filter_path => |fp| containsPositionOrLast(fp.filter) or stepsContainPositionOrLast(fp.steps), + .path => |p| stepsContainPositionOrLast(p.steps), + .fn_call => |fc| std.mem.eql(u8, fc.name, "position") or + std.mem.eql(u8, fc.name, "last") or + argsContainPositionOrLast(fc.args), + }; +} + +fn stepsContainPositionOrLast(steps: []const Ast.Step) bool { + for (steps) |s| { + for (s.predicates) |p| { + if (containsPositionOrLast(p)) return true; + } + } + return false; +} + +fn argsContainPositionOrLast(args: []const *Ast.Expr) bool { + for (args) |a| { + if (containsPositionOrLast(a)) return true; + } + return false; +} + fn isDescendantOrSelfNode(s: Ast.Step) bool { if (s.axis != .descendant_or_self) return false; if (s.predicates.len != 0) return false; From 9830da04d80e5f4912c63ab878694cbea009619c Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Fri, 8 May 2026 08:22:18 +0800 Subject: [PATCH 10/12] Naming convention fixes Disable xpath_perf benchmark from test run as its quite verbose. --- src/browser/tests/xpath/xpath_perf.html | 4 +- src/browser/webapi/XPathResult.zig | 21 ++-- src/browser/xpath/Evaluator.zig | 140 ++++++++++----------- src/browser/xpath/Parser.zig | 160 ++++++++++++------------ src/browser/xpath/functions.zig | 108 ++++++++-------- 5 files changed, 217 insertions(+), 216 deletions(-) diff --git a/src/browser/tests/xpath/xpath_perf.html b/src/browser/tests/xpath/xpath_perf.html index 0d31e052..21cac3fc 100644 --- a/src/browser/tests/xpath/xpath_perf.html +++ b/src/browser/tests/xpath/xpath_perf.html @@ -16,8 +16,8 @@ mismatch fails the test loudly via testing.fail so a regression in result count can't be hidden by the timing line. - Run: make test F=xpath_perf - Filter: make test F=xpath_perf 2>&1 | grep '\[xpath-perf\]' + Tun run, uncomment the test in XPathResult.zig (bottom of the file), then: + Run: make test F="#xpath_perf" Query shapes target the optimization roadmap: //*[@id='x'] — global ID lookup (fast-path candidate) diff --git a/src/browser/webapi/XPathResult.zig b/src/browser/webapi/XPathResult.zig index 2845480e..c029b48e 100644 --- a/src/browser/webapi/XPathResult.zig +++ b/src/browser/webapi/XPathResult.zig @@ -17,7 +17,7 @@ // along with this program. If not, see . //! WHATWG `XPathResult` (full surface, all 10 type constants — decision -//! #4). Wraps the evaluator's `Result.Result` for JS consumption: +//! #4). Wraps the evaluator's `result.Result` for JS consumption: //! coerces to the requested result type at construction, exposes the //! type-tagged accessors, and serves the iterator/snapshot APIs. //! @@ -44,9 +44,9 @@ const Node = @import("Node.zig"); // XPath runtime helpers. Aliased to keep the cross-directory imports // readable when both modules expose a `Result` type. const xpath = struct { + const result = @import("../xpath/result.zig"); const Parser = @import("../xpath/Parser.zig"); const Evaluator = @import("../xpath/Evaluator.zig"); - const Result = @import("../xpath/result.zig"); }; const Allocator = std.mem.Allocator; @@ -105,14 +105,14 @@ pub fn fromExpression( return fromResult(arena, requested_type, result); } -/// Wrap an already-evaluated `Result.Result` into an XPathResult. The +/// Wrap an already-evaluated `result.result` into an XPathResult. The /// caller hands over ownership of `arena` — the XPathResult will release /// it on deinit. Used by `XPathExpression.evaluate` (which has its own /// AST cache and only allocates a fresh result arena). pub fn fromResult( arena: Allocator, requested_type: u16, - result: xpath.Result.Result, + result: xpath.result.Result, ) !*XPathResult { const value: Value = switch (requested_type) { ANY_TYPE => switch (result) { @@ -121,9 +121,9 @@ pub fn fromResult( .boolean => |b| .{ .boolean = b }, .node_set => |ns| .{ .nodes = ns }, }, - NUMBER_TYPE => .{ .number = try xpath.Result.toNumber(arena, result) }, - STRING_TYPE => .{ .string = try xpath.Result.toString(arena, result) }, - BOOLEAN_TYPE => .{ .boolean = xpath.Result.toBoolean(result) }, + NUMBER_TYPE => .{ .number = try xpath.result.toNumber(arena, result) }, + STRING_TYPE => .{ .string = try xpath.result.toString(arena, result) }, + BOOLEAN_TYPE => .{ .boolean = xpath.result.toBoolean(result) }, UNORDERED_NODE_ITERATOR_TYPE, ORDERED_NODE_ITERATOR_TYPE, UNORDERED_NODE_SNAPSHOT_TYPE, @@ -282,6 +282,7 @@ test "WebApi: XPath conformance" { try testing.htmlRunner("xpath/xpath_conformance.html", .{}); } -test "WebApi: XPath perf" { - try testing.htmlRunner("xpath/xpath_perf.html", .{}); -} +// This uses console.warn, uncomment if you want to run it +// test "WebApi: XPath perf" { +// try testing.htmlRunner("xpath/xpath_perf.html", .{}); +// } diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index 4dfcc85c..c0d9ba5e 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -16,7 +16,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -//! XPath 1.0 evaluator — runs an `Ast.Expr` against a context node and +//! XPath 1.0 evaluator — runs an `ast.Expr` against a context node and //! produces a `Result`. Mirrors the polyfill's `evaluate()` and //! `evalStep()` (lib/capybara/lightpanda/javascripts/index.js, lines //! 344–644). The evaluator allocates intermediate values (node-set @@ -34,10 +34,10 @@ const lp = @import("lightpanda"); const Node = @import("../webapi/Node.zig"); -const Ast = @import("ast.zig"); +const ast = @import("ast.zig"); const Parser = @import("Parser.zig"); -const Result = @import("result.zig"); -const Functions = @import("functions.zig"); +const result = @import("result.zig"); +const functions = @import("functions.zig"); const Frame = lp.Frame; const Element = Node.Element; @@ -64,13 +64,13 @@ frame: *Frame, /// Public entry. Returns the AST's value; node-sets are sorted into /// document order before return per XPath spec §3.3. -pub fn evaluate(arena: Allocator, expr: *const Ast.Expr, context_node: *Node, frame: *Frame) Error!Result.Result { +pub fn evaluate(arena: Allocator, expr: *const ast.Expr, context_node: *Node, frame: *Frame) Error!result.Result { var ev = Evaluator{ .arena = arena, .frame = frame }; - const result = try ev.evalExpr(expr, context_node, 1, 1); - if (result == .node_set) { - sortDocOrder(@constCast(result.node_set)); + const res = try ev.evalExpr(expr, context_node, 1, 1); + if (res == .node_set) { + sortDocOrder(@constCast(res.node_set)); } - return result; + return res; } pub const SearchError = Error || Parser.Error; @@ -89,14 +89,14 @@ pub fn searchAll(arena: Allocator, root: *Node, expression: []const u8, frame: * // ----- AST evaluation ----- -fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, size: usize) Error!Result.Result { +fn evalExpr(self: *Evaluator, expr: *const ast.Expr, ctx: *Node, pos: usize, size: usize) Error!result.Result { return switch (expr.*) { .number => |n| .{ .number = n }, .literal => |s| .{ .string = s }, .var_ref => .{ .string = "" }, // decision #3 stub .neg => |inner| blk: { const v = try self.evalExpr(inner, ctx, pos, size); - const n = try Result.toNumber(self.arena, v); + const n = try result.toNumber(self.arena, v); break :blk .{ .number = -n }; }, .binop => |bo| try self.evalBinop(bo, ctx, pos, size), @@ -107,9 +107,9 @@ fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, siz }; } -fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { - if (try self.tryIdLookupFastPath(path, ctx)) |result| return result; - if (try self.tryFusedDescendantFastPath(path, ctx)) |result| return result; +fn evalPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!result.Result { + if (try self.tryIdLookupFastPath(path, ctx)) |res| return res; + if (try self.tryFusedDescendantFastPath(path, ctx)) |res| return res; const start: *Node = if (path.absolute) blk: { if (ctx._type == .document) break :blk ctx; @@ -143,7 +143,7 @@ fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { // Falls through to the general path for any deviation: extra steps, // extra predicates, non-eq predicate, non-literal RHS, or the // inability to resolve a search root. -fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Result.Result { +fn tryIdLookupFastPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!?result.Result { // Two acceptable AST shapes: // //tag[@id='x'] parses to: ds::node() / child::tag[pred] // .//tag[@id='x'] parses to: self::node() / ds::node() / child::tag[pred] @@ -170,14 +170,14 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu } else ctx; const id_element = self.frame.getElementByIdFromNode(search_root, id_value) orelse { - return Result.Result{ .node_set = &.{} }; + return .{ .node_set = &.{} }; }; const id_node = id_element.asNode(); // Relative paths must filter to descendants of the context. // getElementByIdFromNode is doc-wide. if (search_root != id_node and !search_root.contains(id_node)) { - return Result.Result{ .node_set = &.{} }; + return .{ .node_set = &.{} }; } // Tag check (case-insensitive per decision #2). Element tag names @@ -186,13 +186,13 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu if (tag_name) |tag| { const lowered = try std.ascii.allocLowerString(self.arena, tag); if (!std.mem.eql(u8, lowered, id_element.getTagNameLower())) { - return Result.Result{ .node_set = &.{} }; + return .{ .node_set = &.{} }; } } const out = try self.arena.alloc(*Node, 1); out[0] = id_node; - return Result.Result{ .node_set = out }; + return .{ .node_set = out }; } // Generalization of `tryIdLookupFastPath` to non-ID predicates. Same @@ -213,7 +213,7 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu // would need `position()` context which the fused walk doesn't track, // and a `position()`/`last()` reference inside a sub-path's own step is // rejected conservatively even though it's local to that sub-axis. -fn tryFusedDescendantFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Result.Result { +fn tryFusedDescendantFastPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!?result.Result { const target = matchDescendantPathShape(path) orelse return null; if (target.axis != .child) return null; @@ -234,13 +234,13 @@ fn tryFusedDescendantFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Erro var out: std.ArrayList(*Node) = .empty; try self.fusedDescend(search_root, target, lowered_name, &out); - return Result.Result{ .node_set = out.items }; + return .{ .node_set = out.items }; } fn fusedDescend( self: *Evaluator, parent: *Node, - target: Ast.Step, + target: ast.Step, lowered_name: ?[]const u8, out: *std.ArrayList(*Node), ) Error!void { @@ -253,7 +253,7 @@ fn fusedDescend( // predicate-safety gate already rejected any expression // that depends on either. const val = try self.evalExpr(pred, c, 1, 1); - if (!Result.toBoolean(val)) { + if (!result.toBoolean(val)) { ok = false; break; } @@ -264,7 +264,7 @@ fn fusedDescend( } } -fn matchDescendantPathShape(path: Ast.Path) ?Ast.Step { +fn matchDescendantPathShape(path: ast.Path) ?ast.Step { return switch (path.steps.len) { 2 => blk: { if (!isDescendantOrSelfNode(path.steps[0])) break :blk null; @@ -279,13 +279,13 @@ fn matchDescendantPathShape(path: Ast.Path) ?Ast.Step { }; } -fn isSafeNonPositionalPredicate(expr: *const Ast.Expr) bool { +fn isSafeNonPositionalPredicate(expr: *const ast.Expr) bool { if (isNumericTopLevel(expr)) return false; if (containsPositionOrLast(expr)) return false; return true; } -fn isNumericTopLevel(expr: *const Ast.Expr) bool { +fn isNumericTopLevel(expr: *const ast.Expr) bool { return switch (expr.*) { .number, .neg => true, .binop => |bo| switch (bo.op) { @@ -309,7 +309,7 @@ fn isNumericFnName(name: []const u8) bool { return false; } -fn containsPositionOrLast(expr: *const Ast.Expr) bool { +fn containsPositionOrLast(expr: *const ast.Expr) bool { return switch (expr.*) { .number, .literal, .var_ref => false, .neg => |inner| containsPositionOrLast(inner), @@ -323,7 +323,7 @@ fn containsPositionOrLast(expr: *const Ast.Expr) bool { }; } -fn stepsContainPositionOrLast(steps: []const Ast.Step) bool { +fn stepsContainPositionOrLast(steps: []const ast.Step) bool { for (steps) |s| { for (s.predicates) |p| { if (containsPositionOrLast(p)) return true; @@ -332,14 +332,14 @@ fn stepsContainPositionOrLast(steps: []const Ast.Step) bool { return false; } -fn argsContainPositionOrLast(args: []const *Ast.Expr) bool { +fn argsContainPositionOrLast(args: []const *ast.Expr) bool { for (args) |a| { if (containsPositionOrLast(a)) return true; } return false; } -fn isDescendantOrSelfNode(s: Ast.Step) bool { +fn isDescendantOrSelfNode(s: ast.Step) bool { if (s.axis != .descendant_or_self) return false; if (s.predicates.len != 0) return false; return switch (s.node_test) { @@ -348,7 +348,7 @@ fn isDescendantOrSelfNode(s: Ast.Step) bool { }; } -fn isSelfNode(s: Ast.Step) bool { +fn isSelfNode(s: ast.Step) bool { if (s.axis != .self) return false; if (s.predicates.len != 0) return false; return switch (s.node_test) { @@ -357,7 +357,7 @@ fn isSelfNode(s: Ast.Step) bool { }; } -fn matchAttrEqLiteral(expr: *const Ast.Expr, attr_name: []const u8) ?[]const u8 { +fn matchAttrEqLiteral(expr: *const ast.Expr, attr_name: []const u8) ?[]const u8 { if (expr.* != .binop) return null; const bo = expr.binop; if (bo.op != .eq) return null; @@ -366,7 +366,7 @@ fn matchAttrEqLiteral(expr: *const Ast.Expr, attr_name: []const u8) ?[]const u8 return null; } -fn isAttrPath(expr: *const Ast.Expr, attr_name: []const u8) bool { +fn isAttrPath(expr: *const ast.Expr, attr_name: []const u8) bool { if (expr.* != .path) return false; const p = expr.path; if (p.absolute) return false; @@ -380,7 +380,7 @@ fn isAttrPath(expr: *const Ast.Expr, attr_name: []const u8) bool { }; } -fn evalFilterPath(self: *Evaluator, fp: Ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!Result.Result { +fn evalFilterPath(self: *Evaluator, fp: ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!result.Result { const base = try self.evalExpr(fp.filter, ctx, pos, size); if (base != .node_set) return base; @@ -392,7 +392,7 @@ fn evalFilterPath(self: *Evaluator, fp: Ast.FilterPath, ctx: *Node, pos: usize, return .{ .node_set = current }; } -fn evalFilter(self: *Evaluator, f: Ast.Filter, ctx: *Node, pos: usize, size: usize) Error!Result.Result { +fn evalFilter(self: *Evaluator, f: ast.Filter, ctx: *Node, pos: usize, size: usize) Error!result.Result { const base = try self.evalExpr(f.expr, ctx, pos, size); if (base != .node_set) return base; @@ -408,7 +408,7 @@ fn evalFilter(self: *Evaluator, f: Ast.Filter, ctx: *Node, pos: usize, size: usi // ----- step + axis ----- -fn evalStep(self: *Evaluator, ctx_nodes: []const *Node, step: Ast.Step) Error!Result.Result { +fn evalStep(self: *Evaluator, ctx_nodes: []const *Node, step: ast.Step) Error!result.Result { var dedup: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty; // Pre-lowercase the name test once per step. matchNameTest does @@ -447,7 +447,7 @@ fn evalStep(self: *Evaluator, ctx_nodes: []const *Node, step: Ast.Step) Error!Re return .{ .node_set = dedup.keys() }; } -fn axisNodes(self: *Evaluator, node: *Node, axis: Ast.Axis) Error![]const *Node { +fn axisNodes(self: *Evaluator, node: *Node, axis: ast.Axis) Error![]const *Node { var out: std.ArrayList(*Node) = .empty; switch (axis) { .child => { @@ -548,7 +548,7 @@ fn appendAttributes(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) E // ----- node test matching ----- -fn matchTest(node: *Node, test_: Ast.NodeTest, axis: Ast.Axis, lowered_name: ?[]const u8) bool { +fn matchTest(node: *Node, test_: ast.NodeTest, axis: ast.Axis, lowered_name: ?[]const u8) bool { return switch (test_) { .type_test => |kind| switch (kind) { .node => true, @@ -563,7 +563,7 @@ fn matchTest(node: *Node, test_: Ast.NodeTest, axis: Ast.Axis, lowered_name: ?[] }; } -fn matchNameTest(node: *Node, name: []const u8, axis: Ast.Axis, lowered_name: ?[]const u8) bool { +fn matchNameTest(node: *Node, name: []const u8, axis: ast.Axis, lowered_name: ?[]const u8) bool { // `lowered_name` is non-null iff `name != "*"`. Element tag names // (`getTagNameLower`) and html5ever-stored attribute names are already // lowercase, so a plain `mem.eql` against the pre-lowered test name @@ -583,19 +583,19 @@ fn matchNameTest(node: *Node, name: []const u8, axis: Ast.Axis, lowered_name: ?[ // ----- binop ----- -fn evalBinop(self: *Evaluator, bo: Ast.BinOp, ctx: *Node, pos: usize, size: usize) Error!Result.Result { +fn evalBinop(self: *Evaluator, bo: ast.BinOp, ctx: *Node, pos: usize, size: usize) Error!result.Result { switch (bo.op) { .or_ => { const l = try self.evalExpr(bo.left, ctx, pos, size); - if (Result.toBoolean(l)) return .{ .boolean = true }; + if (result.toBoolean(l)) return .{ .boolean = true }; const r = try self.evalExpr(bo.right, ctx, pos, size); - return .{ .boolean = Result.toBoolean(r) }; + return .{ .boolean = result.toBoolean(r) }; }, .and_ => { const l = try self.evalExpr(bo.left, ctx, pos, size); - if (!Result.toBoolean(l)) return .{ .boolean = false }; + if (!result.toBoolean(l)) return .{ .boolean = false }; const r = try self.evalExpr(bo.right, ctx, pos, size); - return .{ .boolean = Result.toBoolean(r) }; + return .{ .boolean = result.toBoolean(r) }; }, .eq, .neq, .lt, .gt, .lte, .gte => { const l = try self.evalExpr(bo.left, ctx, pos, size); @@ -605,8 +605,8 @@ fn evalBinop(self: *Evaluator, bo: Ast.BinOp, ctx: *Node, pos: usize, size: usiz .add, .sub, .mul, .div, .mod => { const l = try self.evalExpr(bo.left, ctx, pos, size); const r = try self.evalExpr(bo.right, ctx, pos, size); - const ln = try Result.toNumber(self.arena, l); - const rn = try Result.toNumber(self.arena, r); + const ln = try result.toNumber(self.arena, l); + const rn = try result.toNumber(self.arena, r); const v: f64 = switch (bo.op) { .add => ln + rn, .sub => ln - rn, @@ -635,7 +635,7 @@ fn evalBinop(self: *Evaluator, bo: Ast.BinOp, ctx: *Node, pos: usize, size: usiz // ----- comparison (XPath spec §3.4) ----- -fn xCmp(self: *Evaluator, left: Result.Result, right: Result.Result, op: Ast.BinOpKind) Error!bool { +fn xCmp(self: *Evaluator, left: result.Result, right: result.Result, op: ast.BinOpKind) Error!bool { const is_eq = (op == .eq or op == .neq); const l_is_set = (left == .node_set); const r_is_set = (right == .node_set); @@ -646,15 +646,15 @@ fn xCmp(self: *Evaluator, left: Result.Result, right: Result.Result, op: Ast.Bin // (e.g. `//foo = //bar` on a large page). const right_strings = try self.arena.alloc([]const u8, right.node_set.len); for (right.node_set, 0..) |r, i| { - right_strings[i] = try Result.stringValueOf(self.arena, r); + right_strings[i] = try result.stringValueOf(self.arena, r); } for (left.node_set) |l| { - const lv = try Result.stringValueOf(self.arena, l); + const lv = try result.stringValueOf(self.arena, l); for (right_strings) |rv| { const matched = if (is_eq) cmpString(lv, rv, op) else - cmpNumber(Result.stringToNumber(lv), Result.stringToNumber(rv), op); + cmpNumber(result.stringToNumber(lv), result.stringToNumber(rv), op); if (matched) return true; } } @@ -673,10 +673,10 @@ fn xCmp(self: *Evaluator, left: Result.Result, right: Result.Result, op: Ast.Bin } for (ns) |n| { - const sv = try Result.stringValueOf(self.arena, n); + const sv = try result.stringValueOf(self.arena, n); const matched = switch (other) { .number => |num| blk: { - const sv_num = Result.stringToNumber(sv); + const sv_num = result.stringToNumber(sv); const a, const b = if (ns_left) .{ sv_num, num } else .{ num, sv_num }; break :blk cmpNumber(a, b, op); }, @@ -685,8 +685,8 @@ fn xCmp(self: *Evaluator, left: Result.Result, right: Result.Result, op: Ast.Bin const a, const b = if (ns_left) .{ sv, s } else .{ s, sv }; break :blk cmpString(a, b, op); } - const sv_num = Result.stringToNumber(sv); - const s_num = Result.stringToNumber(s); + const sv_num = result.stringToNumber(sv); + const s_num = result.stringToNumber(s); const a, const b = if (ns_left) .{ sv_num, s_num } else .{ s_num, sv_num }; break :blk cmpNumber(a, b, op); }, @@ -700,24 +700,24 @@ fn xCmp(self: *Evaluator, left: Result.Result, right: Result.Result, op: Ast.Bin // Neither is a node-set. if (is_eq) { if (left == .boolean or right == .boolean) { - return cmpBool(Result.toBoolean(left), Result.toBoolean(right), op); + return cmpBool(result.toBoolean(left), result.toBoolean(right), op); } if (left == .number or right == .number) { - const ln = try Result.toNumber(self.arena, left); - const rn = try Result.toNumber(self.arena, right); + const ln = try result.toNumber(self.arena, left); + const rn = try result.toNumber(self.arena, right); return cmpNumber(ln, rn, op); } - const ls = try Result.toString(self.arena, left); - const rs = try Result.toString(self.arena, right); + const ls = try result.toString(self.arena, left); + const rs = try result.toString(self.arena, right); return cmpString(ls, rs, op); } // Non-eq with no node-set: both → number. - const ln = try Result.toNumber(self.arena, left); - const rn = try Result.toNumber(self.arena, right); + const ln = try result.toNumber(self.arena, left); + const rn = try result.toNumber(self.arena, right); return cmpNumber(ln, rn, op); } -fn cmpString(a: []const u8, b: []const u8, op: Ast.BinOpKind) bool { +fn cmpString(a: []const u8, b: []const u8, op: ast.BinOpKind) bool { const equal = std.mem.eql(u8, a, b); return switch (op) { .eq => equal, @@ -726,7 +726,7 @@ fn cmpString(a: []const u8, b: []const u8, op: Ast.BinOpKind) bool { }; } -fn cmpNumber(a: f64, b: f64, op: Ast.BinOpKind) bool { +fn cmpNumber(a: f64, b: f64, op: ast.BinOpKind) bool { // Native f64 comparison gives correct NaN semantics: // NaN == X is false, NaN != X is true, NaN < X (etc.) is false. return switch (op) { @@ -740,7 +740,7 @@ fn cmpNumber(a: f64, b: f64, op: Ast.BinOpKind) bool { }; } -fn cmpBool(a: bool, b: bool, op: Ast.BinOpKind) bool { +fn cmpBool(a: bool, b: bool, op: ast.BinOpKind) bool { return switch (op) { .eq => a == b, .neq => a != b, @@ -750,9 +750,9 @@ fn cmpBool(a: bool, b: bool, op: Ast.BinOpKind) bool { // ----- function calls ----- -fn evalFnCall(self: *Evaluator, fc: Ast.FnCall, ctx: *Node, pos: usize, size: usize) Error!Result.Result { +fn evalFnCall(self: *Evaluator, fc: ast.FnCall, ctx: *Node, pos: usize, size: usize) Error!result.Result { // position()/last() stay here — they need the (pos, size) closure - // that Functions.call doesn't see. Keeping them inline avoids + // that functions.call doesn't see. Keeping them inline avoids // pushing per-call context through Functions' signature. if (std.mem.eql(u8, fc.name, "position")) return .{ .number = @floatFromInt(pos) }; if (std.mem.eql(u8, fc.name, "last")) return .{ .number = @floatFromInt(size) }; @@ -760,20 +760,20 @@ fn evalFnCall(self: *Evaluator, fc: Ast.FnCall, ctx: *Node, pos: usize, size: us // Eagerly evaluate args. Matches the polyfill's `evaluate(args[i], ...)` // pattern; lazy short-circuit isn't needed because `or`/`and` are // binops handled in evalBinop, not function calls. - const eval_args = try self.arena.alloc(Result.Result, fc.args.len); + const eval_args = try self.arena.alloc(result.Result, fc.args.len); for (fc.args, 0..) |a, i| eval_args[i] = try self.evalExpr(a, ctx, pos, size); - return Functions.call(self.arena, fc.name, eval_args, ctx, self.frame); + return functions.call(self.arena, fc.name, eval_args, ctx, self.frame); } // ----- helpers ----- -fn predicateMatches(val: Result.Result, position: usize) bool { +fn predicateMatches(val: result.Result, position: usize) bool { return switch (val) { // Numeric predicate value selects only the node at that position // (1-based). Non-integer numbers never match. .number => |n| n == @as(f64, @floatFromInt(position)), - else => Result.toBoolean(val), + else => result.toBoolean(val), }; } diff --git a/src/browser/xpath/Parser.zig b/src/browser/xpath/Parser.zig index b1a841d2..96eda3b4 100644 --- a/src/browser/xpath/Parser.zig +++ b/src/browser/xpath/Parser.zig @@ -20,14 +20,14 @@ //! //! Mirrors the polyfill `Parser.prototype.*` chain in capybara-lightpanda //! (lib/capybara/lightpanda/javascripts/index.js): recursive descent over -//! a fully-tokenized stream, producing an `Ast.Expr` tree allocated on +//! a fully-tokenized stream, producing an `ast.Expr` tree allocated on //! the caller's arena. The AST borrows string/name slices from `input` //! and is valid for as long as the arena and input outlive it. const std = @import("std"); const Tokenizer = @import("Tokenizer.zig"); -const Ast = @import("ast.zig"); +const ast = @import("ast.zig"); const Token = Tokenizer.Token; const Allocator = std.mem.Allocator; @@ -52,7 +52,7 @@ tokens: []const Token, pos: usize = 0, depth: u16 = 0, -pub fn parse(arena: Allocator, input: []const u8) Error!*Ast.Expr { +pub fn parse(arena: Allocator, input: []const u8) Error!*ast.Expr { var token_list: std.ArrayList(Token) = .empty; // Token count is bounded by input length; ¼-byte-per-token is // generous for typical XPath and skips ArrayList regrowth. @@ -117,13 +117,13 @@ fn matchKeyword(self: *Parser, keyword: []const u8) bool { return false; } -fn makeExpr(self: *Parser, value: Ast.Expr) Error!*Ast.Expr { - const expr = try self.arena.create(Ast.Expr); +fn makeExpr(self: *Parser, value: ast.Expr) Error!*ast.Expr { + const expr = try self.arena.create(ast.Expr); expr.* = value; return expr; } -fn makeBinop(self: *Parser, op: Ast.BinOpKind, left: *Ast.Expr, right: *Ast.Expr) Error!*Ast.Expr { +fn makeBinop(self: *Parser, op: ast.BinOpKind, left: *ast.Expr, right: *ast.Expr) Error!*ast.Expr { return try self.makeExpr(.{ .binop = .{ .op = op, .left = left, .right = right } }); } @@ -131,14 +131,14 @@ fn makeBinop(self: *Parser, op: Ast.BinOpKind, left: *Ast.Expr, right: *Ast.Expr // // Or → And → Equality → Relational → Additive → Mult → Unary → Union → Path -fn parseExpr(self: *Parser) Error!*Ast.Expr { +fn parseExpr(self: *Parser) Error!*ast.Expr { if (self.depth >= max_depth) return error.MaxDepthExceeded; self.depth += 1; defer self.depth -= 1; return self.parseOrExpr(); } -fn parseOrExpr(self: *Parser) Error!*Ast.Expr { +fn parseOrExpr(self: *Parser) Error!*ast.Expr { var left = try self.parseAndExpr(); while (self.matchKeyword("or")) { const right = try self.parseAndExpr(); @@ -147,7 +147,7 @@ fn parseOrExpr(self: *Parser) Error!*Ast.Expr { return left; } -fn parseAndExpr(self: *Parser) Error!*Ast.Expr { +fn parseAndExpr(self: *Parser) Error!*ast.Expr { var left = try self.parseEqualityExpr(); while (self.matchKeyword("and")) { const right = try self.parseEqualityExpr(); @@ -156,7 +156,7 @@ fn parseAndExpr(self: *Parser) Error!*Ast.Expr { return left; } -fn parseEqualityExpr(self: *Parser) Error!*Ast.Expr { +fn parseEqualityExpr(self: *Parser) Error!*ast.Expr { var left = try self.parseRelationalExpr(); while (equalityOp(self.peek())) |op| { _ = self.advance(); @@ -166,7 +166,7 @@ fn parseEqualityExpr(self: *Parser) Error!*Ast.Expr { return left; } -fn parseRelationalExpr(self: *Parser) Error!*Ast.Expr { +fn parseRelationalExpr(self: *Parser) Error!*ast.Expr { var left = try self.parseAdditiveExpr(); while (relationalOp(self.peek())) |op| { _ = self.advance(); @@ -176,7 +176,7 @@ fn parseRelationalExpr(self: *Parser) Error!*Ast.Expr { return left; } -fn parseAdditiveExpr(self: *Parser) Error!*Ast.Expr { +fn parseAdditiveExpr(self: *Parser) Error!*ast.Expr { var left = try self.parseMultExpr(); while (additiveOp(self.peek())) |op| { _ = self.advance(); @@ -188,7 +188,7 @@ fn parseAdditiveExpr(self: *Parser) Error!*Ast.Expr { // After a complete unary expression, `*` is multiply; `div`/`mod` are // operator-position keywords (tokenized as Name). -fn parseMultExpr(self: *Parser) Error!*Ast.Expr { +fn parseMultExpr(self: *Parser) Error!*ast.Expr { var left = try self.parseUnaryExpr(); while (multOp(self.peek())) |op| { _ = self.advance(); @@ -198,7 +198,7 @@ fn parseMultExpr(self: *Parser) Error!*Ast.Expr { return left; } -fn parseUnaryExpr(self: *Parser) Error!*Ast.Expr { +fn parseUnaryExpr(self: *Parser) Error!*ast.Expr { if (self.match(.minus)) { if (self.depth >= max_depth) return error.MaxDepthExceeded; self.depth += 1; @@ -209,7 +209,7 @@ fn parseUnaryExpr(self: *Parser) Error!*Ast.Expr { return self.parseUnionExpr(); } -fn parseUnionExpr(self: *Parser) Error!*Ast.Expr { +fn parseUnionExpr(self: *Parser) Error!*ast.Expr { var left = try self.parsePathExpr(); while (self.match(.pipe)) { const right = try self.parsePathExpr(); @@ -220,7 +220,7 @@ fn parseUnionExpr(self: *Parser) Error!*Ast.Expr { // --- path expressions --- -fn parsePathExpr(self: *Parser) Error!*Ast.Expr { +fn parsePathExpr(self: *Parser) Error!*ast.Expr { const t = self.peek(); if (t == .slash or t == .double_slash) { @@ -245,7 +245,7 @@ fn parsePathExpr(self: *Parser) Error!*Ast.Expr { } if (self.peek() == .slash or self.peek() == .double_slash) { const dsl = self.advance() == .double_slash; - var steps: std.ArrayList(Ast.Step) = .empty; + var steps: std.ArrayList(ast.Step) = .empty; if (dsl) try steps.append(self.arena, descendantOrSelfStep()); try self.parseRelStepsInto(&steps); return try self.makeExpr(.{ .filter_path = .{ @@ -259,8 +259,8 @@ fn parsePathExpr(self: *Parser) Error!*Ast.Expr { return self.parseRelPath(); } -fn parseAbsPath(self: *Parser) Error!*Ast.Expr { - var steps: std.ArrayList(Ast.Step) = .empty; +fn parseAbsPath(self: *Parser) Error!*ast.Expr { + var steps: std.ArrayList(ast.Step) = .empty; if (self.match(.double_slash)) { try steps.append(self.arena, descendantOrSelfStep()); try self.parseRelStepsInto(&steps); @@ -275,8 +275,8 @@ fn parseAbsPath(self: *Parser) Error!*Ast.Expr { } }); } -fn parseRelPath(self: *Parser) Error!*Ast.Expr { - var steps: std.ArrayList(Ast.Step) = .empty; +fn parseRelPath(self: *Parser) Error!*ast.Expr { + var steps: std.ArrayList(ast.Step) = .empty; try self.parseRelStepsInto(&steps); return try self.makeExpr(.{ .path = .{ .absolute = false, @@ -284,7 +284,7 @@ fn parseRelPath(self: *Parser) Error!*Ast.Expr { } }); } -fn parseRelStepsInto(self: *Parser, steps: *std.ArrayList(Ast.Step)) Error!void { +fn parseRelStepsInto(self: *Parser, steps: *std.ArrayList(ast.Step)) Error!void { try steps.append(self.arena, try self.parseStep()); while (self.peek() == .slash or self.peek() == .double_slash) { if (self.advance() == .double_slash) { @@ -301,13 +301,13 @@ fn canStartStep(self: *const Parser) bool { }; } -fn parseStep(self: *Parser) Error!Ast.Step { +fn parseStep(self: *Parser) Error!ast.Step { // Abbreviated steps `.` and `..` carry no axis, node-test, or // predicates — predicates after `.` are a parse error per polyfill. if (self.match(.dot)) return abbreviatedStep(.self); if (self.match(.double_dot)) return abbreviatedStep(.parent); - var axis: Ast.Axis = .child; + var axis: ast.Axis = .child; if (self.match(.at)) { axis = .attribute; } else if (self.peek() == .name and self.lookahead(1) == .double_colon) { @@ -318,7 +318,7 @@ fn parseStep(self: *Parser) Error!Ast.Step { const node_test = try self.parseNodeTest(); - var preds: std.ArrayList(*Ast.Expr) = .empty; + var preds: std.ArrayList(*ast.Expr) = .empty; while (self.match(.lbracket)) { const pred = try self.parseExpr(); _ = try self.expect(.rbracket); @@ -328,7 +328,7 @@ fn parseStep(self: *Parser) Error!Ast.Step { return .{ .axis = axis, .node_test = node_test, .predicates = preds.items }; } -fn parseNodeTest(self: *Parser) Error!Ast.NodeTest { +fn parseNodeTest(self: *Parser) Error!ast.NodeTest { if (self.match(.star)) return .{ .name = "*" }; if (self.peek() != .name) return error.ExpectedNodeTest; @@ -349,7 +349,7 @@ fn parseNodeTest(self: *Parser) Error!Ast.NodeTest { return .{ .name = name }; } -fn parsePrimaryExpr(self: *Parser) Error!*Ast.Expr { +fn parsePrimaryExpr(self: *Parser) Error!*ast.Expr { switch (self.peek()) { .string => |s| { _ = self.advance(); @@ -373,7 +373,7 @@ fn parsePrimaryExpr(self: *Parser) Error!*Ast.Expr { .name => |name| { _ = self.advance(); _ = try self.expect(.lparen); - var args: std.ArrayList(*Ast.Expr) = .empty; + var args: std.ArrayList(*ast.Expr) = .empty; if (self.peek() != .rparen) { try args.append(self.arena, try self.parseExpr()); while (self.match(.comma)) { @@ -389,7 +389,7 @@ fn parsePrimaryExpr(self: *Parser) Error!*Ast.Expr { // --- pure helpers --- -fn equalityOp(t: Token) ?Ast.BinOpKind { +fn equalityOp(t: Token) ?ast.BinOpKind { return switch (t) { .eq => .eq, .neq => .neq, @@ -397,7 +397,7 @@ fn equalityOp(t: Token) ?Ast.BinOpKind { }; } -fn relationalOp(t: Token) ?Ast.BinOpKind { +fn relationalOp(t: Token) ?ast.BinOpKind { return switch (t) { .lt => .lt, .gt => .gt, @@ -407,7 +407,7 @@ fn relationalOp(t: Token) ?Ast.BinOpKind { }; } -fn additiveOp(t: Token) ?Ast.BinOpKind { +fn additiveOp(t: Token) ?ast.BinOpKind { return switch (t) { .plus => .add, .minus => .sub, @@ -415,7 +415,7 @@ fn additiveOp(t: Token) ?Ast.BinOpKind { }; } -fn multOp(t: Token) ?Ast.BinOpKind { +fn multOp(t: Token) ?ast.BinOpKind { return switch (t) { .star => .mul, .name => |name| blk: { @@ -427,7 +427,7 @@ fn multOp(t: Token) ?Ast.BinOpKind { }; } -fn descendantOrSelfStep() Ast.Step { +fn descendantOrSelfStep() ast.Step { return .{ .axis = .descendant_or_self, .node_test = .{ .type_test = .node }, @@ -435,7 +435,7 @@ fn descendantOrSelfStep() Ast.Step { }; } -fn abbreviatedStep(axis: Ast.Axis) Ast.Step { +fn abbreviatedStep(axis: ast.Axis) ast.Step { return .{ .axis = axis, .node_test = .{ .type_test = .node }, @@ -447,18 +447,18 @@ fn isNodeTypeName(name: []const u8) bool { return typeTestKind(name) != null; } -const type_test_lookup = std.StaticStringMap(Ast.TypeTest).initComptime(.{ +const type_test_lookup = std.StaticStringMap(ast.TypeTest).initComptime(.{ .{ "node", .node }, .{ "text", .text }, .{ "comment", .comment }, .{ "processing-instruction", .processing_instruction }, }); -fn typeTestKind(name: []const u8) ?Ast.TypeTest { +fn typeTestKind(name: []const u8) ?ast.TypeTest { return type_test_lookup.get(name); } -const axis_lookup = std.StaticStringMap(Ast.Axis).initComptime(.{ +const axis_lookup = std.StaticStringMap(ast.Axis).initComptime(.{ .{ "child", .child }, .{ "descendant", .descendant }, .{ "descendant-or-self", .descendant_or_self }, @@ -474,7 +474,7 @@ const axis_lookup = std.StaticStringMap(Ast.Axis).initComptime(.{ .{ "namespace", .namespace }, }); -fn parseAxisName(name: []const u8) Ast.Axis { +fn parseAxisName(name: []const u8) ast.Axis { return axis_lookup.get(name) orelse .unknown; } @@ -484,7 +484,7 @@ fn parseAxisName(name: []const u8) Ast.Axis { const testing = std.testing; -fn parseFixture(input: []const u8) !struct { arena: std.heap.ArenaAllocator, expr: *Ast.Expr } { +fn parseFixture(input: []const u8) !struct { arena: std.heap.ArenaAllocator, expr: *ast.Expr } { var arena = std.heap.ArenaAllocator.init(testing.allocator); errdefer arena.deinit(); const expr = try parse(arena.allocator(), input); @@ -538,10 +538,10 @@ test "XPath.Parser: arithmetic precedence — mul binds tighter than add" { defer fx.arena.deinit(); // Expected AST: add(1, mul(2, 3)) const top = fx.expr.binop; - try testing.expectEqual(Ast.BinOpKind.add, top.op); + try testing.expectEqual(ast.BinOpKind.add, top.op); try testing.expectEqual(@as(f64, 1), top.left.number); const mul = top.right.binop; - try testing.expectEqual(Ast.BinOpKind.mul, mul.op); + try testing.expectEqual(ast.BinOpKind.mul, mul.op); try testing.expectEqual(@as(f64, 2), mul.left.number); try testing.expectEqual(@as(f64, 3), mul.right.number); } @@ -551,10 +551,10 @@ test "XPath.Parser: arithmetic left-associativity" { defer fx.arena.deinit(); // Expected AST: sub(sub(1, 2), 3) const top = fx.expr.binop; - try testing.expectEqual(Ast.BinOpKind.sub, top.op); + try testing.expectEqual(ast.BinOpKind.sub, top.op); try testing.expectEqual(@as(f64, 3), top.right.number); const inner = top.left.binop; - try testing.expectEqual(Ast.BinOpKind.sub, inner.op); + try testing.expectEqual(ast.BinOpKind.sub, inner.op); try testing.expectEqual(@as(f64, 1), inner.left.number); try testing.expectEqual(@as(f64, 2), inner.right.number); } @@ -562,21 +562,21 @@ test "XPath.Parser: arithmetic left-associativity" { test "XPath.Parser: div and mod are operator-position keywords" { var fx = try parseFixture("7 div 2"); defer fx.arena.deinit(); - try testing.expectEqual(Ast.BinOpKind.div, fx.expr.binop.op); + try testing.expectEqual(ast.BinOpKind.div, fx.expr.binop.op); var fx2 = try parseFixture("7 mod 2"); defer fx2.arena.deinit(); - try testing.expectEqual(Ast.BinOpKind.mod, fx2.expr.binop.op); + try testing.expectEqual(ast.BinOpKind.mod, fx2.expr.binop.op); } test "XPath.Parser: comparison operators" { inline for (.{ - .{ "1 = 2", Ast.BinOpKind.eq }, - .{ "1 != 2", Ast.BinOpKind.neq }, - .{ "1 < 2", Ast.BinOpKind.lt }, - .{ "1 <= 2", Ast.BinOpKind.lte }, - .{ "1 > 2", Ast.BinOpKind.gt }, - .{ "1 >= 2", Ast.BinOpKind.gte }, + .{ "1 = 2", ast.BinOpKind.eq }, + .{ "1 != 2", ast.BinOpKind.neq }, + .{ "1 < 2", ast.BinOpKind.lt }, + .{ "1 <= 2", ast.BinOpKind.lte }, + .{ "1 > 2", ast.BinOpKind.gt }, + .{ "1 >= 2", ast.BinOpKind.gte }, }) |case| { var fx = try parseFixture(case[0]); defer fx.arena.deinit(); @@ -589,8 +589,8 @@ test "XPath.Parser: logical or/and short-circuit chain" { defer fx.arena.deinit(); // Expected AST: or(path(a), and(path(b), path(c))) — and binds tighter const top = fx.expr.binop; - try testing.expectEqual(Ast.BinOpKind.or_, top.op); - try testing.expectEqual(Ast.BinOpKind.and_, top.right.binop.op); + try testing.expectEqual(ast.BinOpKind.or_, top.op); + try testing.expectEqual(ast.BinOpKind.and_, top.right.binop.op); } test "XPath.Parser: unary minus" { @@ -602,7 +602,7 @@ test "XPath.Parser: unary minus" { test "XPath.Parser: union" { var fx = try parseFixture("a | b"); defer fx.arena.deinit(); - try testing.expectEqual(Ast.BinOpKind.union_, fx.expr.binop.op); + try testing.expectEqual(ast.BinOpKind.union_, fx.expr.binop.op); } test "XPath.Parser: absolute path / alone is document root" { @@ -628,8 +628,8 @@ test "XPath.Parser: //foo expands to descendant-or-self::node()/foo" { const path = fx.expr.path; try testing.expect(path.absolute); try testing.expectEqual(@as(usize, 2), path.steps.len); - try testing.expectEqual(Ast.Axis.descendant_or_self, path.steps[0].axis); - try testing.expectEqual(Ast.TypeTest.node, path.steps[0].node_test.type_test); + try testing.expectEqual(ast.Axis.descendant_or_self, path.steps[0].axis); + try testing.expectEqual(ast.TypeTest.node, path.steps[0].node_test.type_test); try testing.expectEqualStrings("foo", path.steps[1].node_test.name); } @@ -639,7 +639,7 @@ test "XPath.Parser: relative path child::foo/bar" { const path = fx.expr.path; try testing.expect(!path.absolute); try testing.expectEqual(@as(usize, 2), path.steps.len); - try testing.expectEqual(Ast.Axis.child, path.steps[0].axis); + try testing.expectEqual(ast.Axis.child, path.steps[0].axis); try testing.expectEqualStrings("foo", path.steps[0].node_test.name); try testing.expectEqualStrings("bar", path.steps[1].node_test.name); } @@ -649,32 +649,32 @@ test "XPath.Parser: abbreviated steps . and .." { defer fx.arena.deinit(); const path = fx.expr.path; try testing.expectEqual(@as(usize, 2), path.steps.len); - try testing.expectEqual(Ast.Axis.self, path.steps[0].axis); - try testing.expectEqual(Ast.Axis.parent, path.steps[1].axis); + try testing.expectEqual(ast.Axis.self, path.steps[0].axis); + try testing.expectEqual(ast.Axis.parent, path.steps[1].axis); } test "XPath.Parser: attribute axis @class" { var fx = try parseFixture("@class"); defer fx.arena.deinit(); const step = fx.expr.path.steps[0]; - try testing.expectEqual(Ast.Axis.attribute, step.axis); + try testing.expectEqual(ast.Axis.attribute, step.axis); try testing.expectEqualStrings("class", step.node_test.name); } test "XPath.Parser: all 12 named axes parse correctly" { inline for (.{ - .{ "child::a", Ast.Axis.child }, - .{ "descendant::a", Ast.Axis.descendant }, - .{ "descendant-or-self::a", Ast.Axis.descendant_or_self }, - .{ "self::a", Ast.Axis.self }, - .{ "parent::a", Ast.Axis.parent }, - .{ "ancestor::a", Ast.Axis.ancestor }, - .{ "ancestor-or-self::a", Ast.Axis.ancestor_or_self }, - .{ "following-sibling::a", Ast.Axis.following_sibling }, - .{ "preceding-sibling::a", Ast.Axis.preceding_sibling }, - .{ "following::a", Ast.Axis.following }, - .{ "preceding::a", Ast.Axis.preceding }, - .{ "namespace::a", Ast.Axis.namespace }, + .{ "child::a", ast.Axis.child }, + .{ "descendant::a", ast.Axis.descendant }, + .{ "descendant-or-self::a", ast.Axis.descendant_or_self }, + .{ "self::a", ast.Axis.self }, + .{ "parent::a", ast.Axis.parent }, + .{ "ancestor::a", ast.Axis.ancestor }, + .{ "ancestor-or-self::a", ast.Axis.ancestor_or_self }, + .{ "following-sibling::a", ast.Axis.following_sibling }, + .{ "preceding-sibling::a", ast.Axis.preceding_sibling }, + .{ "following::a", ast.Axis.following }, + .{ "preceding::a", ast.Axis.preceding }, + .{ "namespace::a", ast.Axis.namespace }, }) |case| { var fx = try parseFixture(case[0]); defer fx.arena.deinit(); @@ -685,7 +685,7 @@ test "XPath.Parser: all 12 named axes parse correctly" { test "XPath.Parser: unknown axis name maps to .unknown — polyfill parity" { var fx = try parseFixture("wibble::a"); defer fx.arena.deinit(); - try testing.expectEqual(Ast.Axis.unknown, fx.expr.path.steps[0].axis); + try testing.expectEqual(ast.Axis.unknown, fx.expr.path.steps[0].axis); } test "XPath.Parser: wildcard *" { @@ -706,10 +706,10 @@ test "XPath.Parser: namespace-prefixed name and wildcard" { test "XPath.Parser: node-type tests" { inline for (.{ - .{ "node()", Ast.TypeTest.node }, - .{ "text()", Ast.TypeTest.text }, - .{ "comment()", Ast.TypeTest.comment }, - .{ "processing-instruction()", Ast.TypeTest.processing_instruction }, + .{ "node()", ast.TypeTest.node }, + .{ "text()", ast.TypeTest.text }, + .{ "comment()", ast.TypeTest.comment }, + .{ "processing-instruction()", ast.TypeTest.processing_instruction }, }) |case| { var fx = try parseFixture(case[0]); defer fx.arena.deinit(); @@ -720,7 +720,7 @@ test "XPath.Parser: node-type tests" { test "XPath.Parser: processing-instruction with literal target — consumed but ignored" { var fx = try parseFixture("processing-instruction('xml-stylesheet')"); defer fx.arena.deinit(); - try testing.expectEqual(Ast.TypeTest.processing_instruction, fx.expr.path.steps[0].node_test.type_test); + try testing.expectEqual(ast.TypeTest.processing_instruction, fx.expr.path.steps[0].node_test.type_test); } test "XPath.Parser: predicate on step" { @@ -770,7 +770,7 @@ test "XPath.Parser: filter with // tail prepends descendant-or-self" { defer fx.arena.deinit(); const fp = fx.expr.filter_path; try testing.expectEqual(@as(usize, 2), fp.steps.len); - try testing.expectEqual(Ast.Axis.descendant_or_self, fp.steps[0].axis); + try testing.expectEqual(ast.Axis.descendant_or_self, fp.steps[0].axis); try testing.expectEqualStrings("b", fp.steps[1].node_test.name); } @@ -788,7 +788,7 @@ test "XPath.Parser: complex representative expression" { const path = fx.expr.path; try testing.expect(path.absolute); try testing.expectEqual(@as(usize, 3), path.steps.len); - try testing.expectEqual(Ast.Axis.descendant_or_self, path.steps[0].axis); + try testing.expectEqual(ast.Axis.descendant_or_self, path.steps[0].axis); try testing.expectEqualStrings("div", path.steps[1].node_test.name); try testing.expectEqual(@as(usize, 1), path.steps[1].predicates.len); try testing.expectEqualStrings("p", path.steps[2].node_test.name); diff --git a/src/browser/xpath/functions.zig b/src/browser/xpath/functions.zig index 52cb4d14..973bd53a 100644 --- a/src/browser/xpath/functions.zig +++ b/src/browser/xpath/functions.zig @@ -41,7 +41,7 @@ const lp = @import("lightpanda"); const Node = @import("../webapi/Node.zig"); -const Result = @import("result.zig"); +const result = @import("result.zig"); const Frame = lp.Frame; const Element = Node.Element; @@ -62,10 +62,10 @@ pub const Error = error{ pub fn call( arena: Allocator, name: []const u8, - args: []const Result.Result, + args: []const result.Result, ctx: *Node, frame: *Frame, -) Error!Result.Result { +) Error!result.Result { // -- Node-set -- if (eql(name, "count")) return .{ .number = countFn(args) }; if (eql(name, "id")) return idFn(arena, args, ctx, frame); @@ -86,8 +86,8 @@ pub fn call( if (eql(name, "translate")) return .{ .string = try translateFn(arena, args) }; // -- Boolean -- - if (eql(name, "boolean")) return .{ .boolean = if (args.len == 0) false else Result.toBoolean(args[0]) }; - if (eql(name, "not")) return .{ .boolean = if (args.len == 0) true else !Result.toBoolean(args[0]) }; + if (eql(name, "boolean")) return .{ .boolean = if (args.len == 0) false else result.toBoolean(args[0]) }; + if (eql(name, "not")) return .{ .boolean = if (args.len == 0) true else !result.toBoolean(args[0]) }; if (eql(name, "true")) return .{ .boolean = true }; if (eql(name, "false")) return .{ .boolean = false }; if (eql(name, "lang")) return .{ .boolean = false }; @@ -95,9 +95,9 @@ pub fn call( // -- Number -- if (eql(name, "number")) return .{ .number = try numberFn(arena, args, ctx) }; if (eql(name, "sum")) return .{ .number = try sumFn(arena, args) }; - if (eql(name, "floor")) return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.floor(try Result.toNumber(arena, args[0])) }; - if (eql(name, "ceiling")) return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.ceil(try Result.toNumber(arena, args[0])) }; - if (eql(name, "round")) return .{ .number = if (args.len == 0) std.math.nan(f64) else roundHalfToPosInf(try Result.toNumber(arena, args[0])) }; + if (eql(name, "floor")) return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.floor(try result.toNumber(arena, args[0])) }; + if (eql(name, "ceiling")) return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.ceil(try result.toNumber(arena, args[0])) }; + if (eql(name, "round")) return .{ .number = if (args.len == 0) std.math.nan(f64) else roundHalfToPosInf(try result.toNumber(arena, args[0])) }; return error.UnknownFunction; } @@ -108,12 +108,12 @@ inline fn eql(a: []const u8, b: []const u8) bool { // ----- node-set fns ----- -fn countFn(args: []const Result.Result) f64 { +fn countFn(args: []const result.Result) f64 { if (args.len == 0 or args[0] != .node_set) return 0; return @floatFromInt(args[0].node_set.len); } -fn idFn(arena: Allocator, args: []const Result.Result, ctx: *Node, frame: *Frame) Error!Result.Result { +fn idFn(arena: Allocator, args: []const result.Result, ctx: *Node, frame: *Frame) Error!result.Result { if (args.len == 0) return .{ .node_set = &.{} }; // Polyfill: node-set arg → join `stringVal(n)` of each by ' '. Scalar @@ -123,12 +123,12 @@ fn idFn(arena: Allocator, args: []const Result.Result, ctx: *Node, frame: *Frame var buf = std.Io.Writer.Allocating.init(arena); for (args[0].node_set, 0..) |n, i| { if (i > 0) try buf.writer.writeByte(' '); - const sv = try Result.stringValueOf(arena, n); + const sv = try result.stringValueOf(arena, n); try buf.writer.writeAll(sv); } break :blk buf.written(); } - break :blk try Result.toString(arena, args[0]); + break :blk try result.toString(arena, args[0]); }; // `ctx.ownerDocument || ctx` — document nodes own themselves. @@ -144,7 +144,7 @@ fn idFn(arena: Allocator, args: []const Result.Result, ctx: *Node, frame: *Frame return .{ .node_set = seen.keys() }; } -fn localNameFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { +fn localNameFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 { const node = firstNodeOrCtx(args, ctx) orelse return ""; // For Element, `getLocalName` returns a slice into `_tag_name` // (lowercase, namespace-prefix stripped) — lifetime exceeds the @@ -154,7 +154,7 @@ fn localNameFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error! return std.ascii.allocLowerString(arena, node.getNodeName(&buf)); } -fn nameFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { +fn nameFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 { const node = firstNodeOrCtx(args, ctx) orelse return ""; // Diverges from `local-name` only on namespaced elements: `name` // keeps the prefix (`ns:foo`), `local-name` strips it (`foo`). @@ -163,7 +163,7 @@ fn nameFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]con return std.ascii.allocLowerString(arena, node.getNodeName(&buf)); } -fn firstNodeOrCtx(args: []const Result.Result, ctx: *Node) ?*Node { +fn firstNodeOrCtx(args: []const result.Result, ctx: *Node) ?*Node { if (args.len == 0) return ctx; if (args[0] != .node_set) return null; if (args[0].node_set.len == 0) return null; @@ -172,64 +172,64 @@ fn firstNodeOrCtx(args: []const Result.Result, ctx: *Node) ?*Node { // ----- string fns ----- -fn stringFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { - if (args.len == 0) return try Result.stringValueOf(arena, ctx); - return try Result.toString(arena, args[0]); +fn stringFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 { + if (args.len == 0) return try result.stringValueOf(arena, ctx); + return try result.toString(arena, args[0]); } -fn concatFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { +fn concatFn(arena: Allocator, args: []const result.Result) Error![]const u8 { var buf = std.Io.Writer.Allocating.init(arena); for (args) |a| { - const s = try Result.toString(arena, a); + const s = try result.toString(arena, a); try buf.writer.writeAll(s); } return buf.written(); } -fn startsWithFn(arena: Allocator, args: []const Result.Result) Error!bool { +fn startsWithFn(arena: Allocator, args: []const result.Result) Error!bool { if (args.len < 2) return false; - const s1 = try Result.toString(arena, args[0]); - const s2 = try Result.toString(arena, args[1]); + const s1 = try result.toString(arena, args[0]); + const s2 = try result.toString(arena, args[1]); return std.mem.startsWith(u8, s1, s2); } -fn containsFn(arena: Allocator, args: []const Result.Result) Error!bool { +fn containsFn(arena: Allocator, args: []const result.Result) Error!bool { if (args.len < 2) return false; - const s1 = try Result.toString(arena, args[0]); - const s2 = try Result.toString(arena, args[1]); + const s1 = try result.toString(arena, args[0]); + const s2 = try result.toString(arena, args[1]); return std.mem.indexOf(u8, s1, s2) != null; } -fn substringBeforeFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { +fn substringBeforeFn(arena: Allocator, args: []const result.Result) Error![]const u8 { if (args.len < 2) return ""; - const s1 = try Result.toString(arena, args[0]); - const s2 = try Result.toString(arena, args[1]); + const s1 = try result.toString(arena, args[0]); + const s2 = try result.toString(arena, args[1]); if (std.mem.indexOf(u8, s1, s2)) |idx| { return s1[0..idx]; } return ""; } -fn substringAfterFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { +fn substringAfterFn(arena: Allocator, args: []const result.Result) Error![]const u8 { if (args.len < 2) return ""; - const s1 = try Result.toString(arena, args[0]); - const s2 = try Result.toString(arena, args[1]); + const s1 = try result.toString(arena, args[0]); + const s2 = try result.toString(arena, args[1]); if (std.mem.indexOf(u8, s1, s2)) |idx| { return s1[idx + s2.len ..]; } return ""; } -fn substringFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { +fn substringFn(arena: Allocator, args: []const result.Result) Error![]const u8 { if (args.len < 2) return ""; - const s = try Result.toString(arena, args[0]); - const start_raw = try Result.toNumber(arena, args[1]); + const s = try result.toString(arena, args[0]); + const start_raw = try result.toNumber(arena, args[1]); if (std.math.isNan(start_raw)) return ""; const start = roundHalfToPosInf(start_raw); const s_len: f64 = @floatFromInt(s.len); if (args.len >= 3) { - const len_raw = try Result.toNumber(arena, args[2]); + const len_raw = try result.toNumber(arena, args[2]); if (std.math.isNan(len_raw)) return ""; const len = roundHalfToPosInf(len_raw); const sum = start - 1 + len; @@ -249,22 +249,22 @@ fn substringFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { return s[si..]; } -fn stringLengthFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error!f64 { +fn stringLengthFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error!f64 { const s = if (args.len == 0) - try Result.stringValueOf(arena, ctx) + try result.stringValueOf(arena, ctx) else - try Result.toString(arena, args[0]); + try result.toString(arena, args[0]); // Polyfill returns UTF-16 code units; we return UTF-8 bytes. They // agree on ASCII (the gem's 91-case battery is ASCII-only). See // .claude/skills/xpath-port/NOTES.md for the divergence rationale. return @floatFromInt(s.len); } -fn normalizeSpaceFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { +fn normalizeSpaceFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 { const s = if (args.len == 0) - try Result.stringValueOf(arena, ctx) + try result.stringValueOf(arena, ctx) else - try Result.toString(arena, args[0]); + try result.toString(arena, args[0]); const trimmed = std.mem.trim(u8, s, &std.ascii.whitespace); if (trimmed.len == 0) return ""; @@ -283,11 +283,11 @@ fn normalizeSpaceFn(arena: Allocator, args: []const Result.Result, ctx: *Node) E return buf.written(); } -fn translateFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { +fn translateFn(arena: Allocator, args: []const result.Result) Error![]const u8 { if (args.len < 3) return ""; - const s = try Result.toString(arena, args[0]); - const from = try Result.toString(arena, args[1]); - const to = try Result.toString(arena, args[2]); + const s = try result.toString(arena, args[0]); + const from = try result.toString(arena, args[1]); + const to = try result.toString(arena, args[2]); var buf = std.Io.Writer.Allocating.init(arena); for (s) |c| { @@ -303,20 +303,20 @@ fn translateFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { // ----- number fns ----- -fn numberFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error!f64 { +fn numberFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error!f64 { if (args.len == 0) { - const sv = try Result.stringValueOf(arena, ctx); - return Result.stringToNumber(sv); + const sv = try result.stringValueOf(arena, ctx); + return result.stringToNumber(sv); } - return try Result.toNumber(arena, args[0]); + return try result.toNumber(arena, args[0]); } -fn sumFn(arena: Allocator, args: []const Result.Result) Error!f64 { +fn sumFn(arena: Allocator, args: []const result.Result) Error!f64 { if (args.len == 0 or args[0] != .node_set) return std.math.nan(f64); var total: f64 = 0; for (args[0].node_set) |n| { - const sv = try Result.stringValueOf(arena, n); - total += Result.stringToNumber(sv); + const sv = try result.stringValueOf(arena, n); + total += result.stringToNumber(sv); } return total; } @@ -342,7 +342,7 @@ const Tokenizer = @import("Tokenizer.zig"); const Parser = @import("Parser.zig"); const Evaluator = @import("Evaluator.zig"); -fn evalScalar(a: Allocator, src: []const u8) !Result.Result { +fn evalScalar(a: Allocator, src: []const u8) !result.Result { const expr = try Parser.parse(a, src); // Synthetic Frame/Node pointers — the public `evaluate` entry only // touches the Frame for path/axis evaluation. Pure-scalar expressions From 0b0a34c4a24be37e8158129c84627c3a5ab052e9 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Fri, 8 May 2026 08:42:07 +0200 Subject: [PATCH 11/12] cdp: match closed set of axis names in isXPathQuery The previous `::` heuristic accepted any identifier-like character before `::`, which misrouted CSS pseudo-elements (`a::before`, `div::after`) to the XPath evaluator. Walk back the run of [a-zA-Z-] characters and look the candidate up in a StaticStringMap of the 13 XPath 1.0 named axes, so only real axis names match. --- src/cdp/domains/dom.zig | 45 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/src/cdp/domains/dom.zig b/src/cdp/domains/dom.zig index 32af266c..9f7e9eac 100644 --- a/src/cdp/domains/dom.zig +++ b/src/cdp/domains/dom.zig @@ -92,6 +92,26 @@ fn getDocument(cmd: *CDP.Command) !void { return cmd.sendResult(.{ .root = bc.nodeWriter(node, .{ .depth = params.depth }) }, .{}); } +// Closed set of XPath 1.0 named axes. Matched literally before `::` so +// CSS pseudo-elements (`a::before`, `div::first-line`) don't get +// misrouted to the XPath evaluator just because they have an +// identifier-looking word before `::`. +const xpath_axis_names = std.StaticStringMap(void).initComptime(.{ + .{ "child", {} }, + .{ "descendant", {} }, + .{ "descendant-or-self", {} }, + .{ "self", {} }, + .{ "parent", {} }, + .{ "ancestor", {} }, + .{ "ancestor-or-self", {} }, + .{ "following-sibling", {} }, + .{ "preceding-sibling", {} }, + .{ "following", {} }, + .{ "preceding", {} }, + .{ "attribute", {} }, + .{ "namespace", {} }, +}); + // Polyfill-parity heuristic (decision #2/#9): treat the query as XPath // when it begins with a path operator or contains an axis specifier; // otherwise fall through to CSS. Lifted from capybara-lightpanda's @@ -104,14 +124,21 @@ fn isXPathQuery(q: []const u8) bool { if (q[1] == '/') return true; if (q[1] == '.' and q.len > 2 and q[2] == '/') return true; } - // Require axis-name shape immediately before `::` so CSS pseudo-elements - // (`a::before`) and attribute values containing `::` (`[data-x="x::y"]`) - // aren't misrouted to the XPath evaluator. + // For `::` to be an XPath axis separator, the identifier immediately + // before it must be one of the 13 named axes. Walk back the run of + // [a-zA-Z-] characters and look it up in the closed set. var idx: usize = 0; while (std.mem.indexOfPos(u8, q, idx, "::")) |hit| : (idx = hit + 1) { if (hit == 0) continue; - const c = q[hit - 1]; - if ((c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '-') return true; + var start = hit; + while (start > 0) { + const c = q[start - 1]; + const is_axis_char = (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '-'; + if (!is_axis_char) break; + start -= 1; + } + if (start == hit) continue; + if (xpath_axis_names.has(q[start..hit])) return true; } return false; } @@ -711,6 +738,14 @@ test "cdp.dom: isXPathQuery heuristic" { try std.testing.expect(!isXPathQuery("[data-x]")); try std.testing.expect(!isXPathQuery("(p)")); // parens without path → CSS try std.testing.expect(!isXPathQuery(".x")); // leading dot without / + + // CSS pseudo-elements: identifier before `::` is not an XPath axis name. + try std.testing.expect(!isXPathQuery("a::before")); + try std.testing.expect(!isXPathQuery("div::after")); + try std.testing.expect(!isXPathQuery("p::first-line")); + try std.testing.expect(!isXPathQuery("input::placeholder")); + // Attribute selector with `::` inside a literal — nothing axis-like before it. + try std.testing.expect(!isXPathQuery("[data-x=\"x::y\"]")); } test "cdp.dom: querySelector unknown search id" { From d8b9391e337511f8d1b66d212c34c79549e3d6f4 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Fri, 8 May 2026 08:58:07 +0200 Subject: [PATCH 12/12] xpath: drop internal references from comments Strip mentions of the private gem and its internal paths from xpath module docstrings, the conformance test header, and the dom dispatch heuristic. Comments now describe behavior directly without pointing at sources public readers can't access. --- .../tests/xpath/xpath_conformance.html | 5 ++--- src/browser/xpath/Evaluator.zig | 20 +++++++++---------- src/browser/xpath/Parser.zig | 17 ++++++++-------- src/browser/xpath/Tokenizer.zig | 2 -- src/browser/xpath/ast.zig | 6 ++---- src/browser/xpath/functions.zig | 20 +++++++++---------- src/browser/xpath/result.zig | 10 ++++------ src/cdp/domains/dom.zig | 7 +++---- 8 files changed, 37 insertions(+), 50 deletions(-) diff --git a/src/browser/tests/xpath/xpath_conformance.html b/src/browser/tests/xpath/xpath_conformance.html index f200ecbb..7080ecb6 100644 --- a/src/browser/tests/xpath/xpath_conformance.html +++ b/src/browser/tests/xpath/xpath_conformance.html @@ -46,9 +46,8 @@